Skip to content

Commit

Permalink
Add flag to mark explicitly defined persons & don't override them
Browse files (browse the repository at this point in the history)
  • Loading branch information
mbollmann committed May 5, 2024
1 parent edb76c6 commit 8dc9515
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 3 deletions.
1 change: 1 addition & 0 deletions python/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
### Changed

- Bugfix: Converting a `<texmath>` expression to Unicode no longer serializes the tail of the XML tag, but only the TeX math expression itself.
- Bugfix: Heuristic scoring of name variants will no longer overwrite canonical names that are explicitly defined in `name_variants.yaml`.

## [0.4.3] — 2023-11-05

Expand Down
5 changes: 4 additions & 1 deletion python/acl_anthology/people/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ def get_or_create_person(
person = self.data[pid]
# If the person was not explicitly defined (e.g. in `name_variants.yaml`)
# and the name scores higher than the current canonical one, we also
# assume we should set this as the canonical one
if name.score() > person.canonical_name.score():
if (not person.is_explicit) and (
name.score() > person.canonical_name.score()
):
person.set_canonical_name(name)
else:
person.add_name(name)
Expand Down Expand Up @@ -316,6 +318,7 @@ def _load_variant_list(self) -> None:
parent=self.parent,
names=names,
comment=entry.get("comment", None),
is_explicit=True,
)
# ...and add it to the index
self.add_person(person)
Expand Down
3 changes: 3 additions & 0 deletions python/acl_anthology/people/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Person:
names: A list of names under which this person has published.
item_ids: A set of volume and/or paper IDs this person has authored or edited.
comment: A comment for disambiguation purposes; can be stored in `name_variants.yaml`.
is_explicit: True if this person is explicitly defined in the metadata
(i.e. `name_variants.yaml`), rather than implicitly created.
"""

id: str
Expand All @@ -43,6 +45,7 @@ class Person:
factory=set, repr=lambda x: f"<set of {len(x)} AnthologyIDTuple objects>"
)
comment: Optional[str] = field(default=None)
is_explicit: Optional[bool] = field(default=False)

@property
def canonical_name(self) -> Name:
Expand Down
5 changes: 3 additions & 2 deletions python/tests/people/personindex_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ def test_get_or_create_person_with_explicit_canonical_name(index):
ns1 = NameSpecification(Name("Emily", "Prud’hommeaux"))
# This one is not, but scores higher according to our heuristics
ns2 = NameSpecification(Name("Emily", "Prud’Hommeaux"))
assert ns2.name.score() > ns1.name.score(), \
"This test assumes that `ns2` will score higher than `ns1`."
assert (
ns2.name.score() > ns1.name.score()
), "This test assumes that `ns2` will score higher than `ns1`."
person1 = index.get_or_create_person(ns1)
person2 = index.get_or_create_person(ns2)
assert person1 is person2
Expand Down

0 comments on commit 8dc9515

Please sign in to comment.