Skip to content

Commit

Permalink
Fixed some bugs where both "Unknown" and "Undetermined" were showing up as gender labels
Browse files Browse the repository at this point in the history
  • Loading branch information
Bikatr7 committed Jun 26, 2024
1 parent 2067ab4 commit 9c420de
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions modules/common/gender_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class GenderUtil:
genders:typing.Optional[dict] = None
cache = {}

is_cote:bool = False

##-------------------start-of-find_english_words()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

@staticmethod
Expand Down Expand Up @@ -162,6 +164,10 @@ def discard_non_names(names: list[str]) -> list[str]:

new_names = [name for name in names if any(any(part in full_name for part in GenderUtil.honorific_stripper(name).split(' ')) for gender, gender_names in GenderUtil.genders.items() for full_name, _ in gender_names.items())]

if(GenderUtil.is_cote):
## known issues with cote
new_names = [name for name in new_names if name not in ["king"] and len(name) > 1]

return new_names

##-------------------start-of-honorific_stripper()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -244,7 +250,7 @@ def discard_similar_names(names: list[str]) -> list[str]:
##-------------------start-of-find_name_gender()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

@staticmethod
def find_name_gender(name:str, is_cote:bool = False) -> list[str]:
def find_name_gender(name:str) -> list[str]:

"""
Expand Down Expand Up @@ -278,7 +284,7 @@ def find_name_gender(name:str, is_cote:bool = False) -> list[str]:
stripped_name = GenderUtil.honorific_stripper(name)

## check if the name is predetermined
if((stripped_name, honorific) in cote_predetermined and is_cote):
if((stripped_name, honorific) in cote_predetermined and GenderUtil.is_cote):
result = [cote_predetermined[(stripped_name, honorific)]]
GenderUtil.cache[name] = result
return result
Expand All @@ -299,7 +305,7 @@ def find_name_gender(name:str, is_cote:bool = False) -> list[str]:
result.remove(gender)
result.append(gender)

if(len(set(result)) > 1 or result == ["Undetermined"]):
if(len(set(result)) > 1 or result in ["Undetermined", "Unknown"]):
if(honorific == "kun"):
result = ["Male"]
elif(honorific == "chan"):
Expand Down Expand Up @@ -332,7 +338,9 @@ def get_pronoun_assumption_for_system_prompt(sample:str) -> typing.List[str]:
gender_to_pronoun_map = {
"Male": "he",
"Female": "she",
"Undetermined": "they"
## the json file uses "Unknown", but the label we should use is "Undetermined"; the json file is deliberately left unchanged, so both are handled here
"Undetermined": "they",
"Unknown": "they"
}

names_with_positions = GenderUtil.find_english_words(sample)
Expand All @@ -342,9 +350,9 @@ def get_pronoun_assumption_for_system_prompt(sample:str) -> typing.List[str]:
filtered_names = GenderUtil.discard_similar_names(actual_names)

assumptions = [
"{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and "Undetermined" not in gender else "{} : Undetermined\n".format(name)
"{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and gender not in ["Undetermined", "Unknown"] else "{} : Undetermined\n".format(name)
for name in filtered_names
for gender in [GenderUtil.find_name_gender(name, is_cote=True)]
for gender in [GenderUtil.find_name_gender(name)]
]

pronoun_assumptions = [
Expand Down Expand Up @@ -379,13 +387,13 @@ def get_gender_assumption_for_system_prompt(sample:str) -> typing.List[str]:
filtered_names = GenderUtil.discard_similar_names(actual_names)

assumptions = [
"{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and "Undetermined" not in gender else "{} : Undetermined\n".format(name)
"{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and gender not in ["Undetermined", "Unknown"] else "{} : Undetermined\n".format(name)
for name in filtered_names
for gender in [GenderUtil.find_name_gender(name, is_cote=True)]
for gender in [GenderUtil.find_name_gender(name)]
]

gender_assumptions = [
"{} : {}\n".format(name.strip(), gender.strip())
"{} : {}\n".format(name.strip(), gender.strip().replace("Unknown", "Undetermined"))
for assumption in assumptions
for name, gender in [assumption.split(":")]
]
Expand Down

0 comments on commit 9c420de

Please sign in to comment.