From 0047a42f37a03d81021cb963470c4f6bb86482e4 Mon Sep 17 00:00:00 2001
From: olikelly00
Date: Wed, 5 Jun 2024 12:46:54 +0100
Subject: [PATCH] negator.py updated - Team Cheemu

---
 src/harmony/matching/negator.py | 102 +++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 2 deletions(-)

diff --git a/src/harmony/matching/negator.py b/src/harmony/matching/negator.py
index 3ded536..fe789a9 100644
--- a/src/harmony/matching/negator.py
+++ b/src/harmony/matching/negator.py
@@ -40,6 +40,13 @@ def get_change_en(doc) -> dict:
         if tok.text.lower() in {"always", "rather", "really", "very", "totally", "utterly", "absolutely", "completely",
                                 "frequently", "often", "sometimes", "generally", "usually"}:
             return {tok.i: ("replace", "never")}
+        # Team Cheemu: handle negative contractions, which the tokeniser splits into a stem plus "n't"
+        # (e.g. "can't" -> "ca" + "n't"): restore the full verb and drop the "n't".
+        contraction_verbs = {"ca": "can", "wo": "will", "sha": "shall"}
+        full_verb = contraction_verbs.get(tok.text.lower())
+        # The bounds check avoids an IndexError when the stem is the last token of the sentence.
+        if full_verb is not None and tok.i + 1 < len(doc) and doc[tok.i + 1].text.lower() == "n't":
+            return {tok.i: ("replace", full_verb), tok.i + 1: ("replace", "")}
         if tok.text.lower() in {"never", "not", "n't"}:
             return {tok.i: ("replace", "")}
         if tok.text.lower() in {"cannot"}:
@@ -73,17 +80,108 @@ def get_change_pt(doc) -> dict:
     return {0: ("insert_before", "não")}
 
 
+def get_change_es(doc) -> dict:
+    """
+    Team Cheemu: Identify how to change a Spanish sentence from positive to negative or vice versa.
+
+    :param doc: tokenised sentence; each token must expose ``text`` and ``i`` (its index)
+    :return: dict mapping a token index to an ``(operation, text)`` pair, where operation is
+        "replace" or "insert_before"
+    """
+    for tok in doc:
+        if tok.text.lower() in {"siempre", "bastante", "realmente", "muy", "mucho", "totalmente", "absolutamente",
+                                "completamente", "frecuentemente", "frequentemente", "veces"}:
+            return {tok.i: ("replace", "nunca")}
+        if tok.text.lower() in {"nunca", "jamás", "ni", "no"}:
+            return {tok.i: ("replace", "")}
+    # Neither an intensifier/frequency adverb nor a negation word was found: negate the whole sentence.
+    return {0: ("insert_before", "no")}
+
+
+def get_change_it(doc) -> dict:
+    """
+    Team Cheemu: Identify how to change an Italian sentence from positive to negative or vice versa.
+
+    :param doc: tokenised sentence; each token must expose ``text`` and ``i`` (its index)
+    :return: dict mapping a token index to an ``(operation, text)`` pair, where operation is
+        "replace" or "insert_before"
+    """
+    for tok in doc:
+        # tok.text is a single token, so only single-word entries can ever match here.
+        if tok.text.lower() in {"sempre", "abbastanza", "realmente", "davvero", "veramente", "molto", "molta", "molti",
+                                "molte", "totalmente", "assolutamente", "completamente", "frequentemente",
+                                "talvolta"}:
+            return {tok.i: ("replace", "mai")}
+        if tok.text.lower() in {"mai", "né", "non", "nessuno", "nulla", "niente"}:
+            return {tok.i: ("replace", "")}
+    # No adverb or negation word found: put "non" before every auxiliary/copula verb instead.
+    result = {}
+    for tok in doc:
+        if tok.text.lower() in {"è", "sono", "ero", "erano", "avevano", "avevo", "ho", "siamo"}:
+            result[tok.i] = ("insert_before", "non")
+    if result:
+        return result
+    return {0: ("insert_before", "non")}
+
+
+def get_change_de(doc) -> dict:
+    """
+    Team Cheemu: Identify how to change a German sentence from positive to negative or vice versa.
+
+    :param doc: tokenised sentence; each token must expose ``text`` and ``i`` (its index)
+    :return: dict mapping a token index to an ``(operation, text)`` pair, where operation is
+        "replace" or "insert_before"
+    """
+    for tok in doc:
+        if tok.text.lower() in {"immer", "ziemlich", "wirklich", "sehr", "viel", "total", "absolut",
+                                "vollständig", "häufig", "manchmal"}:
+            return {tok.i: ("replace", "nie")}
+        if tok.text.lower() in {"nie", "niemals", "weder", "nicht"}:
+            return {tok.i: ("replace", "")}
+    # TODO(Team Cheemu): use spaCy to place "nicht" according to German word order.
+    return {0: ("insert_before", "nicht")}
+
+
+def get_change_fr(doc) -> dict:
+    """
+    Team Cheemu: Identify how to change a French sentence from positive to negative or vice versa.
+
+    :param doc: tokenised sentence; each token must expose ``text`` and ``i`` (its index)
+    :return: dict mapping a token index to an ``(operation, text)`` pair, where operation is
+        "replace" or "insert_before"
+    """
+    for tok in doc:
+        # tok.text is a single token, so only single-word entries can ever match here.
+        if tok.text.lower() in {"toujours", "assez", "vraiment", "très", "beaucoup", "totalement", "absolument",
+                                "complètement", "plus", "trop", "souvent", "parfois"}:
+            return {tok.i: ("replace", "jamais")}
+        if tok.text.lower() in {"personne", "jamais", "ni", "rien", "pas", "non", "ne", "n'", "nulle", "aucun",
+                                "aucune", "guère"}:
+            return {tok.i: ("replace", "")}
+    return {0: ("insert_before", "ne pas")}
+
+
 def negate(text: str, language: str) -> str:
     """
-    Converts negative sentences to pos and vice versa.
+    Converts negative sentences to positive and vice versa.
     Not meant to generate 100% accurate natural language, it's to go into transformer model and is not shown to a human.
     :param text:
-    :param language: "en" or "pt"
+    :param language: "en" for English, "pt" for Portuguese, "es" for Spanish, "it" for Italian, "de" for German,
+        "fr" for French; any other value is treated as English.
     :return: the sentence negated
     """
     doc = nlp(text)
 
     if language == "pt":
         changes = get_change_pt(doc)
+    # Team Cheemu: added handling of four additional languages
+    elif language == "es":
+        changes = get_change_es(doc)
+    elif language == "it":
+        changes = get_change_it(doc)
+    elif language == "fr":
+        changes = get_change_fr(doc)
+    elif language == "de":
+        changes = get_change_de(doc)
     else:
         changes = get_change_en(doc)