MycroftAI · JarbasAl · Oct 21, 2019 · Oct 22, 2019
diff --git a/lingua_franca/format.py b/lingua_franca/format.py
@@ -49,6 +49,30 @@
 import re
 
 
+def singularize(word, lang=None):
+    """Return the singular form of `word` for `lang` ('en' and 'pt' only)."""
+    primary = get_primary_lang_code(lang)
+    if primary == "en":
+        return singularize_en(word)
+    if primary == "pt":
+        return singularize_pt(word)
+    # TODO: Other languages
+    _log_unsupported_language(primary, ['en', 'pt'])
+    return word  # unsupported language: hand the word back untouched
+
+
+def pluralize(word, lang=None):
+    """Return the plural form of `word` for `lang` ('en' and 'pt' only)."""
+    primary = get_primary_lang_code(lang)
+    if primary == "en":
+        return pluralize_en(word)
+    if primary == "pt":
+        return pluralize_pt(word)
+    # TODO: Other languages
+    _log_unsupported_language(primary, ['en', 'pt'])
+    return word  # unsupported language: hand the word back untouched
+
+
 def _translate_word(name, lang):
     """ Helper to get word tranlations
 
@@ -63,7 +87,7 @@ def _translate_word(name, lang):
 
     lang_code = get_full_lang_code(lang)
 
-    filename = resolve_resource_file(join("text", lang_code, name+".word"))
+    filename = resolve_resource_file(join("text", lang_code, name + ".word"))
     if filename:
         # open the file
         try:
@@ -141,7 +165,7 @@ def _number_strings(self, number, lang):
         x_in_x000 = self.lang_config[lang]['number'].get(str(int(
             number % 10000 / 1000))) or str(int(number % 10000 / 1000))
         x0_in_x000 = self.lang_config[lang]['number'].get(str(int(
-            number % 10000 / 1000)*10)) or str(int(number % 10000 / 1000)*10)
+            number % 10000 / 1000) * 10)) or str(int(number % 10000 / 1000) * 10)
         x_in_0x00 = self.lang_config[lang]['number'].get(str(int(
             number % 1000 / 100)) or str(int(number % 1000 / 100)))
 
@@ -242,7 +266,7 @@ def year_format(self, dt, lang, bc):
 
 
 date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
-                                  'res/text'))
+                                               'res/text'))
 
 
 def nice_number(number, lang=None, speech=True, denominators=None):
@@ -520,7 +544,7 @@ def nice_duration(duration, lang=None, speech=True):
             out += str(hours) + ":"
         if minutes < 10 and (hours > 0 or days > 0):
             out += "0"
-        out += str(minutes)+":"
+        out += str(minutes) + ":"
         if seconds < 10:
             out += "0"
         out += str(seconds)

diff --git a/lingua_franca/lang/common_data_pt.py b/lingua_franca/lang/common_data_pt.py
@@ -1,3 +1,5 @@
+from lingua_franca.lang.parse_common import invert_dict
+
 # Undefined articles ["um", "uma", "uns", "umas"] can not be supressed,
 # in PT, "um cavalo" means "a horse" or "one horse".
 
@@ -18,6 +20,59 @@
 _MALE_DETERMINANTS_PT = ["o", "os", "este", "estes", "esse", "esses"]
 _FEMALE_DETERMINANTS_PT = ["a", "as", "estas", "estas", "essa", "essas"]
 
+# constants used for singularize / pluralize
+_VOWELS_PT = ["a", "ã", "á", "à", "â",  # lowercase only; circumflex forms cover e.g. "bebê"
+              "e", "é", "è", "ê",
+              "i", "ì", "í",
+              "o", "ó", "ò", "õ", "ô",
+              "u", "ú", "ù"]
+
+_INVARIANTS_PT = ["ontem", "depressa", "ali", "além", "sob", "por", "contra", "desde", "entre",
+                  "até", "perante", "porém", "contudo", "todavia", "entretanto", "senão", "portanto",
+                  "oba", "eba", "exceto", "excepto", "apenas", "menos", "também", "inclusive", "aliás",
+                  "que", "onde", "isto", "isso", "aquilo", "algo", "alguém", "nada", "ninguém", "tudo", "cada",
+                  "outrem", "quem", "mais", "demais",
+                  # NOTE some words omitted because it depends on the POS tag
+                  # NOTE these multi-word expressions are also invariant
+                  "ou melhor", "isto é", "por exemplo", "a saber", "digo", "ou seja",
+                  "por assim dizer", "com efeito", "ou antes"]
+
+_PLURAL_EXCEPTIONS_PT = {
+    "cânon": "cânones",
+    "cós": "coses",  # cós (unchanged word) is also valid
+    "cais": "cais",
+    "xis": "xis",
+    "mal": "males",
+    "cônsul": "cônsules",
+    "mel": "méis",  # "meles" also valid
+    "fel": "féis",  # "feles" also valid
+    "cal": "cais",  # "cales" also valid; NOTE(review): same plural as "cais" above, so the inverted dict keeps only one singular - confirm which
+    "aval": "avais",  # "avales" also valid
+    "mol": "móis",  # "moles" also valid
+    "real": "réis",
+    "fax": "faxes",
+    "cálix": "cálices",
+    "índex": "índices",
+    "apêndix": "apêndices",
+    "hélix": "hélices",
+    "hálux": "háluces",
+    "códex": "códices",
+    "fénix": "fénixes",  # "fénix" also valid
+    "til": "tis",  # "tiles" also valid
+    "pão": "pães",
+    "cão": "cães",
+    "alemão": "alemães",
+    "balão": "balões",
+    "anão": "anões",
+    "dez": "dez",
+    "três": "três",
+    "seis": "seis"
+}
+
+# plural -> singular lookup; words that end with "s" in singular form should be added to _PLURAL_EXCEPTIONS_PT above
+_SINGULAR_EXCEPTIONS_PT = invert_dict(_PLURAL_EXCEPTIONS_PT)
+
+# constants for number handling
 _NUMBERS_PT = {
     "zero": 0,
     "um": 1,

diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py
@@ -18,6 +18,7 @@
 from lingua_franca.lang.format_common import convert_to_mixed_fraction
 from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
     _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN
+import inflection
 
 
 def nice_number_en(number, speech, denominators):
@@ -144,7 +145,7 @@ def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
                 if _num[3:4] == '0':
                     last = number_names[int(_num[2:4])]
                 else:
-                    second = number_names[int(_num[2:3])*10]
+                    second = number_names[int(_num[2:3]) * 10]
                     last = second + " " + number_names[int(_num[3:4])]
                 return first + " " + last
     # exception used to catch any unforseen edge cases
@@ -214,7 +215,7 @@ def _long_scale(n):
                     # plus one as we skip 'thousand'
                     # (and 'hundred', but this is excluded by index value)
                     number = number.replace(',', '')
-                    number += " " + hundreds[i+1]
+                    number += " " + hundreds[i + 1]
                 res.append(number)
             return ", ".join(reversed(res))
 
@@ -318,3 +319,11 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
                 speak += " a.m."
 
         return speak
+
+
+def singularize_en(word):  # English singular, delegated to inflection.singularize
+    return inflection.singularize(word)
+
+
+def pluralize_en(word):  # English plural, delegated to inflection.pluralize
+    return inflection.pluralize(word)
diff --git a/lingua_franca/lang/format_pt.py b/lingua_franca/lang/format_pt.py
@@ -17,7 +17,69 @@
 
 from lingua_franca.lang.format_common import convert_to_mixed_fraction
 from lingua_franca.lang.common_data_pt import _FRACTION_STRING_PT, \
-    _NUM_STRING_PT
+    _NUM_STRING_PT, _VOWELS_PT, _PLURAL_EXCEPTIONS_PT, _SINGULAR_EXCEPTIONS_PT, _INVARIANTS_PT
+
+
+def singularize_pt(word):
+    """Return a best-effort singular form of the Portuguese `word`.
+
+    Invariants come back as-is and irregular plurals are looked up; else
+    `word` is ASSUMED plural (no is_plural helper yet) and only its final
+    suffix is swapped, e.g. "ais" -> "al", mirroring pluralize_pt.
+    """
+    if word in _INVARIANTS_PT:
+        return word  # _INVARIANTS_PT is a list: indexing it by word raised
+    if word in _SINGULAR_EXCEPTIONS_PT:
+        return _SINGULAR_EXCEPTIONS_PT[word]
+    # slice the suffix off: rstrip() strips a char *set* ("meses" -> "m") and
+    # replace() also hits mid-word matches; first match wins, longest first
+    suffixes = (("ões", "ão"), ("ães", "ão"), ("ais", "al"), ("eis", "el"),
+                ("ois", "ol"), ("uis", "ul"), ("is", "il"), ("es", ""),
+                ("s", ""))
+    for plural_end, singular_end in suffixes:
+        if word.endswith(plural_end):
+            return word[:-len(plural_end)] + singular_end
+    return word
+
+
+def pluralize_pt(word):
+    """Return a best-effort plural of the Portuguese `word`.
+
+    Invariants, empty strings and words matching no rule come back unchanged.
+    """
+    if not word or word in _INVARIANTS_PT:
+        return word  # _INVARIANTS_PT is a list: indexing it by word raised
+    if word in _PLURAL_EXCEPTIONS_PT:
+        return _PLURAL_EXCEPTIONS_PT[word]
+    if word.endswith("x"):
+        return word
+    if word.endswith("s"):
+        # oxytones take "es", other words stay unchanged; the vowel test is
+        # a rough stand-in ( https://en.wikipedia.org/wiki/Oxytone ) and the
+        # slice, unlike word[-2] / word[-3], is safe for 1-2 letter words
+        if any(letter in _VOWELS_PT for letter in word[-3:-1]):
+            return word + "es"
+        return word
+    if word.endswith("ão"):
+        # can end with "ãos", "ães" or "ões", most times they are all valid
+        # the other times lets hope the word is in exceptions dict
+        # TODO check if numeric, then it's always "ões"
+        return word + "s"
+    if word[-1] in _VOWELS_PT:
+        return word + "s"
+    if word.endswith(("r", "z", "n")):
+        return word + "es"
+    if word.endswith(("al", "el", "ol", "ul")):
+        return word[:-1] + "is"
+    if word.endswith("il"):
+        return word[:-1] + "s"
+    if word.endswith("m"):
+        return word[:-1] + "ns"
+    # foreign words that have been "unportuguesified" have an "s" added
+    # simple check is looking for endings that don't exist in portuguese
+    if word.endswith(("w", "y", "k", "t")):
+        return word + "s"
+    return word
 
 
 def nice_number_pt(number, speech, denominators):

diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
-python-dateutil==2.6.0
+python-dateutil==2.6.0
+inflection
diff --git a/test/test_format.py b/test/test_format.py
@@ -30,6 +30,7 @@
 from lingua_franca.format import pronounce_number
 from lingua_franca.format import date_time_format
 from lingua_franca.format import join_list
+from lingua_franca.format import singularize, pluralize
 
 NUMBERS_FIXTURE_EN = {
     1.435634: '1.436',
@@ -186,12 +187,12 @@ def test_auto_scientific_notation(self):
                                         "power of negative one hundred "
                                         "and fifty")
         # value is platform dependent so better not use in tests?
-        #self.assertEqual(
+        # self.assertEqual(
         #    pronounce_number(sys.float_info.min), "two point two two times "
         #                                          "ten to the power of "
         #                                          "negative three hundred "
         #                                          "and eight")
-        #self.assertEqual(
+        # self.assertEqual(
         #    pronounce_number(sys.float_info.max), "one point seven nine "
         #                                          "times ten to the power of"
         #                                          " three hundred and eight")
@@ -519,7 +520,7 @@ def test_nice_year(self):
                 self.assertTrue(len(nice_year(dt, lang=lang)) > 0)
                 # Looking through the date sequence can be helpful
 
-#                print(nice_year(dt, lang=lang))
+    #                print(nice_year(dt, lang=lang))
 
     def test_nice_duration(self):
         self.assertEqual(nice_duration(1), "one second")
@@ -556,5 +557,34 @@ def test_join(self):
         self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d")
 
 
+class TestInflection(unittest.TestCase):
+    """singularize() / pluralize() called without an explicit `lang`."""
+
+    def test_singularize(self):
+        cases = (
+            ("posts", "post"),
+            ("octopi", "octopus"),
+            ("sheep", "sheep"),
+            ("word", "word"),  # already singular
+            ("CamelOctopi", "CamelOctopus"),  # garbage
+        )
+        for plural, singular in cases:
+            self.assertEqual(singularize(plural), singular)
+
+    def test_pluralize(self):
+        cases = (
+            ("post", "posts"), ("octopus", "octopi"), ("sheep", "sheep"),
+            ("words", "words"),  # already plural
+            # irregular and regular nouns
+            ("person", "people"), ("man", "men"), ("human", "humans"),
+            ("child", "children"), ("sex", "sexes"), ("move", "moves"),
+            ("cow", "kine"), ("zombie", "zombies"),
+            ("CamelOctopus", "CamelOctopi"),  # garbage
+        )
+        # checked in order; the first mismatch fails the test
+        for singular, plural in cases:
+            self.assertEqual(pluralize(singular), plural)
+
+
 if __name__ == "__main__":
     unittest.main()