singularize/pluralize

MycroftAI · Oct 21, 2019 · 22143f9 · 22143f9
1 parent aa04301
commit 22143f9
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 10 deletions.
diff --git a/lingua_franca/format.py b/lingua_franca/format.py
@@ -49,6 +49,26 @@
 import re
 
 
+def singularize(word, lang=None):  # dispatch on the primary language code
+    lang_code = get_primary_lang_code(lang)
+    if lang_code == "en":
+        return singularize_en(word)
+
+    # TODO: Other languages. Until then, fall through and return word as-is.
+    _log_unsupported_language(lang_code, ['en'])
+    return word
+
+
+def pluralize(word, lang=None):  # dispatch on the primary language code
+    lang_code = get_primary_lang_code(lang)
+    if lang_code == "en":
+        return pluralize_en(word)
+
+    # TODO: Other languages. Until then, fall through and return word as-is.
+    _log_unsupported_language(lang_code, ['en'])
+    return word
+
+
 def _translate_word(name, lang):
     """ Helper to get word translations
 
@@ -63,7 +83,7 @@ def _translate_word(name, lang):
 
     lang_code = get_full_lang_code(lang)
 
-    filename = resolve_resource_file(join("text", lang_code, name+".word"))
+    filename = resolve_resource_file(join("text", lang_code, name + ".word"))
     if filename:
         # open the file
         try:
@@ -141,7 +161,7 @@ def _number_strings(self, number, lang):
         x_in_x000 = self.lang_config[lang]['number'].get(str(int(
             number % 10000 / 1000))) or str(int(number % 10000 / 1000))
         x0_in_x000 = self.lang_config[lang]['number'].get(str(int(
-            number % 10000 / 1000)*10)) or str(int(number % 10000 / 1000)*10)
+            number % 10000 / 1000) * 10)) or str(int(number % 10000 / 1000) * 10)
         x_in_0x00 = self.lang_config[lang]['number'].get(str(int(
             number % 1000 / 100)) or str(int(number % 1000 / 100)))
 
@@ -242,7 +262,7 @@ def year_format(self, dt, lang, bc):
 
 
 date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
-                                  'res/text'))
+                                               'res/text'))
 
 
 def nice_number(number, lang=None, speech=True, denominators=None):
@@ -520,7 +540,7 @@ def nice_duration(duration, lang=None, speech=True):
             out += str(hours) + ":"
         if minutes < 10 and (hours > 0 or days > 0):
             out += "0"
-        out += str(minutes)+":"
+        out += str(minutes) + ":"
         if seconds < 10:
             out += "0"
         out += str(seconds)

diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py
@@ -18,6 +18,7 @@
 from lingua_franca.lang.format_common import convert_to_mixed_fraction
 from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
     _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN
+import inflection
 
 
 def nice_number_en(number, speech, denominators):
@@ -144,7 +145,7 @@ def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
                 if _num[3:4] == '0':
                     last = number_names[int(_num[2:4])]
                 else:
-                    second = number_names[int(_num[2:3])*10]
+                    second = number_names[int(_num[2:3]) * 10]
                     last = second + " " + number_names[int(_num[3:4])]
                 return first + " " + last
     # exception used to catch any unforeseen edge cases
@@ -214,7 +215,7 @@ def _long_scale(n):
                     # plus one as we skip 'thousand'
                     # (and 'hundred', but this is excluded by index value)
                     number = number.replace(',', '')
-                    number += " " + hundreds[i+1]
+                    number += " " + hundreds[i + 1]
                 res.append(number)
             return ", ".join(reversed(res))
 
@@ -318,3 +319,11 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
                 speak += " a.m."
 
         return speak
+
+
+def singularize_en(word):
+    return inflection.singularize(word)  # delegates to the inflection package
+
+
+def pluralize_en(word):
+    return inflection.pluralize(word)  # delegates to the inflection package
diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
-python-dateutil==2.6.0
+python-dateutil==2.6.0
+inflection
diff --git a/test/test_format.py b/test/test_format.py
@@ -30,6 +30,7 @@
 from lingua_franca.format import pronounce_number
 from lingua_franca.format import date_time_format
 from lingua_franca.format import join_list
+from lingua_franca.format import singularize, pluralize
 
 NUMBERS_FIXTURE_EN = {
     1.435634: '1.436',
@@ -186,12 +187,12 @@ def test_auto_scientific_notation(self):
                                         "power of negative one hundred "
                                         "and fifty")
         # value is platform dependent so better not use in tests?
-        #self.assertEqual(
+        # self.assertEqual(
         #    pronounce_number(sys.float_info.min), "two point two two times "
         #                                          "ten to the power of "
         #                                          "negative three hundred "
         #                                          "and eight")
-        #self.assertEqual(
+        # self.assertEqual(
         #    pronounce_number(sys.float_info.max), "one point seven nine "
         #                                          "times ten to the power of"
         #                                          " three hundred and eight")
@@ -519,7 +520,7 @@ def test_nice_year(self):
                 self.assertTrue(len(nice_year(dt, lang=lang)) > 0)
                 # Looking through the date sequence can be helpful
 
-#                print(nice_year(dt, lang=lang))
+    #                print(nice_year(dt, lang=lang))
 
     def test_nice_duration(self):
         self.assertEqual(nice_duration(1), "one second")
@@ -556,5 +557,34 @@ def test_join(self):
         self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d")
 
 
+class TestInflection(unittest.TestCase):
+    def test_singularize(self):
+        self.assertEqual(singularize("posts"), "post")
+        self.assertEqual(singularize("octopi"), "octopus")
+        self.assertEqual(singularize("sheep"), "sheep")
+        # an already-singular word is expected to come back unchanged
+        self.assertEqual(singularize("word"), "word")
+        # CamelCase input: only the trailing word is expected to change
+        self.assertEqual(singularize("CamelOctopi"), "CamelOctopus")
+
+    def test_pluralize(self):
+        self.assertEqual(pluralize("post"), "posts")
+        self.assertEqual(pluralize("octopus"), "octopi")
+        self.assertEqual(pluralize("sheep"), "sheep")
+        # an already-plural word is expected to come back unchanged
+        self.assertEqual(pluralize("words"), "words")
+        # irregular and otherwise tricky nouns
+        self.assertEqual(pluralize("person"), "people")
+        self.assertEqual(pluralize("man"), "men")
+        self.assertEqual(pluralize("human"), "humans")
+        self.assertEqual(pluralize('child'), 'children')
+        self.assertEqual(pluralize('sex'), 'sexes')
+        self.assertEqual(pluralize('move'), 'moves')
+        self.assertEqual(pluralize('cow'), 'kine')
+        self.assertEqual(pluralize('zombie'), 'zombies')
+        # CamelCase input: only the trailing word is expected to change
+        self.assertEqual(pluralize("CamelOctopus"), "CamelOctopi")
+
+
 if __name__ == "__main__":
     unittest.main()