From 22143f9c748f40881c8c67538ab2d39352c0c49d Mon Sep 17 00:00:00 2001 From: jarbasal Date: Mon, 21 Oct 2019 14:56:45 +0100 Subject: [PATCH] singularize/pluralize --- lingua_franca/format.py | 28 +++++++++++++++++++++---- lingua_franca/lang/format_en.py | 13 ++++++++++-- requirements.txt | 3 ++- test/test_format.py | 36 ++++++++++++++++++++++++++++++--- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/lingua_franca/format.py b/lingua_franca/format.py index 76dce2ac..46dd03b5 100755 --- a/lingua_franca/format.py +++ b/lingua_franca/format.py @@ -49,6 +49,26 @@ import re +def singularize(word, lang=None): + lang_code = get_primary_lang_code(lang) + if lang_code == "en": + return singularize_en(word) + + # TODO: Other languages + _log_unsupported_language(lang_code, ['en']) + return word + + +def pluralize(word, lang=None): + lang_code = get_primary_lang_code(lang) + if lang_code == "en": + return pluralize_en(word) + + # TODO: Other languages + _log_unsupported_language(lang_code, ['en']) + return word + + def _translate_word(name, lang): """ Helper to get word tranlations @@ -63,7 +83,7 @@ def _translate_word(name, lang): lang_code = get_full_lang_code(lang) - filename = resolve_resource_file(join("text", lang_code, name+".word")) + filename = resolve_resource_file(join("text", lang_code, name + ".word")) if filename: # open the file try: @@ -141,7 +161,7 @@ def _number_strings(self, number, lang): x_in_x000 = self.lang_config[lang]['number'].get(str(int( number % 10000 / 1000))) or str(int(number % 10000 / 1000)) x0_in_x000 = self.lang_config[lang]['number'].get(str(int( - number % 10000 / 1000)*10)) or str(int(number % 10000 / 1000)*10) + number % 10000 / 1000) * 10)) or str(int(number % 10000 / 1000) * 10) x_in_0x00 = self.lang_config[lang]['number'].get(str(int( number % 1000 / 100)) or str(int(number % 1000 / 100))) @@ -242,7 +262,7 @@ def year_format(self, dt, lang, bc): date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__), - 
'res/text')) + 'res/text')) def nice_number(number, lang=None, speech=True, denominators=None): @@ -520,7 +540,7 @@ def nice_duration(duration, lang=None, speech=True): out += str(hours) + ":" if minutes < 10 and (hours > 0 or days > 0): out += "0" - out += str(minutes)+":" + out += str(minutes) + ":" if seconds < 10: out += "0" out += str(seconds) diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py index b35623d1..01ae2fbd 100644 --- a/lingua_franca/lang/format_en.py +++ b/lingua_franca/lang/format_en.py @@ -18,6 +18,7 @@ from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \ _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN +import inflection def nice_number_en(number, speech, denominators): @@ -144,7 +145,7 @@ def pronounce_number_en(num, places=2, short_scale=True, scientific=False): if _num[3:4] == '0': last = number_names[int(_num[2:4])] else: - second = number_names[int(_num[2:3])*10] + second = number_names[int(_num[2:3]) * 10] last = second + " " + number_names[int(_num[3:4])] return first + " " + last # exception used to catch any unforseen edge cases @@ -214,7 +215,7 @@ def _long_scale(n): # plus one as we skip 'thousand' # (and 'hundred', but this is excluded by index value) number = number.replace(',', '') - number += " " + hundreds[i+1] + number += " " + hundreds[i + 1] res.append(number) return ", ".join(reversed(res)) @@ -318,3 +319,11 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False): speak += " a.m." 
return speak + + +def singularize_en(word): + return inflection.singularize(word) + + +def pluralize_en(word): + return inflection.pluralize(word) diff --git a/requirements.txt b/requirements.txt index d4315405..8eb13f11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -python-dateutil==2.6.0 \ No newline at end of file +python-dateutil==2.6.0 +inflection \ No newline at end of file diff --git a/test/test_format.py b/test/test_format.py index d8675a01..b3ea3a36 100755 --- a/test/test_format.py +++ b/test/test_format.py @@ -30,6 +30,7 @@ from lingua_franca.format import pronounce_number from lingua_franca.format import date_time_format from lingua_franca.format import join_list +from lingua_franca.format import singularize, pluralize NUMBERS_FIXTURE_EN = { 1.435634: '1.436', @@ -186,12 +187,12 @@ def test_auto_scientific_notation(self): "power of negative one hundred " "and fifty") # value is platform dependent so better not use in tests? - #self.assertEqual( + # self.assertEqual( # pronounce_number(sys.float_info.min), "two point two two times " # "ten to the power of " # "negative three hundred " # "and eight") - #self.assertEqual( + # self.assertEqual( # pronounce_number(sys.float_info.max), "one point seven nine " # "times ten to the power of" # " three hundred and eight") @@ -519,7 +520,7 @@ def test_nice_year(self): self.assertTrue(len(nice_year(dt, lang=lang)) > 0) # Looking through the date sequence can be helpful -# print(nice_year(dt, lang=lang)) + # print(nice_year(dt, lang=lang)) def test_nice_duration(self): self.assertEqual(nice_duration(1), "one second") @@ -556,5 +557,34 @@ def test_join(self): self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d") +class TestInflection(unittest.TestCase): + def test_singularize(self): + self.assertEqual(singularize("posts"), "post") + self.assertEqual(singularize("octopi"), "octopus") + self.assertEqual(singularize("sheep"), "sheep") + # test already singular + 
self.assertEqual(singularize("word"), "word") + # test garbage + self.assertEqual(singularize("CamelOctopi"), "CamelOctopus") + + def test_pluralize(self): + self.assertEqual(pluralize("post"), "posts") + self.assertEqual(pluralize("octopus"), "octopi") + self.assertEqual(pluralize("sheep"), "sheep") + # test already plural + self.assertEqual(pluralize("words"), "words") + # irregular nouns + self.assertEqual(pluralize("person"), "people") + self.assertEqual(pluralize("man"), "men") + self.assertEqual(pluralize("human"), "humans") + self.assertEqual(pluralize('child'), 'children') + self.assertEqual(pluralize('sex'), 'sexes') + self.assertEqual(pluralize('move'), 'moves') + self.assertEqual(pluralize('cow'), 'kine') + self.assertEqual(pluralize('zombie'), 'zombies') + # test garbage + self.assertEqual(pluralize("CamelOctopus"), "CamelOctopi") + + if __name__ == "__main__": unittest.main()