Skip to content

Commit

Permalink
singularize/pluralize
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Oct 21, 2019
1 parent aa04301 commit 22143f9
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 10 deletions.
28 changes: 24 additions & 4 deletions lingua_franca/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@
import re


def singularize(word, lang=None):
    """Return the singular form of a word for the requested language.

    Args:
        word (str): the word to convert
        lang (str, optional): language code; it is resolved to a primary
            language code through get_primary_lang_code

    Returns:
        str: the result of singularize_en for English; for any other
            language the unsupported-language helper is called and the
            word is returned unchanged
    """
    primary_code = get_primary_lang_code(lang)
    if primary_code != "en":
        # TODO: Other languages
        _log_unsupported_language(primary_code, ['en'])
        return word

    return singularize_en(word)


def pluralize(word, lang=None):
    """Return the plural form of a word for the requested language.

    Args:
        word (str): the word to convert
        lang (str, optional): language code; it is resolved to a primary
            language code through get_primary_lang_code

    Returns:
        str: the result of pluralize_en for English; for any other
            language the unsupported-language helper is called and the
            word is returned unchanged
    """
    primary_code = get_primary_lang_code(lang)
    if primary_code != "en":
        # TODO: Other languages
        _log_unsupported_language(primary_code, ['en'])
        return word

    return pluralize_en(word)


def _translate_word(name, lang):
""" Helper to get word tranlations
Expand All @@ -63,7 +83,7 @@ def _translate_word(name, lang):

lang_code = get_full_lang_code(lang)

filename = resolve_resource_file(join("text", lang_code, name+".word"))
filename = resolve_resource_file(join("text", lang_code, name + ".word"))
if filename:
# open the file
try:
Expand Down Expand Up @@ -141,7 +161,7 @@ def _number_strings(self, number, lang):
x_in_x000 = self.lang_config[lang]['number'].get(str(int(
number % 10000 / 1000))) or str(int(number % 10000 / 1000))
x0_in_x000 = self.lang_config[lang]['number'].get(str(int(
number % 10000 / 1000)*10)) or str(int(number % 10000 / 1000)*10)
number % 10000 / 1000) * 10)) or str(int(number % 10000 / 1000) * 10)
x_in_0x00 = self.lang_config[lang]['number'].get(str(int(
number % 1000 / 100)) or str(int(number % 1000 / 100)))

Expand Down Expand Up @@ -242,7 +262,7 @@ def year_format(self, dt, lang, bc):


date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
'res/text'))
'res/text'))


def nice_number(number, lang=None, speech=True, denominators=None):
Expand Down Expand Up @@ -520,7 +540,7 @@ def nice_duration(duration, lang=None, speech=True):
out += str(hours) + ":"
if minutes < 10 and (hours > 0 or days > 0):
out += "0"
out += str(minutes)+":"
out += str(minutes) + ":"
if seconds < 10:
out += "0"
out += str(seconds)
Expand Down
13 changes: 11 additions & 2 deletions lingua_franca/lang/format_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from lingua_franca.lang.format_common import convert_to_mixed_fraction
from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
_FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN
import inflection


def nice_number_en(number, speech, denominators):
Expand Down Expand Up @@ -144,7 +145,7 @@ def pronounce_number_en(num, places=2, short_scale=True, scientific=False):
if _num[3:4] == '0':
last = number_names[int(_num[2:4])]
else:
second = number_names[int(_num[2:3])*10]
second = number_names[int(_num[2:3]) * 10]
last = second + " " + number_names[int(_num[3:4])]
return first + " " + last
# exception used to catch any unforeseen edge cases
Expand Down Expand Up @@ -214,7 +215,7 @@ def _long_scale(n):
# plus one as we skip 'thousand'
# (and 'hundred', but this is excluded by index value)
number = number.replace(',', '')
number += " " + hundreds[i+1]
number += " " + hundreds[i + 1]
res.append(number)
return ", ".join(reversed(res))

Expand Down Expand Up @@ -318,3 +319,11 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
speak += " a.m."

return speak


def singularize_en(word):
    """Return the singular form of an English word.

    Delegates entirely to inflection.singularize.
    """
    singular = inflection.singularize(word)
    return singular


def pluralize_en(word):
    """Return the plural form of an English word.

    Delegates entirely to inflection.pluralize.
    """
    plural = inflection.pluralize(word)
    return plural
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
python-dateutil==2.6.0
python-dateutil==2.6.0
inflection
36 changes: 33 additions & 3 deletions test/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from lingua_franca.format import pronounce_number
from lingua_franca.format import date_time_format
from lingua_franca.format import join_list
from lingua_franca.format import singularize, pluralize

NUMBERS_FIXTURE_EN = {
1.435634: '1.436',
Expand Down Expand Up @@ -186,12 +187,12 @@ def test_auto_scientific_notation(self):
"power of negative one hundred "
"and fifty")
# value is platform dependent so better not use in tests?
#self.assertEqual(
# self.assertEqual(
# pronounce_number(sys.float_info.min), "two point two two times "
# "ten to the power of "
# "negative three hundred "
# "and eight")
#self.assertEqual(
# self.assertEqual(
# pronounce_number(sys.float_info.max), "one point seven nine "
# "times ten to the power of"
# " three hundred and eight")
Expand Down Expand Up @@ -519,7 +520,7 @@ def test_nice_year(self):
self.assertTrue(len(nice_year(dt, lang=lang)) > 0)
# Looking through the date sequence can be helpful

# print(nice_year(dt, lang=lang))
# print(nice_year(dt, lang=lang))

def test_nice_duration(self):
self.assertEqual(nice_duration(1), "one second")
Expand Down Expand Up @@ -556,5 +557,34 @@ def test_join(self):
self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d")


class TestInflection(unittest.TestCase):
    """Exercise singularize() and pluralize() with their default language."""

    def test_singularize(self):
        """Each plural input must map to the listed singular form."""
        cases = (
            ("posts", "post"),
            ("octopi", "octopus"),
            ("sheep", "sheep"),
            # input that is already singular
            ("word", "word"),
            # made-up mixed-case compound
            ("CamelOctopi", "CamelOctopus"),
        )
        for given, expected in cases:
            self.assertEqual(singularize(given), expected)

    def test_pluralize(self):
        """Each singular input must map to the listed plural form."""
        cases = (
            ("post", "posts"),
            ("octopus", "octopi"),
            ("sheep", "sheep"),
            # input that is already plural
            ("words", "words"),
            # irregular nouns and other special cases
            ("person", "people"),
            ("man", "men"),
            ("human", "humans"),
            ('child', 'children'),
            ('sex', 'sexes'),
            ('move', 'moves'),
            ('cow', 'kine'),
            ('zombie', 'zombies'),
            # made-up mixed-case compound
            ("CamelOctopus", "CamelOctopi"),
        )
        for given, expected in cases:
            self.assertEqual(pluralize(given), expected)


if __name__ == "__main__":
unittest.main()

0 comments on commit 22143f9

Please sign in to comment.