OpenVoiceOS · NeonJarbas · Dec 11, 2020 · Jul 13, 2022
diff --git a/lingua_franca/format.py b/lingua_franca/format.py
@@ -29,21 +29,56 @@
     is_supported_full_lang, _raise_unsupported_language, \
     UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \
     FunctionNotLocalizedError, resolve_resource_file, FunctionNotLocalizedError
-
+from lingua_franca.lang.format_common import PluralAmount, PluralCategory
 
 _REGISTERED_FUNCTIONS = ("nice_number",
                          "nice_time",
                          "pronounce_number",
                          "pronounce_lang",
                          "nice_response",
-                         "nice_duration")
+                         "nice_duration",
+                         "get_plural_category",
+                         "get_plural_form")
 
 populate_localized_function_dict("format", langs=get_active_langs())
 
 
-def _translate_word(name, lang=''):
+def _get_word(name, amount=1, lang=''):
     """ Helper to get word translations
 
+    Args:
+        name (str): Word name (singular). Also the base of the legacy fallback key
+        amount (int or float): Amount of that word. Selects the plural category
+        lang (str): Language code, e.g. "en-us". Empty or None uses get_default_loc()
+
+    Returns:
+        str: entry of text/<lang>/pluralizations.json for name, else the legacy translation
+    """
+    from lingua_franca.internal import resolve_resource_file
+    if not lang:
+        if lang is None:
+            warn(NoneLangWarning)
+        lang = get_default_loc()
+
+    lang_code = lang if is_supported_full_lang(lang) else get_full_lang_code(lang)
+    filename = resolve_resource_file(join("text", lang_code, "pluralizations.json"))
+
+    if filename:
+        try:
+            with open(filename, 'r', encoding='utf8') as file:
+                translations = json.load(file)
+            return translations[name][get_plural_category(amount, lang=lang)]
+        except Exception:
+            pass  # best-effort lookup: any failure falls through to the legacy translation
+    return _translate_word(name + ('s' if amount != 1 else ''), lang)  # legacy fallback: any amount other than 1 is plural
+
+
+def _translate_word(name, lang=''):
+    """ Legacy helper to get word translations.
+
+    Do not use this function directly. Remove it once
+    all languages are migrated to the new format.
+
     Args:
         name (str): Word name. Returned as the default value if not translated
         lang (str, optional): an optional BCP-47 language code, if omitted
@@ -98,7 +133,7 @@ def cache(self, lang):
             except FileNotFoundError:
                 # Fallback to English formatting
                 with open(self.config_path + '/en-us/date_time.json',
-                          'r') as lang_config_file:
+                          'r', encoding='utf8') as lang_config_file:
                     self.lang_config[lang] = json.loads(
                         lang_config_file.read())
 
@@ -439,35 +474,23 @@ def nice_duration(duration, lang='', speech=True):
         out = ""
         if days > 0:
             out += pronounce_number(days, lang) + " "
-            if days == 1:
-                out += _translate_word("day", lang)
-            else:
-                out += _translate_word("days", lang)
+            out += _get_word("day", amount=days, lang=lang)
             out += " "
         if hours > 0:
             if out:
                 out += " "
             out += pronounce_number(hours, lang) + " "
-            if hours == 1:
-                out += _translate_word("hour", lang)
-            else:
-                out += _translate_word("hours", lang)
+            out += _get_word("hour", amount=hours, lang=lang)
         if minutes > 0:
             if out:
                 out += " "
             out += pronounce_number(minutes, lang) + " "
-            if minutes == 1:
-                out += _translate_word("minute", lang)
-            else:
-                out += _translate_word("minutes", lang)
+            out += _get_word("minute", amount=minutes, lang=lang)
         if seconds > 0:
             if out:
                 out += " "
             out += pronounce_number(seconds, lang) + " "
-            if seconds == 1:
-                out += _translate_word("second", lang)
-            else:
-                out += _translate_word("seconds", lang)
+            out += _get_word("second", amount=seconds, lang=lang)
     else:
         # M:SS, MM:SS, H:MM:SS, Dd H:MM:SS format
         out = ""
@@ -512,7 +535,7 @@ def join_list(items, connector, sep=None, lang=''):
     else:
         sep += " "
     return (sep.join(str(item) for item in items[:-1]) +
-            " " + _translate_word(connector, lang) +
+            " " + _get_word(connector, lang=lang) +
             " " + items[-1])
 
 
@@ -578,3 +601,55 @@ def nice_response(text, lang=''):
         assertEqual(nice_response_de("10 ^ 2"),
                          "10 hoch 2")
     """
+
+
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def get_plural_category(amount, type=PluralCategory.CARDINAL, lang=""):
+    """
+    Determine which Unicode CLDR plural category an amount belongs to.
+
+    The code in this body warns that the language lacks its own
+    implementation and applies only a singular/plural split to cardinals.
+
+    CLDR references:
+    http://cldr.unicode.org/index/cldr-spec/plural-rules
+    https://unicode-org.github.io/cldr-staging/charts/37/supplemental/language_plural_rules.html
+
+    Args:
+        amount(int or float or pair or list): The amount to classify. For
+            the range type it must hold the start and end numbers.
+        type(str): Either cardinal (default), ordinal or range.
+        lang(str): The BCP-47 code for the language to use, None for default.
+    Returns:
+        (str): The plural category. Either zero, one, two, few, many or other.
+    Raises:
+        FunctionNotLocalizedError: raised by this body for any non-cardinal type.
+    """
+    # Guard clause: this body has no rule for ordinal or range types.
+    if type != PluralCategory.CARDINAL:
+        raise FunctionNotLocalizedError("This function has not been implemented in the specified language.")
+
+    message = ("Pluralization has not been implemented in the specified language. Falling back to "
+               "basic singular and plural for compatibility with built-in functions.")
+    warn(RuntimeWarning(message))
+    # Exactly one is singular; every other amount is treated as plural.
+    return PluralAmount.ONE if amount == 1 else PluralAmount.OTHER
+
+
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def get_plural_form(word, amount, type=PluralCategory.CARDINAL, lang=""):
+    """
+    Return the form of a word that matches the given amount.
+
+    Args:
+        word(str): Word to be pluralized.
+        amount(int or float or pair or list): Amount that decides the
+            form; for the range type, the start and end numbers.
+        type(str): Either cardinal (default), ordinal or range.
+        lang(str): The BCP-47 code for the language to use, None for default.
+    Returns:
+        (str): Pluralized word (the code in this body returns it unchanged).
+    """
+    notice = RuntimeWarning("Pluralization has not been implemented in the specified language. Word unchanged")
+    warn(notice)
+    return word
diff --git a/lingua_franca/lang/common_data_pt.py b/lingua_franca/lang/common_data_pt.py
@@ -1,3 +1,6 @@
+from lingua_franca.lang.parse_common import invert_dict
+
+
 _FUNCTION_NOT_IMPLEMENTED_WARNING = "esta função não foi implementada em 'pt'"
 
 # Undefined articles ["um", "uma", "uns", "umas"] can not be supressed,
@@ -20,6 +23,60 @@
 _MALE_DETERMINANTS_PT = ["o", "os", "este", "estes", "esse", "esses"]
 _FEMALE_DETERMINANTS_PT = ["a", "as", "estas", "estas", "essa", "essas"]
 
+
+# constants used for singularize / pluralize
+_VOWELS_PT = ["a", "ã", "á", "à",  # each vowel plus its tilde / acute / grave variants
+              "e", "é", "è",
+              "i", "ì", "í",
+              "o", "ó", "ò", "õ",
+              "u", "ú", "ù"]  # NOTE(review): circumflex forms (â, ê, ô) are absent - confirm intended
+
+_INVARIANTS_PT = ["ontem", "depressa", "ali", "além", "sob", "por", "contra", "desde", "entre",
+                  "até", "perante", "porém", "contudo", "todavia", "entretanto", "senão", "portanto",
+                  "oba", "eba", "exceto", "excepto", "apenas", "menos", "também", "inclusive", "aliás",
+                  "que", "onde", "isto", "isso", "aquilo", "algo", "alguém", "nada", "ninguém", "tudo", "cada",
+                  "outrem", "quem", "mais", "menos", "demais",  # NOTE(review): "menos" is already listed above
+                  # NOTE some words are omitted because their invariance depends on the POS tag
+                  # NOTE these multi-word expressions are also invariant
+                  "ou melhor", "isto é", "por exemplo", "a saber", "digo", "ou seja",
+                  "por assim dizer", "com efeito", "ou antes"]
+
+_PLURAL_EXCEPTIONS_PT = {  # singular form -> irregular plural form
+    "cânon": "cânones",
+    "cós": "coses",  # "cós" (unchanged word) is also valid
+    "cais": "cais",
+    "xis": "xis",
+    "mal": "males",
+    "cônsul": "cônsules",
+    "mel": "méis",  # "meles" also valid
+    "fel": "féis",  # "feles" also valid
+    "cal": "cais",  # "cales" also valid
+    "aval": "avais",  # "avales" also valid
+    "mol": "móis",  # "moles" also valid
+    "real": "réis",
+    "fax": "faxes",
+    "cálix": "cálices",
+    "índex": "índices",
+    "apêndix": "apêndices",
+    "hélix": "hélices",
+    "hálux": "háluces",
+    "códex": "códices",
+    "fénix": "fénixes",  # "fénix" also valid
+    "til": "tis",  # "tiles" also valid
+    "pão": "pães",
+    "cão": "cães",
+    "alemão": "alemães",
+    "balão": "balões",
+    "anão": "anões",
+    "dez": "dez",
+    "três": "três",
+    "seis": "seis"
+}
+
+# in general, words that end with "s" in singular form should be added to _PLURAL_EXCEPTIONS_PT (this dict is derived from it)
+_SINGULAR_EXCEPTIONS_PT = invert_dict(_PLURAL_EXCEPTIONS_PT)  # NOTE(review): "cais" and "cal" both map to "cais", so the inverse presumably keeps only one of them - confirm which
+
+# constants for number handling
 _NUMBERS_PT = {
     "zero": 0,
     "um": 1,

diff --git a/lingua_franca/lang/format_common.py b/lingua_franca/lang/format_common.py
@@ -13,6 +13,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import enum
+
+
+class PluralCategory(str, enum.Enum):
+    """
+    Type of plural rule to evaluate (cardinal, ordinal or range), as
+    defined by the Unicode CLDR Plural Rules. Despite the class name, the
+    resulting categories (zero, one, ...) are the PluralAmount members.
+
+    For more details:
+    http://cldr.unicode.org/index/cldr-spec/plural-rules
+    https://unicode-org.github.io/cldr-staging/charts/37/supplemental/language_plural_rules.html
+    """
+    CARDINAL = "cardinal"
+    ORDINAL = "ordinal"
+    RANGE = "range"
+
+
+class PluralAmount(str, enum.Enum):
+    """
+    Plural categories of the Unicode CLDR Plural Rules (str-valued members).
+    http://cldr.unicode.org/index/cldr-spec/plural-rules
+    https://unicode-org.github.io/cldr-staging/charts/37/supplemental/language_plural_rules.html
+    """
+    ZERO = "zero"
+    ONE = "one"
+    TWO = "two"
+    FEW = "few"
+    MANY = "many"
+    OTHER = "other"
 
 
 def convert_to_mixed_fraction(number, denominators=range(1, 21)):

diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py
@@ -14,8 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-from lingua_franca.lang.format_common import convert_to_mixed_fraction
+import inflection
+from lingua_franca.lang.format_common import convert_to_mixed_fraction, PluralCategory, PluralAmount
 from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
     _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN
 
@@ -384,3 +384,48 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
                 speak += " a.m."
 
         return speak
+
+
+def get_plural_category_en(amount, type=PluralCategory.CARDINAL):
+    """
+    Get the plural category of the specified amount for English.
+
+    Args:
+        amount(int or float or pair or list): The amount to classify. If
+            type is range, it must contain the start and end numbers.
+        type(str): Either cardinal (default), ordinal or range.
+    Returns:
+        (str): one/other for cardinal; one/two/few/other for ordinal; other for range.
+    Raises:
+        ValueError: on an unknown type or a range that is not a 2-item tuple|list.
+    """
+    if type == PluralCategory.CARDINAL:
+        return PluralAmount.ONE if amount == 1 else PluralAmount.OTHER
+    if type == PluralCategory.ORDINAL:
+        # 1st / 2nd / 3rd endings, except the teens (11th, 12th, 13th)
+        if amount % 100 in (11, 12, 13):
+            return PluralAmount.OTHER
+        return {1: PluralAmount.ONE, 2: PluralAmount.TWO, 3: PluralAmount.FEW}.get(amount % 10, PluralAmount.OTHER)
+    if type == PluralCategory.RANGE:
+        if not isinstance(amount, (tuple, list)) or len(amount) != 2:
+            raise ValueError("Argument \"amount\" must be tuple|list type with the start and end numbers")
+        return PluralAmount.OTHER
+    raise ValueError("Argument \"type\" must be cardinal|ordinal|range")
+
+
+def get_plural_form_en(word, amount, type=PluralCategory.CARDINAL):
+    """
+    Inflect an English word so that it agrees with the given amount.
+
+    Args:
+        word(str): Word to be pluralized.
+        amount(int or float or pair or list): The amount that decides the
+            form; only an amount equal to 1 yields the singular.
+        type(str): Either cardinal (default), ordinal or range. Not
+            consulted by this implementation.
+    Returns:
+        (str): inflection.singularize(word) when amount == 1, otherwise
+            inflection.pluralize(word).
+    """
+    inflect = inflection.singularize if amount == 1 else inflection.pluralize
+    return inflect(word)