From f473b80258978ed26ee08449a26143dc67931ae0 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Thu, 1 Apr 2021 17:10:20 -0700 Subject: [PATCH 01/21] Initial, working impl. of #128 (not hooked up) This is a technically-working implementation of #128, which creates an object, exposed at runtime as lingua_franca.config, which descends from dict. More robust get/set methods to follow. The implementation is extensible to support full use of localization and 'full lang codes'. Either that or hooking it up to the functions will be next. --- lingua_franca/__init__.py | 17 ++++++-- lingua_franca/config.py | 1 - lingua_franca/configuration.py | 50 ++++++++++++++++++++++++ lingua_franca/internal.py | 11 +++++- lingua_franca/res/text/en-us/config.json | 3 ++ 5 files changed, 75 insertions(+), 7 deletions(-) delete mode 100644 lingua_franca/config.py create mode 100644 lingua_franca/configuration.py create mode 100644 lingua_franca/res/text/en-us/config.json diff --git a/lingua_franca/__init__.py b/lingua_franca/__init__.py index 0404403c..e03c78d9 100644 --- a/lingua_franca/__init__.py +++ b/lingua_franca/__init__.py @@ -1,6 +1,15 @@ +### DO NOT CHANGE THIS IMPORT ORDER ### +from .internal import get_active_langs, get_supported_locs, \ + get_full_lang_code + +from .configuration import Config + +### END OF IMPORT ORDER ### + from .internal import get_default_lang, set_default_lang, get_default_loc, \ - get_active_langs, _set_active_langs, get_primary_lang_code, \ - get_full_lang_code, resolve_resource_file, load_language, \ - load_languages, unload_language, unload_languages, get_supported_langs + _set_active_langs, get_primary_lang_code, resolve_resource_file, \ + load_language, load_languages, unload_language, unload_languages, \ + get_supported_langs + -from lingua_franca import config +config = Config() diff --git a/lingua_franca/config.py b/lingua_franca/config.py deleted file mode 100644 index 06dc9677..00000000 --- a/lingua_franca/config.py +++ /dev/null @@ -1 +0,0 
@@ -load_langs_on_demand = False diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py new file mode 100644 index 00000000..74822640 --- /dev/null +++ b/lingua_franca/configuration.py @@ -0,0 +1,50 @@ +import json +from os import path + +from lingua_franca import get_active_langs, get_supported_locs, \ + get_full_lang_code +from lingua_franca.internal import UnsupportedLanguageError, resolve_resource_file + +default_global_values = \ + { + 'load_langs_on_demand': False + } + +class LangConfig(dict): + def __init__(self, lang_code): + if lang_code not in get_supported_locs(): + # DO NOT catch UnsupportedLanguageError! + # If this fails, we want to crash. This can *only* result from + # someone trying to override sanity checks upstairs. There are no + # circumstances under which this should fail and allow the program + # to continue. + lang_code = get_full_lang_code(lang_code) + + + resource_file = resolve_resource_file(f'text/{lang_code}/config.json') + with open(resource_file, 'r', encoding='utf-8') as i_file: + default_values = json.load(i_file) + for k in default_values: + self[k] = default_values[k] + +class Config(dict): + def __init__(self): + self['global'] = dict(default_global_values) + for lang in get_active_langs(): + ''' + TODO proper full loc support here will handle languages similarly to global: + + self['en']['universal'] for 'default' English config + (all dialects if not overridden) + self['en']['en-us'] for overrides specific to en-US + self['en']['en-au'] for overrides specific to en-AU + + and so forth. 
+ ''' + if all((lang not in self.keys(), lang not in get_supported_locs())): + self[lang] = {} + self[lang]['universal'] = LangConfig(lang) + # begin portion that will need to adapt for the todo above + full_loc = lang if lang in get_supported_locs() else \ + get_full_lang_code(lang) + self[lang][full_loc] = LangConfig(lang) \ No newline at end of file diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 27ca74bf..20bda682 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -5,8 +5,6 @@ from sys import version from warnings import warn -from lingua_franca import config - _SUPPORTED_LANGUAGES = ("ca", "cs", "da", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "sl", "sv") @@ -88,6 +86,13 @@ def get_supported_langs(): """ return _SUPPORTED_LANGUAGES +def get_supported_locs(): + """ + Returns: + list(str) + """ + return _SUPPORTED_FULL_LOCALIZATIONS + def get_active_langs(): """ Get the list of currently-loaded language codes @@ -448,6 +453,8 @@ def is_error_type(_type): def localized_function_decorator(func): # Wrapper's logic def _call_localized_function(func, *args, **kwargs): + from lingua_franca import config + lang_code = None load_langs_on_demand = config.load_langs_on_demand unload_language_afterward = False diff --git a/lingua_franca/res/text/en-us/config.json b/lingua_franca/res/text/en-us/config.json new file mode 100644 index 00000000..062916f4 --- /dev/null +++ b/lingua_franca/res/text/en-us/config.json @@ -0,0 +1,3 @@ +{ + "short_scale": true +} \ No newline at end of file From b0db4bf7012f0c3217e9192d8ca469f01036c366 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Thu, 1 Apr 2021 18:04:56 -0700 Subject: [PATCH 02/21] add working getter: config.get(setting, lang) lang param defaults to 'global' (only index in config that isn't a lang) --- lingua_franca/__init__.py | 10 ++++----- lingua_franca/configuration.py | 41 ++++++++++++++++++++++++++++++++-- lingua_franca/internal.py | 2 +- test/test_localizer.py 
| 4 ++-- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/lingua_franca/__init__.py b/lingua_franca/__init__.py index e03c78d9..0af7bc8e 100644 --- a/lingua_franca/__init__.py +++ b/lingua_franca/__init__.py @@ -1,15 +1,15 @@ ### DO NOT CHANGE THIS IMPORT ORDER ### from .internal import get_active_langs, get_supported_locs, \ - get_full_lang_code + get_full_lang_code, get_supported_langs, get_default_loc, \ + get_primary_lang_code from .configuration import Config ### END OF IMPORT ORDER ### -from .internal import get_default_lang, set_default_lang, get_default_loc, \ - _set_active_langs, get_primary_lang_code, resolve_resource_file, \ - load_language, load_languages, unload_language, unload_languages, \ - get_supported_langs +from .internal import get_default_lang, set_default_lang, \ + _set_active_langs, resolve_resource_file, \ + load_language, load_languages, unload_language, unload_languages config = Config() diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 74822640..92128977 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -2,7 +2,8 @@ from os import path from lingua_franca import get_active_langs, get_supported_locs, \ - get_full_lang_code + get_supported_langs, get_primary_lang_code, get_full_lang_code, \ + get_default_loc from lingua_franca.internal import UnsupportedLanguageError, resolve_resource_file default_global_values = \ @@ -47,4 +48,40 @@ def __init__(self): # begin portion that will need to adapt for the todo above full_loc = lang if lang in get_supported_locs() else \ get_full_lang_code(lang) - self[lang][full_loc] = LangConfig(lang) \ No newline at end of file + self[lang][full_loc] = LangConfig(lang) + + def get(self, setting=None, lang='global'): + if setting is None: + raise ValueError("lingua_franca.config.get() requires " + "a setting parameter!") + + if lang is None: + lang = get_default_loc() + + setting_available_in = [] + possible_locs = [] + + stop = 
False + while True: + if setting in self['global']: + setting_available_in.append('global') + if lang == 'global': + break + + if lang in get_supported_langs(): + possible_locs.append(self[lang]['universal']) + possible_locs.append(self[lang][get_full_lang_code(lang)]) + + if lang in get_supported_locs(): + possible_locs.append(self[get_primary_lang_code(lang)]['universal']) + possible_locs.append(self[get_primary_lang_code(lang)][lang]) + + for place in possible_locs: + if setting in place: + setting_available_in.append(place) + break + try: + return self[setting_available_in[-1]][setting] + except KeyError as e: + # TODO: lots of sanity checking before PR is ready + raise e \ No newline at end of file diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 20bda682..200f3c10 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -456,7 +456,7 @@ def _call_localized_function(func, *args, **kwargs): from lingua_franca import config lang_code = None - load_langs_on_demand = config.load_langs_on_demand + load_langs_on_demand = config.get('load_langs_on_demand') unload_language_afterward = False func_signature = signature(func) func_params = list(func_signature.parameters) diff --git a/test/test_localizer.py b/test/test_localizer.py index 44efcff6..5cf84d6b 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -101,13 +101,13 @@ class TestLanguageLoading(unittest.TestCase): def test_load_on_demand(self): unload_all_languages() lingua_franca.load_language("en") - lingua_franca.config.load_langs_on_demand = True + lingua_franca.config.get('load_langs_on_demand') = True self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) self.assertEqual(lingua_franca.parse.extract_number("uno", lang="es"), 1) - lingua_franca.config.load_langs_on_demand = False + lingua_franca.config.get('load_langs_on_demand') = False # English should still be loaded, but not Spanish 
self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) From bd675f648f4bcef855f8627c611e917e4552482d Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Thu, 1 Apr 2021 19:10:16 -0700 Subject: [PATCH 03/21] add mostly-working get/set --- lingua_franca/configuration.py | 60 +++++++++++++++++++++++++--------- test/test_localizer.py | 4 +-- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 92128977..b405a0e9 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -23,10 +23,13 @@ def __init__(self, lang_code): resource_file = resolve_resource_file(f'text/{lang_code}/config.json') - with open(resource_file, 'r', encoding='utf-8') as i_file: - default_values = json.load(i_file) - for k in default_values: - self[k] = default_values[k] + try: + with open(resource_file, 'r', encoding='utf-8') as i_file: + default_values = json.load(i_file) + for k in default_values: + self[k] = default_values[k] + except (FileNotFoundError, TypeError): + self = {} class Config(dict): def __init__(self): @@ -50,13 +53,11 @@ def __init__(self): get_full_lang_code(lang) self[lang][full_loc] = LangConfig(lang) - def get(self, setting=None, lang='global'): + def _find_setting(self, setting=None, lang=None): if setting is None: - raise ValueError("lingua_franca.config.get() requires " + raise ValueError("lingua_franca.config requires " "a setting parameter!") - if lang is None: - lang = get_default_loc() setting_available_in = [] possible_locs = [] @@ -68,20 +69,47 @@ def get(self, setting=None, lang='global'): if lang == 'global': break + lang = lang or get_default_loc() + if lang in get_supported_langs(): - possible_locs.append(self[lang]['universal']) - possible_locs.append(self[lang][get_full_lang_code(lang)]) + possible_locs.append((lang, 'universal')) + possible_locs.append((lang, get_full_lang_code(lang))) if lang in get_supported_locs(): - 
possible_locs.append(self[get_primary_lang_code(lang)]['universal']) - possible_locs.append(self[get_primary_lang_code(lang)][lang]) - + possible_locs.append((get_primary_lang_code(lang), 'universal')) + possible_locs.append((get_primary_lang_code(lang), lang)) + for place in possible_locs: - if setting in place: + if setting in self[place[0]][place[1]]: setting_available_in.append(place) + break + return setting_available_in[-1] + + def get(self, setting=None, lang=None): + if lang != 'global': + if all((lang, + get_primary_lang_code(lang) not in get_active_langs())): + raise ModuleNotFoundError(f"{lang} is not currently loaded") + try: - return self[setting_available_in[-1]][setting] + setting_location = self._find_setting(setting, lang) + if setting_location == 'global': + return self['global'][setting] + return self[setting_location[0]][setting_location[1]][setting] + except KeyError as e: # TODO: lots of sanity checking before PR is ready - raise e \ No newline at end of file + raise e + + def set(self, setting=None, value=None, lang='global'): + if lang == 'global': + self['global'][setting] = value + return + + setting_location = self._find_setting(setting, lang) + if setting_location != 'global': + self[setting_location[0]][setting_location[1]][setting] = value + return + + raise KeyError(f"{setting} is not available as a setting for language: '{lang}'") \ No newline at end of file diff --git a/test/test_localizer.py b/test/test_localizer.py index 5cf84d6b..535e7af5 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -101,13 +101,13 @@ class TestLanguageLoading(unittest.TestCase): def test_load_on_demand(self): unload_all_languages() lingua_franca.load_language("en") - lingua_franca.config.get('load_langs_on_demand') = True + lingua_franca.config['global']['load_langs_on_demand'] = True self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) self.assertEqual(lingua_franca.parse.extract_number("uno", lang="es"), 1) - 
lingua_franca.config.get('load_langs_on_demand') = False + lingua_franca.config['global']['load_langs_on_demand'] = False # English should still be loaded, but not Spanish self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) From 6602a0f8695d18b189fce58084858689e5bab4f5 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Thu, 1 Apr 2021 20:15:23 -0700 Subject: [PATCH 04/21] load config when language is loaded --- lingua_franca/configuration.py | 20 +++++++++++++++----- lingua_franca/internal.py | 8 ++++++++ test/test_localizer.py | 7 ++++--- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index b405a0e9..b314590e 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -45,14 +45,24 @@ def __init__(self): and so forth. ''' - if all((lang not in self.keys(), lang not in get_supported_locs())): - self[lang] = {} - self[lang]['universal'] = LangConfig(lang) - # begin portion that will need to adapt for the todo above + # if all((lang not in self.keys(), lang not in get_supported_locs())): + # self[lang] = {} + # self[lang]['universal'] = LangConfig(lang) + # # begin portion that will need to adapt for the todo above + # full_loc = lang if lang in get_supported_locs() else \ + # get_full_lang_code(lang) + # self[lang][full_loc] = LangConfig(lang) + load_lang(lang) + + def load_lang(self, lang): + if all((lang not in self.keys(), lang not in get_supported_locs())): + self[lang] = {} + self[lang]['universal'] = LangConfig(lang) full_loc = lang if lang in get_supported_locs() else \ get_full_lang_code(lang) self[lang][full_loc] = LangConfig(lang) - + + def _find_setting(self, setting=None, lang=None): if setting is None: raise ValueError("lingua_franca.config requires " diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 200f3c10..957fc4b8 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -177,17 
+177,25 @@ def load_language(lang): whether 'primary' or 'full') Case-insensitive. """ + from lingua_franca.configuration import LangConfig + from lingua_franca import config if not isinstance(lang, str): raise TypeError("lingua_franca.load_language expects 'str' " "(got " + type(lang) + ")") + loc = None if lang not in _SUPPORTED_LANGUAGES: if lang in _SUPPORTED_FULL_LOCALIZATIONS: + loc = lang lang = get_primary_lang_code(lang) if lang not in __loaded_langs: __loaded_langs.append(lang) if not __default_lang: set_default_lang(lang) _set_active_langs(__loaded_langs) + if lang not in config.keys(): + config.load_lang(lang) + if all((loc, loc not in config[lang].keys())): + config.load_lang(loc) def load_languages(langs): diff --git a/test/test_localizer.py b/test/test_localizer.py index 535e7af5..084d5154 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -101,13 +101,14 @@ class TestLanguageLoading(unittest.TestCase): def test_load_on_demand(self): unload_all_languages() lingua_franca.load_language("en") - lingua_franca.config['global']['load_langs_on_demand'] = True + # lingua_franca.config['global']['load_langs_on_demand'] = True + lingua_franca.config.set(setting='load_langs_on_demand', value=True) self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) self.assertEqual(lingua_franca.parse.extract_number("uno", lang="es"), 1) - - lingua_franca.config['global']['load_langs_on_demand'] = False + lingua_franca.config.set(setting='load_langs_on_demand', value=False) + # lingua_franca.config['global']['load_langs_on_demand'] = False # English should still be loaded, but not Spanish self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) From f97df73b29360d2ff33a81474b103b150b994cd4 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Fri, 2 Apr 2021 15:31:45 -0700 Subject: [PATCH 05/21] working partial impl (still not hooked up) --- lingua_franca/configuration.py | 29 +++++++++++++------- lingua_franca/internal.py | 
49 ++++++++++++++++++++++++++++------ test/test_localizer.py | 2 -- 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index b314590e..1990b355 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -52,15 +52,21 @@ def __init__(self): # full_loc = lang if lang in get_supported_locs() else \ # get_full_lang_code(lang) # self[lang][full_loc] = LangConfig(lang) - load_lang(lang) + self.load_lang(lang) def load_lang(self, lang): - if all((lang not in self.keys(), lang not in get_supported_locs())): - self[lang] = {} - self[lang]['universal'] = LangConfig(lang) - full_loc = lang if lang in get_supported_locs() else \ - get_full_lang_code(lang) - self[lang][full_loc] = LangConfig(lang) + if lang not in get_supported_locs(): + # if all((lang not in self.keys(), lang not in get_supported_locs())): + if lang not in self.keys(): + self[lang] = {} + self[lang]['universal'] = LangConfig(lang) + + full_loc = get_full_lang_code(lang) + else: + full_loc = lang + lang = get_primary_lang_code(lang) + + self[lang][full_loc] = LangConfig(full_loc) def _find_setting(self, setting=None, lang=None): @@ -86,8 +92,12 @@ def _find_setting(self, setting=None, lang=None): possible_locs.append((lang, get_full_lang_code(lang))) if lang in get_supported_locs(): - possible_locs.append((get_primary_lang_code(lang), 'universal')) - possible_locs.append((get_primary_lang_code(lang), lang)) + primary_lang_code = get_primary_lang_code(lang) + possible_locs.append((primary_lang_code, 'universal')) + possible_locs.append((primary_lang_code, get_default_loc(primary_lang_code))) + possible_locs.append((primary_lang_code, lang)) + + for place in possible_locs: if setting in self[place[0]][place[1]]: @@ -104,6 +114,7 @@ def get(self, setting=None, lang=None): try: setting_location = self._find_setting(setting, lang) + if setting_location == 'global': return self['global'][setting] return 
self[setting_location[0]][setting_location[1]][setting] diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 957fc4b8..31799bab 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -29,10 +29,11 @@ 'sl': 'sl-si', 'sv': 'sv-se', 'tr': 'tr-tr'} - +__default_full_codes = dict(_DEFAULT_FULL_LANG_CODES) __default_lang = None __active_lang_code = None __loaded_langs = [] +__loaded_locs = [] _localized_functions = {} @@ -177,7 +178,6 @@ def load_language(lang): whether 'primary' or 'full') Case-insensitive. """ - from lingua_franca.configuration import LangConfig from lingua_franca import config if not isinstance(lang, str): raise TypeError("lingua_franca.load_language expects 'str' " @@ -186,7 +186,9 @@ def load_language(lang): if lang not in _SUPPORTED_LANGUAGES: if lang in _SUPPORTED_FULL_LOCALIZATIONS: loc = lang + lang = get_primary_lang_code(lang) + if lang not in __loaded_langs: __loaded_langs.append(lang) if not __default_lang: @@ -194,8 +196,15 @@ def load_language(lang): _set_active_langs(__loaded_langs) if lang not in config.keys(): config.load_lang(lang) + default_loc = get_default_loc(lang) + if default_loc not in __loaded_locs: + __loaded_locs.append(default_loc) + if all((loc, loc != default_loc)): + + set_default_loc(lang, loc) if all((loc, loc not in config[lang].keys())): config.load_lang(loc) + __loaded_locs.append(loc) def load_languages(langs): @@ -252,7 +261,7 @@ def get_default_lang(): return __default_lang -def get_default_loc(): +def get_default_loc(lang=None): """ Return the current, localized BCP-47 language code, such as 'en-US' or 'es-ES'. 
For the default language *family* - which is passed to most parsers and formatters - call `get_default_lang` @@ -260,8 +269,12 @@ def get_default_loc(): The 'localized' portion conforms to ISO 3166-1 alpha-2 https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 """ - return __active_lang_code - + if not lang: + return __active_lang_code + elif lang.lower() not in _SUPPORTED_LANGUAGES: + + raise UnsupportedLanguageError(lang) + return __default_full_codes[lang.lower()] def set_default_lang(lang_code): """ Set the active BCP-47 language code to be used in formatting/parsing @@ -298,9 +311,29 @@ def set_default_lang(lang_code): else: __active_lang_code = get_full_lang_code(__default_lang) -# TODO remove this when invalid lang codes are removed (currently deprecated) +def set_default_loc(lang: str=None, loc: str=None): + if not loc: + raise ValueError("set_default_loc expects a BCP-47 lang code") + if not lang: + lang = get_default_lang() + lang = lang.lower() + loc = loc.lower() + if lang not in _SUPPORTED_LANGUAGES or \ + loc not in _SUPPORTED_FULL_LOCALIZATIONS: + raise UnsupportedLanguageError(f"{lang} - {loc}") + + if get_primary_lang_code(loc) != lang: + raise ValueError(f"Localization '{loc}'' does not correspond to " + "language '{lang}'") + + __default_full_codes[lang] = loc + if lang == get_default_lang(): + global __active_lang_code + __active_lang_code = loc + +# TODO remove this when invalid lang codes are removed (currently deprecated) def get_primary_lang_code(lang=''): if not lang: if lang is None: @@ -386,8 +419,8 @@ def __get_full_lang_code_deprecation_warning(lang=''): "got {}".format(type(lang))) if lang.lower() in _SUPPORTED_FULL_LOCALIZATIONS: return lang - elif lang in _DEFAULT_FULL_LANG_CODES: - return _DEFAULT_FULL_LANG_CODES[lang] + elif lang in __default_full_codes: + return __default_full_codes[lang] else: raise UnsupportedLanguageError(lang) diff --git a/test/test_localizer.py b/test/test_localizer.py index 084d5154..d7fb649e 100644 --- 
a/test/test_localizer.py +++ b/test/test_localizer.py @@ -101,14 +101,12 @@ class TestLanguageLoading(unittest.TestCase): def test_load_on_demand(self): unload_all_languages() lingua_franca.load_language("en") - # lingua_franca.config['global']['load_langs_on_demand'] = True lingua_franca.config.set(setting='load_langs_on_demand', value=True) self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) self.assertEqual(lingua_franca.parse.extract_number("uno", lang="es"), 1) lingua_franca.config.set(setting='load_langs_on_demand', value=False) - # lingua_franca.config['global']['load_langs_on_demand'] = False # English should still be loaded, but not Spanish self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) From 1cbcad8e20bdcdc6d78c06580c97218737c07543 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Fri, 2 Apr 2021 15:48:25 -0700 Subject: [PATCH 06/21] address failing tests, break different test --- lingua_franca/internal.py | 17 +++++++++++------ lingua_franca/res/text/en-au/config.json | 3 +++ 2 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 lingua_franca/res/text/en-au/config.json diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 31799bab..c8102e43 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -193,14 +193,14 @@ def load_language(lang): __loaded_langs.append(lang) if not __default_lang: set_default_lang(lang) - _set_active_langs(__loaded_langs) + else: + _set_active_langs(__loaded_langs) if lang not in config.keys(): config.load_lang(lang) default_loc = get_default_loc(lang) if default_loc not in __loaded_locs: __loaded_locs.append(default_loc) if all((loc, loc != default_loc)): - set_default_loc(lang, loc) if all((loc, loc not in config[lang].keys())): config.load_lang(loc) @@ -298,18 +298,23 @@ def set_default_lang(lang_code): else: __default_lang = primary_lang_code + if lang_code not in __loaded_langs and lang_code not in __loaded_locs: + 
load_language(lang_code) + # make sure the default language is loaded. # also make sure the default language is at the front. # position doesn't matter here, but it clarifies things while debugging. - if __default_lang in __loaded_langs: - __loaded_langs.remove(__default_lang) + if __default_lang not in __loaded_langs: + load_language(__default_lang) + + __loaded_langs.remove(__default_lang) __loaded_langs.insert(0, __default_lang) _refresh_function_dict() if is_supported_full_lang(lang_code): - __active_lang_code = lang_code + set_default_loc(get_primary_lang_code(lang_code), lang_code) else: - __active_lang_code = get_full_lang_code(__default_lang) + set_default_loc(lang_code, get_full_lang_code(lang_code)) def set_default_loc(lang: str=None, loc: str=None): if not loc: diff --git a/lingua_franca/res/text/en-au/config.json b/lingua_franca/res/text/en-au/config.json new file mode 100644 index 00000000..54086600 --- /dev/null +++ b/lingua_franca/res/text/en-au/config.json @@ -0,0 +1,3 @@ +{ + "short_scale": false +} \ No newline at end of file From dbb1caa05313d0feb93902b378b0e4e90fbbc2a8 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Fri, 2 Apr 2021 18:02:45 -0700 Subject: [PATCH 07/21] "finish" locale part, hook up to short_scale --- lingua_franca/configuration.py | 19 ++++----- lingua_franca/internal.py | 73 ++++++++++++++++++++++++++++++---- lingua_franca/parse.py | 9 +++-- 3 files changed, 81 insertions(+), 20 deletions(-) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 1990b355..9bfb07bd 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -55,8 +55,8 @@ def __init__(self): self.load_lang(lang) def load_lang(self, lang): - if lang not in get_supported_locs(): - # if all((lang not in self.keys(), lang not in get_supported_locs())): + if all((lang not in get_supported_locs(), \ + lang in get_supported_langs())): if lang not in self.keys(): self[lang] = {} self[lang]['universal'] = 
LangConfig(lang) @@ -65,9 +65,8 @@ def load_lang(self, lang): else: full_loc = lang lang = get_primary_lang_code(lang) - - self[lang][full_loc] = LangConfig(full_loc) + self[lang][full_loc] = LangConfig(full_loc) def _find_setting(self, setting=None, lang=None): if setting is None: @@ -104,8 +103,11 @@ def _find_setting(self, setting=None, lang=None): setting_available_in.append(place) break - return setting_available_in[-1] - + try: + return setting_available_in[-1] + except IndexError: + return None + def get(self, setting=None, lang=None): if lang != 'global': if all((lang, @@ -119,9 +121,8 @@ def get(self, setting=None, lang=None): return self['global'][setting] return self[setting_location[0]][setting_location[1]][setting] - except KeyError as e: - # TODO: lots of sanity checking before PR is ready - raise e + except TypeError: + return None def set(self, setting=None, value=None, lang='global'): if lang == 'global': diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index c8102e43..595e8e78 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -52,6 +52,10 @@ class UnsupportedLanguageError(NotImplementedError): class FunctionNotLocalizedError(NotImplementedError): pass +class ConfigVar(): + name: str + def __init__(self, name: str): + self.name = name NoneLangWarning = \ DeprecationWarning("Lingua Franca is dropping support" @@ -124,7 +128,7 @@ def _set_active_langs(langs=None, override_default=True): if not isinstance(langs, list): raise(TypeError("lingua_franca.internal._set_active_langs expects" " 'str' or 'list'")) - global __loaded_langs, __default_lang + global __loaded_langs, __default_lang, __active_lang_code __loaded_langs = list(dict.fromkeys(langs)) if __default_lang: if override_default or get_primary_lang_code(__default_lang) \ @@ -133,6 +137,12 @@ def _set_active_langs(langs=None, override_default=True): set_default_lang(get_full_lang_code(__loaded_langs[0])) else: __default_lang = None + + if 
get_primary_lang_code(__active_lang_code) != __default_lang: + __cur_default = __default_full_codes[__default_lang] + __active_lang_code = __cur_default if __cur_default in __loaded_locs \ + else _DEFAULT_FULL_LANG_CODES[__default_lang] + _refresh_function_dict() @@ -229,9 +239,35 @@ def unload_language(lang): Args: lang (str): language code to unload """ + from lingua_franca import config + + lang = lang.lower() + # if passed full lang code, unload that locale + if lang in __loaded_locs: + loc = lang + lang = get_primary_lang_code(loc) + + __loaded_locs.remove(loc) + config[lang].pop(loc) + + only_remaining_loc_of_this_lang = True + for _loc in __loaded_locs: + if get_primary_lang_code(_loc) == lang: + only_remaining_loc_of_this_lang = False + set_default_loc(lang, _loc) + + if not only_remaining_loc_of_this_lang: + return + else: + locales = [_loc for _loc in __loaded_locs \ + if get_primary_lang_code(_loc) == lang] + for _loc in locales: + unload_language(_loc) + # unload the whole language if lang in __loaded_langs: __loaded_langs.remove(lang) _set_active_langs(__loaded_langs) + config.pop(lang) def unload_languages(langs): @@ -242,8 +278,7 @@ def unload_languages(langs): langs (list[str]) """ for lang in langs: - __loaded_langs.remove(lang) - _set_active_langs(__loaded_langs) + unload_language(lang) def get_default_lang(): @@ -430,7 +465,7 @@ def __get_full_lang_code_deprecation_warning(lang=''): raise UnsupportedLanguageError(lang) -def localized_function(run_own_code_on=[type(None)]): +def localized_function(run_own_code_on=[type(None)], config_vars=[]): """ Decorator which finds localized functions, and calls them, from signatures defined in the top-level modules. See lingua_franca.format or .parse for @@ -469,7 +504,10 @@ def nice_number(number, lang='', speech=True, denominators=None): If this argument is omitted, the function itself will never be run. Calls to the wrapped function will be passed to the appropriate, localized function. 
- + config_vars(list(str), optional) + A list of variable names whose default values should be obtained + from lingua_franca.config, rather than specified in the top-level + function signature. """ # Make sure everything in run_own_code_on is an Error or None @@ -477,6 +515,10 @@ def nice_number(number, lang='', speech=True, denominators=None): ValueError("@localized_function(run_own_code_on=<>) expected an " "Error type, or a list of Error types. Instead, it " "received this value:\n" + str(run_own_code_on)) + NotStringsError = \ + ValueError("@localized_function(config_vars=<>) expected a string," + "or a list of strings. Instead, it received this value:\n" + f"{str(config_vars)}") # TODO deprecate these kwarg values 6-12 months after v0.3.0 releases def is_error_type(_type): @@ -494,6 +536,14 @@ def is_error_type(_type): if run_own_code_on != [None]: if not all((is_error_type(e) for e in run_own_code_on)): raise BadTypeError + if not isinstance(config_vars, list): + try: + config_vars = list(config_vars) + except TypeError: + raise NotStringsError + if config_vars != [None]: + if not all((isinstance(v, str) for v in config_vars)): + raise NotStringsError # Begin wrapper def localized_function_decorator(func): @@ -512,7 +562,7 @@ def _call_localized_function(func, *args, **kwargs): # Check if we're passing a lang as a kwarg if 'lang' in kwargs.keys(): lang_param = kwargs['lang'] - if lang_param == None: + if lang_param is None: warn(NoneLangWarning) lang_code = get_default_lang() else: @@ -521,7 +571,7 @@ def _call_localized_function(func, *args, **kwargs): # Check if we're passing a lang as a positional arg elif lang_param_index < len(args): lang_param = args[lang_param_index] - if lang_param == None: + if lang_param is None: warn(NoneLangWarning) lang_code = get_default_lang() elif lang_param in _SUPPORTED_LANGUAGES or \ @@ -569,6 +619,7 @@ def _call_localized_function(func, *args, **kwargs): else: full_lang_code = get_full_lang_code(lang_code) + # Here comes 
the ugly business. _module_name = func.__module__.split('.')[-1] _module = import_module(".lang." + _module_name + @@ -619,6 +670,14 @@ def _call_localized_function(func, *args, **kwargs): args = tuple(arg for arg in list(args) if arg not in (lang_code, full_lang_code)) + # Now let's substitute any values that are supposed to come from + # lingua_franca.config + for kwarg in loc_signature.parameters: + if all((kwarg not in kwargs, kwarg in config_vars)): + config_var = config.get(kwarg, full_lang_code) + if config_var is not None: + kwargs[kwarg] = config_var + # Now we call the function, ignoring any kwargs from the # wrapped function that aren't in the localized function. r_val = localized_func(*args, diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index ac7732a8..8d25b6b1 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -16,9 +16,10 @@ from difflib import SequenceMatcher from warnings import warn +from lingua_franca import config from lingua_franca.time import now_local from lingua_franca.internal import populate_localized_function_dict, \ - get_active_langs, get_full_lang_code, get_primary_lang_code, \ + get_active_langs, get_full_lang_code, get_primary_lang_code, ConfigVar, \ get_default_lang, localized_function, _raise_unsupported_language _REGISTERED_FUNCTIONS = ("extract_numbers", @@ -72,7 +73,7 @@ def match_one(query, choices): @localized_function() -def extract_numbers(text, short_scale=True, ordinals=False, lang=''): +def extract_numbers(text, short_scale: bool=ConfigVar('short_scale'), ordinals=False, lang=''): """ Takes in a string and extracts a list of numbers. @@ -89,8 +90,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=''): """ -@localized_function() -def extract_number(text, short_scale=True, ordinals=False, lang=''): +@localized_function(config_vars=['short_scale']) +def extract_number(text, short_scale: bool, ordinals=False, lang=''): """Takes in a string and extracts a number. 
Args: From 3e3d2e2c74e8bbf4682cd6011ea4492ccb57b41e Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Fri, 2 Apr 2021 19:00:10 -0700 Subject: [PATCH 08/21] fix positional args (in case of heathens) --- lingua_franca/internal.py | 33 +++++++++++++++++++++------------ test/test_localizer.py | 5 +++-- test/test_parse.py | 4 ++-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 595e8e78..d6a98ae6 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -133,12 +133,14 @@ def _set_active_langs(langs=None, override_default=True): if __default_lang: if override_default or get_primary_lang_code(__default_lang) \ not in __loaded_langs: - if len(__loaded_langs): + if len(__loaded_langs) > 0: set_default_lang(get_full_lang_code(__loaded_langs[0])) else: __default_lang = None - if get_primary_lang_code(__active_lang_code) != __default_lang: + if not __default_lang: + __active_lang_code = None + elif get_primary_lang_code(__active_lang_code) != __default_lang: __cur_default = __default_full_codes[__default_lang] __active_lang_code = __cur_default if __cur_default in __loaded_locs \ else _DEFAULT_FULL_LANG_CODES[__default_lang] @@ -196,15 +198,14 @@ def load_language(lang): if lang not in _SUPPORTED_LANGUAGES: if lang in _SUPPORTED_FULL_LOCALIZATIONS: loc = lang - lang = get_primary_lang_code(lang) - + if lang not in __loaded_langs: __loaded_langs.append(lang) if not __default_lang: set_default_lang(lang) else: - _set_active_langs(__loaded_langs) + _set_active_langs(__loaded_langs, override_default=False) if lang not in config.keys(): config.load_lang(lang) default_loc = get_default_loc(lang) @@ -246,9 +247,10 @@ def unload_language(lang): if lang in __loaded_locs: loc = lang lang = get_primary_lang_code(loc) - + __loaded_locs.remove(loc) - config[lang].pop(loc) + if loc in config[lang]: + config[lang].pop(loc) only_remaining_loc_of_this_lang = True for _loc in __loaded_locs: @@ -333,7 
+335,12 @@ def set_default_lang(lang_code): else: __default_lang = primary_lang_code - if lang_code not in __loaded_langs and lang_code not in __loaded_locs: + if primary_lang_code != lang_code: + if primary_lang_code not in __loaded_langs: + load_language(primary_lang_code) + if lang_code not in __loaded_locs: + load_language(lang_code) + else: load_language(lang_code) # make sure the default language is loaded. @@ -459,8 +466,8 @@ def __get_full_lang_code_deprecation_warning(lang=''): "got {}".format(type(lang))) if lang.lower() in _SUPPORTED_FULL_LOCALIZATIONS: return lang - elif lang in __default_full_codes: - return __default_full_codes[lang] + elif lang in _DEFAULT_FULL_LANG_CODES: + return _DEFAULT_FULL_LANG_CODES[lang] else: raise UnsupportedLanguageError(lang) @@ -669,11 +676,13 @@ def _call_localized_function(func, *args, **kwargs): del kwargs['lang'] args = tuple(arg for arg in list(args) if arg not in (lang_code, full_lang_code)) - # Now let's substitute any values that are supposed to come from # lingua_franca.config for kwarg in loc_signature.parameters: - if all((kwarg not in kwargs, kwarg in config_vars)): + if all((kwarg not in kwargs, + kwarg in config_vars, + len(args) < \ + list(loc_signature.parameters).index(kwarg) + 1)): config_var = config.get(kwarg, full_lang_code) if config_var is not None: kwargs[kwarg] = config_var diff --git a/test/test_localizer.py b/test/test_localizer.py index d7fb649e..9c97f9f9 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -6,7 +6,8 @@ import lingua_franca.parse import lingua_franca.format -from lingua_franca.internal import localized_function, _SUPPORTED_LANGUAGES +from lingua_franca.internal import localized_function, _SUPPORTED_LANGUAGES, \ + unload_languages def unload_all_languages(): @@ -14,7 +15,7 @@ def unload_all_languages(): your test util to run them in order. Sadly, spamming this function is easier and probably less onerous for most devs. 
""" - lingua_franca._set_active_langs([]) + unload_languages(_SUPPORTED_LANGUAGES) def setUpModule(): diff --git a/test/test_parse.py b/test/test_parse.py index bc309138..5f322893 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -29,8 +29,8 @@ def setUpModule(): # TODO spin off English tests - load_language('en') - set_default_lang('en') + # load_language('en-us') + set_default_lang('en-us') def tearDownModule(): From 73110edce8e81e872ed77ca6aded7ce21cf5eecb Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 12:43:36 -0700 Subject: [PATCH 09/21] move from localized_function() param to data type change initial implementation from @localized_function(config_vars=['short_scale', 'ordinals'] to @localized_function def extract_number(..., short_scale=ConfigVar, ordinals=ConfigVar) also, .gitignore my editor settings because it is driving me out of my mind --- .gitignore | 1 + lingua_franca/configuration.py | 3 +-- lingua_franca/internal.py | 21 +++------------------ lingua_franca/lang/parse_en.py | 22 +++++++++++----------- lingua_franca/parse.py | 16 +++++++++++----- lingua_franca/res/text/en-us/config.json | 4 +++- test/test_localizer.py | 2 -- 7 files changed, 30 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index 632a9568..38c7f46a 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,4 @@ venv.bak/ .vscode/ vscode/ *.code-workspace +.vscode-* \ No newline at end of file diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 9bfb07bd..89b62280 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -1,10 +1,9 @@ import json -from os import path from lingua_franca import get_active_langs, get_supported_locs, \ get_supported_langs, get_primary_lang_code, get_full_lang_code, \ get_default_loc -from lingua_franca.internal import UnsupportedLanguageError, resolve_resource_file +from lingua_franca.internal import resolve_resource_file default_global_values = \ { 
diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index d6a98ae6..207e533b 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -511,10 +511,6 @@ def nice_number(number, lang='', speech=True, denominators=None): If this argument is omitted, the function itself will never be run. Calls to the wrapped function will be passed to the appropriate, localized function. - config_vars(list(str), optional) - A list of variable names whose default values should be obtained - from lingua_franca.config, rather than specified in the top-level - function signature. """ # Make sure everything in run_own_code_on is an Error or None @@ -522,10 +518,6 @@ def nice_number(number, lang='', speech=True, denominators=None): ValueError("@localized_function(run_own_code_on=<>) expected an " "Error type, or a list of Error types. Instead, it " "received this value:\n" + str(run_own_code_on)) - NotStringsError = \ - ValueError("@localized_function(config_vars=<>) expected a string," - "or a list of strings. 
Instead, it received this value:\n" - f"{str(config_vars)}") # TODO deprecate these kwarg values 6-12 months after v0.3.0 releases def is_error_type(_type): @@ -543,14 +535,7 @@ def is_error_type(_type): if run_own_code_on != [None]: if not all((is_error_type(e) for e in run_own_code_on)): raise BadTypeError - if not isinstance(config_vars, list): - try: - config_vars = list(config_vars) - except TypeError: - raise NotStringsError - if config_vars != [None]: - if not all((isinstance(v, str) for v in config_vars)): - raise NotStringsError + # Begin wrapper def localized_function_decorator(func): @@ -679,8 +664,8 @@ def _call_localized_function(func, *args, **kwargs): # Now let's substitute any values that are supposed to come from # lingua_franca.config for kwarg in loc_signature.parameters: - if all((kwarg not in kwargs, - kwarg in config_vars, + if all((loc_signature.parameters[kwarg].default is ConfigVar, + kwarg not in kwargs, len(args) < \ list(loc_signature.parameters).index(kwarg) + 1)): config_var = config.get(kwarg, full_lang_code) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index 4b0feb4c..b6bbaa22 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -28,10 +28,10 @@ import re import json -from lingua_franca.internal import resolve_resource_file +from lingua_franca.internal import resolve_resource_file, ConfigVar -def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False): +def _convert_words_to_numbers_en(text, short_scale=ConfigVar, ordinals=ConfigVar): """ Convert words in a string into their equivalent numbers. 
Args: @@ -66,8 +66,8 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False): return ' '.join(results) -def _extract_numbers_with_text_en(tokens, short_scale=True, - ordinals=False, fractional_numbers=True): +def _extract_numbers_with_text_en(tokens, short_scale=ConfigVar, + ordinals=ConfigVar, fractional_numbers=True): """ Extract all numbers from a list of Tokens, with the words that represent them. @@ -108,8 +108,8 @@ def _extract_numbers_with_text_en(tokens, short_scale=True, return results -def _extract_number_with_text_en(tokens, short_scale=True, - ordinals=False, fractional_numbers=True): +def _extract_number_with_text_en(tokens, short_scale=ConfigVar, + ordinals=ConfigVar, fractional_numbers=True): """ This function extracts a number from a list of Tokens. @@ -132,7 +132,7 @@ def _extract_number_with_text_en(tokens, short_scale=True, def _extract_number_with_text_en_helper(tokens, - short_scale=True, ordinals=False, + short_scale=ConfigVar, ordinals=ConfigVar, fractional_numbers=True): """ Helper for _extract_number_with_text_en. @@ -529,7 +529,7 @@ def _initialize_number_data_en(short_scale, speech=True): return multiplies, string_num_ordinal_en, string_num_scale_en -def extract_number_en(text, short_scale=True, ordinals=False): +def extract_number_en(text, short_scale=ConfigVar, ordinals=ConfigVar): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -1420,7 +1420,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_en(input_str, short_scale=True, spoken=True): +def is_fractional_en(input_str, short_scale=ConfigVar, spoken=True): """ This function takes the given text and checks if it is a fraction. 
@@ -1450,7 +1450,7 @@ def is_fractional_en(input_str, short_scale=True, spoken=True): return False -def extract_numbers_en(text, short_scale=True, ordinals=False): +def extract_numbers_en(text, short_scale=ConfigVar, ordinals=ConfigVar): """ Takes in a string and extracts a list of numbers. @@ -1477,6 +1477,6 @@ def numbers_to_digits(self, utterance): return _convert_words_to_numbers_en(utterance, ordinals=None) -def normalize_en(text, remove_articles=True): +def normalize_en(text, remove_articles=ConfigVar): """ English string normalization """ return EnglishNormalizer().normalize(text, remove_articles) diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 8d25b6b1..2592e4a6 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -73,7 +73,10 @@ def match_one(query, choices): @localized_function() -def extract_numbers(text, short_scale: bool=ConfigVar('short_scale'), ordinals=False, lang=''): +def extract_numbers(text: str, + short_scale: bool=ConfigVar, + ordinals: bool=ConfigVar, + lang: str=''): """ Takes in a string and extracts a list of numbers. @@ -90,8 +93,11 @@ def extract_numbers(text, short_scale: bool=ConfigVar('short_scale'), ordinals=F """ -@localized_function(config_vars=['short_scale']) -def extract_number(text, short_scale: bool, ordinals=False, lang=''): +@localized_function() +def extract_number(text: str, + short_scale: bool=ConfigVar, + ordinals: bool=ConfigVar, + lang: str=''): """Takes in a string and extracts a number. 
Args: @@ -192,7 +198,7 @@ def extract_datetime(text, anchorDate=None, lang='', default_time=None): @localized_function() -def normalize(text, lang='', remove_articles=True): +def normalize(text, lang='', remove_articles=ConfigVar): """Prepare a string for parsing This function prepares the given text for parsing by making @@ -228,7 +234,7 @@ def get_gender(word, context="", lang=''): @localized_function() -def is_fractional(input_str, short_scale=True, lang=''): +def is_fractional(input_str, short_scale=ConfigVar, lang=''): """ This function takes the given text and checks if it is a fraction. Used by most of the number exractors. diff --git a/lingua_franca/res/text/en-us/config.json b/lingua_franca/res/text/en-us/config.json index 062916f4..77b94b17 100644 --- a/lingua_franca/res/text/en-us/config.json +++ b/lingua_franca/res/text/en-us/config.json @@ -1,3 +1,5 @@ { - "short_scale": true + "ordinals": false, + "short_scale": true, + "remove_articles": true } \ No newline at end of file diff --git a/test/test_localizer.py b/test/test_localizer.py index 9c97f9f9..ed2779c6 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -1,7 +1,5 @@ import unittest -from sys import version - import lingua_franca import lingua_franca.parse import lingua_franca.format From b0a488f7f7b87d0f03c309d3bb1bffc57fa875a4 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 12:46:54 -0700 Subject: [PATCH 10/21] bump version to help downstreams test in prod =P DOUBLE CHECK CORRECTNESS BEFORE MERGING i got a peaceful, easy feeling... i know you won't let me down... 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 57253d24..da51a390 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ def required(requirements_file): setup( name='lingua_franca', - version='0.3.1', + version='0.4.0a1', packages=['lingua_franca', 'lingua_franca.lang'], url='https://github.com/MycroftAI/lingua-franca', license='Apache2.0', From 9a88d12a846a4ae6467a0ac79518df792a6f71ec Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 15:01:16 -0700 Subject: [PATCH 11/21] impr. setting find-pick, begin proper en integrat. also contains a partially-functional fix for problem in nice_number when the param `denominators` is not iterable. this was intended to help integrate that param with config. if it isn't working by the time this feature branch is finished, strip it out as OOS. --- lingua_franca/configuration.py | 10 ++++--- lingua_franca/format.py | 33 +++++++++++++++++++----- lingua_franca/lang/format_en.py | 6 +++-- lingua_franca/lang/parse_en.py | 12 ++++----- lingua_franca/res/text/en-us/config.json | 5 ++-- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 89b62280..8c4a3efe 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -125,11 +125,13 @@ def get(self, setting=None, lang=None): def set(self, setting=None, value=None, lang='global'): if lang == 'global': - self['global'][setting] = value - return + if setting in self['global']: + self['global'][setting] = value + return - setting_location = self._find_setting(setting, lang) - if setting_location != 'global': + setting_location = self._find_setting(setting, lang if lang != \ + 'global' else get_default_loc()) + if all((setting_location, setting_location != 'global')): self[setting_location[0]][setting_location[1]][setting] = value return diff --git a/lingua_franca/format.py b/lingua_franca/format.py index 
d4f195c4..bee48975 100755 --- a/lingua_franca/format.py +++ b/lingua_franca/format.py @@ -21,6 +21,7 @@ from warnings import warn from os.path import join +from lingua_franca import config from lingua_franca.bracket_expansion import SentenceTreeParser from lingua_franca.internal import localized_function, \ @@ -28,7 +29,7 @@ get_full_lang_code, get_default_lang, get_default_loc, \ is_supported_full_lang, _raise_unsupported_language, \ UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \ - FunctionNotLocalizedError + FunctionNotLocalizedError, ConfigVar _REGISTERED_FUNCTIONS = ("nice_number", @@ -241,7 +242,7 @@ def year_format(self, dt, lang, bc): 'res/text')) -@localized_function(run_own_code_on=[UnsupportedLanguageError]) +@localized_function(run_own_code_on=[UnsupportedLanguageError, TypeError]) def nice_number(number, lang='', speech=True, denominators=None): """Format a float to human readable functions @@ -255,12 +256,24 @@ def nice_number(number, lang='', speech=True, denominators=None): Returns: (str): The formatted string. 
""" + args = locals() + if denominators: + try: + denominators.__iter__ + except (AttributeError, TypeError): + print("substituting") + try: + args[denominators] = range(*denominators) + except TypeError: + raise ValueError("nice_number(denominators) must be " + "iterable, or a valid param for range()") + nice_number(**args) return str(number) @localized_function() -def nice_time(dt, lang='', speech=True, use_24hour=False, - use_ampm=False, variant=None): +def nice_time(dt, lang='', speech=True, use_24hour=ConfigVar, + use_ampm=ConfigVar, variant=ConfigVar): """ Format a time to a comfortable human format @@ -281,7 +294,7 @@ def nice_time(dt, lang='', speech=True, use_24hour=False, @localized_function() -def pronounce_number(number, lang='', places=2, short_scale=True, +def pronounce_number(number, lang='', places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent @@ -321,8 +334,8 @@ def nice_date(dt, lang='', now=None): return date_time_format.date_format(dt, full_code, now) -def nice_date_time(dt, lang='', now=None, use_24hour=False, - use_ampm=False): +def nice_date_time(dt, lang='', now=None, use_24hour=ConfigVar, + use_ampm=ConfigVar): """ Format a datetime to a pronounceable date and time @@ -343,9 +356,15 @@ def nice_date_time(dt, lang='', now=None, use_24hour=False, (str): The formatted date time string """ + full_code = get_full_lang_code(lang) date_time_format.cache(full_code) + if use_24hour is ConfigVar: + use_24hour = config.get(setting='use_24hour', lang=full_code) + if use_ampm is ConfigVar: + use_ampm = config.get(setting='use_ampm', lang=full_code) + return date_time_format.date_time_format(dt, full_code, now, use_24hour, use_ampm) diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py index c2911fe8..52b49839 100644 --- a/lingua_franca/lang/format_en.py +++ b/lingua_franca/lang/format_en.py @@ -14,12 +14,14 @@ # See the License for the specific language governing 
permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \ _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN + def nice_number_en(number, speech=True, denominators=range(1, 21)): """ English helper for nice_number @@ -65,7 +67,7 @@ def nice_number_en(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_en(number, places=2, short_scale=True, scientific=False, +def pronounce_number_en(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent @@ -302,7 +304,7 @@ def _long_scale(n): return result -def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_en(dt, speech=True, use_24hour=ConfigVar, use_ampm=ConfigVar): """ Format a time to a comfortable human format For example, generate 'five thirty' for speech or '5:30' for diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index b6bbaa22..ed15baa3 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -31,7 +31,7 @@ from lingua_franca.internal import resolve_resource_file, ConfigVar -def _convert_words_to_numbers_en(text, short_scale=ConfigVar, ordinals=ConfigVar): +def _convert_words_to_numbers_en(text, short_scale=ConfigVar, ordinals=False): """ Convert words in a string into their equivalent numbers. Args: @@ -67,7 +67,7 @@ def _convert_words_to_numbers_en(text, short_scale=ConfigVar, ordinals=ConfigVar def _extract_numbers_with_text_en(tokens, short_scale=ConfigVar, - ordinals=ConfigVar, fractional_numbers=True): + ordinals=False, fractional_numbers=True): """ Extract all numbers from a list of Tokens, with the words that represent them. 
@@ -109,7 +109,7 @@ def _extract_numbers_with_text_en(tokens, short_scale=ConfigVar, def _extract_number_with_text_en(tokens, short_scale=ConfigVar, - ordinals=ConfigVar, fractional_numbers=True): + ordinals=False, fractional_numbers=True): """ This function extracts a number from a list of Tokens. @@ -132,7 +132,7 @@ def _extract_number_with_text_en(tokens, short_scale=ConfigVar, def _extract_number_with_text_en_helper(tokens, - short_scale=ConfigVar, ordinals=ConfigVar, + short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ Helper for _extract_number_with_text_en. @@ -529,7 +529,7 @@ def _initialize_number_data_en(short_scale, speech=True): return multiplies, string_num_ordinal_en, string_num_scale_en -def extract_number_en(text, short_scale=ConfigVar, ordinals=ConfigVar): +def extract_number_en(text, short_scale=ConfigVar, ordinals=False): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -1450,7 +1450,7 @@ def is_fractional_en(input_str, short_scale=ConfigVar, spoken=True): return False -def extract_numbers_en(text, short_scale=ConfigVar, ordinals=ConfigVar): +def extract_numbers_en(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. 
diff --git a/lingua_franca/res/text/en-us/config.json b/lingua_franca/res/text/en-us/config.json index 77b94b17..e204bcf7 100644 --- a/lingua_franca/res/text/en-us/config.json +++ b/lingua_franca/res/text/en-us/config.json @@ -1,5 +1,6 @@ { - "ordinals": false, "short_scale": true, - "remove_articles": true + "remove_articles": true, + "use_24hour": false, + "use_ampm": false } \ No newline at end of file From 69ee1c0ea5039c14438d29fce74c7c6eaa2e4bc4 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 15:23:30 -0700 Subject: [PATCH 12/21] cleanup pass --- .pylintrc | 3 ++ lingua_franca/__init__.py | 4 +-- lingua_franca/bracket_expansion.py | 2 +- lingua_franca/configuration.py | 52 +++++++++++------------------- lingua_franca/format.py | 1 - lingua_franca/internal.py | 36 ++++++++++++--------- lingua_franca/parse.py | 21 +++++------- 7 files changed, 52 insertions(+), 67 deletions(-) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..d8bd1ec5 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,3 @@ +[MESSAGES CONTROL] + +disable=C0103,C0114,C0115,C0116,W0613 \ No newline at end of file diff --git a/lingua_franca/__init__.py b/lingua_franca/__init__.py index 0af7bc8e..acfaf302 100644 --- a/lingua_franca/__init__.py +++ b/lingua_franca/__init__.py @@ -1,7 +1,7 @@ ### DO NOT CHANGE THIS IMPORT ORDER ### from .internal import get_active_langs, get_supported_locs, \ - get_full_lang_code, get_supported_langs, get_default_loc, \ - get_primary_lang_code + get_full_lang_code, get_supported_langs, get_default_loc, \ + get_primary_lang_code from .configuration import Config diff --git a/lingua_franca/bracket_expansion.py b/lingua_franca/bracket_expansion.py index 2998e7af..2205dbef 100644 --- a/lingua_franca/bracket_expansion.py +++ b/lingua_franca/bracket_expansion.py @@ -172,4 +172,4 @@ def _expand_tree(self, tree): def expand_parentheses(self): tree = self._parse() - return self._expand_tree(tree) \ No newline at end 
of file + return self._expand_tree(tree) diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index 8c4a3efe..f6bd60ac 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -1,8 +1,8 @@ import json from lingua_franca import get_active_langs, get_supported_locs, \ - get_supported_langs, get_primary_lang_code, get_full_lang_code, \ - get_default_loc + get_supported_langs, get_primary_lang_code, get_full_lang_code, \ + get_default_loc from lingua_franca.internal import resolve_resource_file default_global_values = \ @@ -10,8 +10,10 @@ 'load_langs_on_demand': False } + class LangConfig(dict): def __init__(self, lang_code): + super().__init__() if lang_code not in get_supported_locs(): # DO NOT catch UnsupportedLanguageError! # If this fails, we want to crash. This can *only* result from @@ -20,7 +22,6 @@ def __init__(self, lang_code): # to continue. lang_code = get_full_lang_code(lang_code) - resource_file = resolve_resource_file(f'text/{lang_code}/config.json') try: with open(resource_file, 'r', encoding='utf-8') as i_file: @@ -28,34 +29,19 @@ def __init__(self, lang_code): for k in default_values: self[k] = default_values[k] except (FileNotFoundError, TypeError): - self = {} + pass + class Config(dict): def __init__(self): + super().__init__() self['global'] = dict(default_global_values) for lang in get_active_langs(): - ''' - TODO proper full loc support here will handle languages similarly to global: - - self['en']['universal'] for 'default' English config - (all dialects if not overridden) - self['en']['en-us'] for overrides specific to en-US - self['en']['en-au'] for overrides specific to en-AU - - and so forth. 
- ''' - # if all((lang not in self.keys(), lang not in get_supported_locs())): - # self[lang] = {} - # self[lang]['universal'] = LangConfig(lang) - # # begin portion that will need to adapt for the todo above - # full_loc = lang if lang in get_supported_locs() else \ - # get_full_lang_code(lang) - # self[lang][full_loc] = LangConfig(lang) self.load_lang(lang) - + def load_lang(self, lang): - if all((lang not in get_supported_locs(), \ - lang in get_supported_langs())): + if all((lang not in get_supported_locs(), + lang in get_supported_langs())): if lang not in self.keys(): self[lang] = {} self[lang]['universal'] = LangConfig(lang) @@ -71,12 +57,10 @@ def _find_setting(self, setting=None, lang=None): if setting is None: raise ValueError("lingua_franca.config requires " "a setting parameter!") - - + setting_available_in = [] possible_locs = [] - stop = False while True: if setting in self['global']: setting_available_in.append('global') @@ -92,10 +76,9 @@ def _find_setting(self, setting=None, lang=None): if lang in get_supported_locs(): primary_lang_code = get_primary_lang_code(lang) possible_locs.append((primary_lang_code, 'universal')) - possible_locs.append((primary_lang_code, get_default_loc(primary_lang_code))) + possible_locs.append( + (primary_lang_code, get_default_loc(primary_lang_code))) possible_locs.append((primary_lang_code, lang)) - - for place in possible_locs: if setting in self[place[0]][place[1]]: @@ -110,7 +93,7 @@ def _find_setting(self, setting=None, lang=None): def get(self, setting=None, lang=None): if lang != 'global': if all((lang, - get_primary_lang_code(lang) not in get_active_langs())): + get_primary_lang_code(lang) not in get_active_langs())): raise ModuleNotFoundError(f"{lang} is not currently loaded") try: @@ -129,10 +112,11 @@ def set(self, setting=None, value=None, lang='global'): self['global'][setting] = value return - setting_location = self._find_setting(setting, lang if lang != \ + setting_location = self._find_setting(setting, 
lang if lang != 'global' else get_default_loc()) if all((setting_location, setting_location != 'global')): self[setting_location[0]][setting_location[1]][setting] = value return - - raise KeyError(f"{setting} is not available as a setting for language: '{lang}'") \ No newline at end of file + + raise KeyError( + f"{setting} is not available as a setting for language: '{lang}'") diff --git a/lingua_franca/format.py b/lingua_franca/format.py index bee48975..8f4c6316 100755 --- a/lingua_franca/format.py +++ b/lingua_franca/format.py @@ -356,7 +356,6 @@ def nice_date_time(dt, lang='', now=None, use_24hour=ConfigVar, (str): The formatted date time string """ - full_code = get_full_lang_code(lang) date_time_format.cache(full_code) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 207e533b..5c6e72e6 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -52,11 +52,14 @@ class UnsupportedLanguageError(NotImplementedError): class FunctionNotLocalizedError(NotImplementedError): pass + class ConfigVar(): name: str + def __init__(self, name: str): self.name = name + NoneLangWarning = \ DeprecationWarning("Lingua Franca is dropping support" " for 'lang=None' as an explicit" @@ -91,6 +94,7 @@ def get_supported_langs(): """ return _SUPPORTED_LANGUAGES + def get_supported_locs(): """ Returns: @@ -137,14 +141,14 @@ def _set_active_langs(langs=None, override_default=True): set_default_lang(get_full_lang_code(__loaded_langs[0])) else: __default_lang = None - + if not __default_lang: __active_lang_code = None elif get_primary_lang_code(__active_lang_code) != __default_lang: __cur_default = __default_full_codes[__default_lang] __active_lang_code = __cur_default if __cur_default in __loaded_locs \ else _DEFAULT_FULL_LANG_CODES[__default_lang] - + _refresh_function_dict() @@ -241,7 +245,7 @@ def unload_language(lang): lang (str): language code to unload """ from lingua_franca import config - + lang = lang.lower() # if passed full lang code, 
unload that locale if lang in __loaded_locs: @@ -261,8 +265,8 @@ def unload_language(lang): if not only_remaining_loc_of_this_lang: return else: - locales = [_loc for _loc in __loaded_locs \ - if get_primary_lang_code(_loc) == lang] + locales = [_loc for _loc in __loaded_locs + if get_primary_lang_code(_loc) == lang] for _loc in locales: unload_language(_loc) # unload the whole language @@ -309,10 +313,11 @@ def get_default_loc(lang=None): if not lang: return __active_lang_code elif lang.lower() not in _SUPPORTED_LANGUAGES: - + raise UnsupportedLanguageError(lang) return __default_full_codes[lang.lower()] + def set_default_lang(lang_code): """ Set the active BCP-47 language code to be used in formatting/parsing Will choose a default localization if passed a primary language family @@ -348,7 +353,7 @@ def set_default_lang(lang_code): # position doesn't matter here, but it clarifies things while debugging. if __default_lang not in __loaded_langs: load_language(__default_lang) - + __loaded_langs.remove(__default_lang) __loaded_langs.insert(0, __default_lang) _refresh_function_dict() @@ -358,7 +363,8 @@ def set_default_lang(lang_code): else: set_default_loc(lang_code, get_full_lang_code(lang_code)) -def set_default_loc(lang: str=None, loc: str=None): + +def set_default_loc(lang: str = None, loc: str = None): if not loc: raise ValueError("set_default_loc expects a BCP-47 lang code") if not lang: @@ -366,12 +372,12 @@ def set_default_loc(lang: str=None, loc: str=None): lang = lang.lower() loc = loc.lower() if lang not in _SUPPORTED_LANGUAGES or \ - loc not in _SUPPORTED_FULL_LOCALIZATIONS: - raise UnsupportedLanguageError(f"{lang} - {loc}") + loc not in _SUPPORTED_FULL_LOCALIZATIONS: + raise UnsupportedLanguageError(f"{lang} - {loc}") if get_primary_lang_code(loc) != lang: raise ValueError(f"Localization '{loc}'' does not correspond to " - "language '{lang}'") + "language '{lang}'") __default_full_codes[lang] = loc if lang == get_default_lang(): @@ -379,7 +385,6 @@ def 
set_default_loc(lang: str=None, loc: str=None): __active_lang_code = loc - # TODO remove this when invalid lang codes are removed (currently deprecated) def get_primary_lang_code(lang=''): if not lang: @@ -536,8 +541,8 @@ def is_error_type(_type): if not all((is_error_type(e) for e in run_own_code_on)): raise BadTypeError - # Begin wrapper + def localized_function_decorator(func): # Wrapper's logic def _call_localized_function(func, *args, **kwargs): @@ -611,7 +616,6 @@ def _call_localized_function(func, *args, **kwargs): else: full_lang_code = get_full_lang_code(lang_code) - # Here comes the ugly business. _module_name = func.__module__.split('.')[-1] _module = import_module(".lang." + _module_name + @@ -666,8 +670,8 @@ def _call_localized_function(func, *args, **kwargs): for kwarg in loc_signature.parameters: if all((loc_signature.parameters[kwarg].default is ConfigVar, kwarg not in kwargs, - len(args) < \ - list(loc_signature.parameters).index(kwarg) + 1)): + len(args) < + list(loc_signature.parameters).index(kwarg) + 1)): config_var = config.get(kwarg, full_lang_code) if config_var is not None: kwargs[kwarg] = config_var diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 2592e4a6..240c0adc 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -15,12 +15,8 @@ # from difflib import SequenceMatcher -from warnings import warn -from lingua_franca import config -from lingua_franca.time import now_local from lingua_franca.internal import populate_localized_function_dict, \ - get_active_langs, get_full_lang_code, get_primary_lang_code, ConfigVar, \ - get_default_lang, localized_function, _raise_unsupported_language + get_active_langs, ConfigVar, localized_function _REGISTERED_FUNCTIONS = ("extract_numbers", "extract_number", @@ -68,15 +64,14 @@ def match_one(query, choices): if isinstance(choices, dict): return (choices[best[0]], best[1]) - else: - return best + return best @localized_function() def extract_numbers(text: str, - 
short_scale: bool=ConfigVar, - ordinals: bool=ConfigVar, - lang: str=''): + short_scale: bool = ConfigVar, + ordinals: bool = ConfigVar, + lang: str = ''): """ Takes in a string and extracts a list of numbers. @@ -95,9 +90,9 @@ def extract_numbers(text: str, @localized_function() def extract_number(text: str, - short_scale: bool=ConfigVar, - ordinals: bool=ConfigVar, - lang: str=''): + short_scale: bool = ConfigVar, + ordinals: bool = ConfigVar, + lang: str = ''): """Takes in a string and extracts a number. Args: From 8cc728965943fc931db0034a896d30be3f780041 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 20:09:47 -0700 Subject: [PATCH 13/21] begin integration, prep for variant params --- lingua_franca/configuration.py | 4 +-- lingua_franca/internal.py | 45 +++++++++++++++--------- lingua_franca/lang/format_ca.py | 2 +- lingua_franca/lang/format_cs.py | 4 ++- lingua_franca/lang/format_da.py | 3 +- lingua_franca/lang/format_de.py | 3 +- lingua_franca/lang/format_es.py | 2 ++ lingua_franca/lang/format_fr.py | 2 ++ lingua_franca/lang/format_hu.py | 3 +- lingua_franca/lang/format_it.py | 3 +- lingua_franca/lang/format_nl.py | 3 +- lingua_franca/lang/format_pl.py | 4 +-- lingua_franca/lang/format_pt.py | 1 + lingua_franca/lang/format_sl.py | 3 +- lingua_franca/lang/format_sv.py | 3 +- lingua_franca/lang/parse_ca.py | 6 ++-- lingua_franca/lang/parse_cs.py | 15 ++++---- lingua_franca/lang/parse_da.py | 8 +++-- lingua_franca/lang/parse_de.py | 8 +++-- lingua_franca/lang/parse_es.py | 8 +++-- lingua_franca/lang/parse_fr.py | 8 +++-- lingua_franca/lang/parse_hu.py | 1 + lingua_franca/lang/parse_it.py | 7 ++-- lingua_franca/lang/parse_nl.py | 15 ++++---- lingua_franca/lang/parse_pl.py | 15 ++++---- lingua_franca/lang/parse_pt.py | 6 ++-- lingua_franca/lang/parse_sv.py | 6 ++-- lingua_franca/res/text/ca-es/config.json | 6 ++++ lingua_franca/res/text/cs-cz/config.json | 6 ++++ lingua_franca/res/text/da-dk/config.json | 6 ++++ 
lingua_franca/res/text/de-de/config.json | 6 ++++ lingua_franca/res/text/en-au/config.json | 5 ++- lingua_franca/res/text/es-es/config.json | 6 ++++ lingua_franca/res/text/fr-fr/config.json | 6 ++++ lingua_franca/res/text/hu-hu/config.json | 6 ++++ lingua_franca/res/text/it-it/config.json | 6 ++++ lingua_franca/res/text/nl-nl/config.json | 6 ++++ lingua_franca/res/text/pl-pl/config.json | 6 ++++ lingua_franca/res/text/pt-pt/config.json | 6 ++++ lingua_franca/res/text/ru-ru/config.json | 6 ++++ lingua_franca/res/text/sl-si/config.json | 6 ++++ lingua_franca/res/text/sv-se/config.json | 6 ++++ lingua_franca/res/text/tr-tr/config.json | 6 ++++ test/test_localizer.py | 2 -- test/test_parse_nl.py | 3 ++ 45 files changed, 210 insertions(+), 78 deletions(-) create mode 100644 lingua_franca/res/text/ca-es/config.json create mode 100644 lingua_franca/res/text/cs-cz/config.json create mode 100644 lingua_franca/res/text/da-dk/config.json create mode 100644 lingua_franca/res/text/de-de/config.json create mode 100644 lingua_franca/res/text/es-es/config.json create mode 100644 lingua_franca/res/text/fr-fr/config.json create mode 100644 lingua_franca/res/text/hu-hu/config.json create mode 100644 lingua_franca/res/text/it-it/config.json create mode 100644 lingua_franca/res/text/nl-nl/config.json create mode 100644 lingua_franca/res/text/pl-pl/config.json create mode 100644 lingua_franca/res/text/pt-pt/config.json create mode 100644 lingua_franca/res/text/ru-ru/config.json create mode 100644 lingua_franca/res/text/sl-si/config.json create mode 100644 lingua_franca/res/text/sv-se/config.json create mode 100644 lingua_franca/res/text/tr-tr/config.json diff --git a/lingua_franca/configuration.py b/lingua_franca/configuration.py index f6bd60ac..b738d6f2 100644 --- a/lingua_franca/configuration.py +++ b/lingua_franca/configuration.py @@ -53,7 +53,7 @@ def load_lang(self, lang): self[lang][full_loc] = LangConfig(full_loc) - def _find_setting(self, setting=None, lang=None): + def 
_find_setting(self, setting=None, lang=''): if setting is None: raise ValueError("lingua_franca.config requires " "a setting parameter!") @@ -90,7 +90,7 @@ def _find_setting(self, setting=None, lang=None): except IndexError: return None - def get(self, setting=None, lang=None): + def get(self, setting=None, lang=''): if lang != 'global': if all((lang, get_primary_lang_code(lang) not in get_active_langs())): diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 5c6e72e6..d3b4f867 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -144,10 +144,11 @@ def _set_active_langs(langs=None, override_default=True): if not __default_lang: __active_lang_code = None - elif get_primary_lang_code(__active_lang_code) != __default_lang: - __cur_default = __default_full_codes[__default_lang] - __active_lang_code = __cur_default if __cur_default in __loaded_locs \ - else _DEFAULT_FULL_LANG_CODES[__default_lang] + elif __active_lang_code is not None: + if get_primary_lang_code(__active_lang_code) != __default_lang: + __cur_default = __default_full_codes[__default_lang] + __active_lang_code = __cur_default if __cur_default in __loaded_locs \ + else _DEFAULT_FULL_LANG_CODES[__default_lang] _refresh_function_dict() @@ -302,7 +303,7 @@ def get_default_lang(): return __default_lang -def get_default_loc(lang=None): +def get_default_loc(lang=''): """ Return the current, localized BCP-47 language code, such as 'en-US' or 'es-ES'. 
For the default language *family* - which is passed to most parsers and formatters - call `get_default_lang` @@ -318,7 +319,7 @@ def get_default_loc(lang=None): return __default_full_codes[lang.lower()] -def set_default_lang(lang_code): +def set_default_lang(lang_code=''): """ Set the active BCP-47 language code to be used in formatting/parsing Will choose a default localization if passed a primary language family (ex: `set_default_lang("en")` will default to "en-US") @@ -360,7 +361,7 @@ def set_default_lang(lang_code): if is_supported_full_lang(lang_code): set_default_loc(get_primary_lang_code(lang_code), lang_code) - else: + elif lang_code: set_default_loc(lang_code, get_full_lang_code(lang_code)) @@ -464,9 +465,14 @@ def __get_full_lang_code_deprecation_warning(lang=''): Returns: str: A full language code, such as "en-us" or "de-de" """ - if lang is None: - return __active_lang_code.lower() - elif not isinstance(lang, str): + if not lang: + if lang is None: + warn(NoneLangWarning) + if __active_lang_code: + return __active_lang_code.lower() + raise ModuleNotFoundError("No language module loaded!") + + if not isinstance(lang, str): raise TypeError("get_full_lang_code expects str, " "got {}".format(type(lang))) if lang.lower() in _SUPPORTED_FULL_LOCALIZATIONS: @@ -668,13 +674,18 @@ def _call_localized_function(func, *args, **kwargs): # Now let's substitute any values that are supposed to come from # lingua_franca.config for kwarg in loc_signature.parameters: - if all((loc_signature.parameters[kwarg].default is ConfigVar, - kwarg not in kwargs, - len(args) < - list(loc_signature.parameters).index(kwarg) + 1)): - config_var = config.get(kwarg, full_lang_code) - if config_var is not None: - kwargs[kwarg] = config_var + default = loc_signature.parameters[kwarg].default + if default is ConfigVar or \ + isinstance(default, + ConfigVar): + if all((kwarg not in kwargs, + len(args) < + list(loc_signature.parameters).index(kwarg) + 1)): + config_var = config.get(kwarg, 
full_lang_code) if \ + default is ConfigVar else \ + config.get(default.name, full_lang_code) + if config_var is not None: + kwargs[kwarg] = config_var # Now we call the function, ignoring any kwargs from the # wrapped function that aren't in the localized function. diff --git a/lingua_franca/lang/format_ca.py b/lingua_franca/lang/format_ca.py index b3dc265c..0bd7b925 100644 --- a/lingua_franca/lang/format_ca.py +++ b/lingua_franca/lang/format_ca.py @@ -17,7 +17,7 @@ from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_ca import _FRACTION_STRING_CA, \ _NUM_STRING_CA -from lingua_franca.internal import lookup_variant +from lingua_franca.internal import lookup_variant, ConfigVar from enum import IntEnum diff --git a/lingua_franca/lang/format_cs.py b/lingua_franca/lang/format_cs.py index 841a9693..e07139e5 100644 --- a/lingua_franca/lang/format_cs.py +++ b/lingua_franca/lang/format_cs.py @@ -15,6 +15,8 @@ # limitations under the License. # +from lingua_franca.internal import ConfigVar + from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_cs import _NUM_STRING_CS, \ _FRACTION_STRING_CS, _LONG_SCALE_CS, _SHORT_SCALE_CS, _SHORT_ORDINAL_CS, _LONG_ORDINAL_CS @@ -68,7 +70,7 @@ def nice_number_cs(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_cs(number, places=2, short_scale=True, scientific=False, +def pronounce_number_cs(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_da.py b/lingua_franca/lang/format_da.py index f1ccdcc1..bb42586c 100644 --- a/lingua_franca/lang/format_da.py +++ b/lingua_franca/lang/format_da.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_da import _EXTRA_SPACE_DA, \ @@ -59,7 +60,7 @@ def nice_number_da(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_da(number, places=2, short_scale=True, scientific=False, +def pronounce_number_da(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_de.py b/lingua_franca/lang/format_de.py index 9f6c7455..00f34550 100644 --- a/lingua_franca/lang/format_de.py +++ b/lingua_franca/lang/format_de.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_de import _EXTRA_SPACE_DE, \ @@ -58,7 +59,7 @@ def nice_number_de(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_de(number, places=2, short_scale=True, scientific=False, +def pronounce_number_de(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_es.py b/lingua_franca/lang/format_es.py index a224af57..c6c6922a 100644 --- a/lingua_franca/lang/format_es.py +++ b/lingua_franca/lang/format_es.py @@ -17,6 +17,8 @@ Format functions for castillian (es-es) """ +from lingua_franca.internal import ConfigVar + from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_es import _NUM_STRING_ES, \ _FRACTION_STRING_ES diff --git a/lingua_franca/lang/format_fr.py b/lingua_franca/lang/format_fr.py index 5eea39c3..31cd1ac4 100644 --- a/lingua_franca/lang/format_fr.py +++ b/lingua_franca/lang/format_fr.py @@ -13,6 +13,8 
@@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar + from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_fr import _NUM_STRING_FR, \ _FRACTION_STRING_FR diff --git a/lingua_franca/lang/format_hu.py b/lingua_franca/lang/format_hu.py index f12a1843..84204a7d 100644 --- a/lingua_franca/lang/format_hu.py +++ b/lingua_franca/lang/format_hu.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_hu import _NUM_POWERS_OF_TEN, \ @@ -74,7 +75,7 @@ def nice_number_hu(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_hu(number, places=2, short_scale=True, scientific=False, +def pronounce_number_hu(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_it.py b/lingua_franca/lang/format_it.py index 52cf4e11..9058eecf 100644 --- a/lingua_franca/lang/format_it.py +++ b/lingua_franca/lang/format_it.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_it import _NUM_STRING_IT, \ @@ -76,7 +77,7 @@ def nice_number_it(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_it(number, places=2, short_scale=False, scientific=False): +def pronounce_number_it(number, places=2, short_scale=ConfigVar, scientific=False): """ Convert a number to it's spoken equivalent adapted to italian fron en version diff --git a/lingua_franca/lang/format_nl.py b/lingua_franca/lang/format_nl.py index cba2110d..3b248475 100644 --- a/lingua_franca/lang/format_nl.py +++ b/lingua_franca/lang/format_nl.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from .format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_nl import _NUM_POWERS_OF_TEN, \ @@ -58,7 +59,7 @@ def nice_number_nl(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_nl(number, places=2, short_scale=True, scientific=False, +def pronounce_number_nl(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_pl.py b/lingua_franca/lang/format_pl.py index 483a0fc8..5cf9540e 100644 --- a/lingua_franca/lang/format_pl.py +++ b/lingua_franca/lang/format_pl.py @@ -18,7 +18,7 @@ from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_pl import _NUM_STRING_PL, \ _FRACTION_STRING_PL, _SHORT_SCALE_PL, _SHORT_ORDINAL_PL, _ALT_ORDINALS_PL -from lingua_franca.internal import FunctionNotLocalizedError +from lingua_franca.internal import FunctionNotLocalizedError, ConfigVar def nice_number_pl(number, speech=True, denominators=range(1, 21)): @@ -61,7 +61,7 @@ def 
nice_number_pl(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_pl(num, places=2, short_scale=True, scientific=False, +def pronounce_number_pl(num, places=2, short_scale=ConfigVar, scientific=False, ordinals=False, scientific_run=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_pt.py b/lingua_franca/lang/format_pt.py index 7c8107ed..4b8cd251 100644 --- a/lingua_franca/lang/format_pt.py +++ b/lingua_franca/lang/format_pt.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_pt import _FRACTION_STRING_PT, \ diff --git a/lingua_franca/lang/format_sl.py b/lingua_franca/lang/format_sl.py index 6a48802d..3b2fd23a 100644 --- a/lingua_franca/lang/format_sl.py +++ b/lingua_franca/lang/format_sl.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from lingua_franca.internal import ConfigVar from lingua_franca.lang.common_data_sl import _NUM_STRING_SL, \ _FRACTION_STRING_SL, _LONG_SCALE_SL, _SHORT_SCALE_SL, _SHORT_ORDINAL_SL @@ -67,7 +68,7 @@ def nice_number_sl(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_sl(num, places=2, short_scale=True, scientific=False, +def pronounce_number_sl(num, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/format_sv.py b/lingua_franca/lang/format_sv.py index 260f03b1..ccd793a3 100644 --- a/lingua_franca/lang/format_sv.py +++ b/lingua_franca/lang/format_sv.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from lingua_franca.internal import ConfigVar from .format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_sv import _EXTRA_SPACE_SV, \ @@ -64,7 +65,7 @@ def nice_number_sv(number, speech=True, denominators=range(1, 21)): return return_string -def pronounce_number_sv(number, places=2, short_scale=True, scientific=False, +def pronounce_number_sv(number, places=2, short_scale=ConfigVar, scientific=False, ordinals=False): """ Convert a number to it's spoken equivalent diff --git a/lingua_franca/lang/parse_ca.py b/lingua_franca/lang/parse_ca.py index 87cfa4a0..766d780c 100644 --- a/lingua_franca/lang/parse_ca.py +++ b/lingua_franca/lang/parse_ca.py @@ -27,13 +27,13 @@ _FEMALE_DETERMINANTS_CA, _FEMALE_ENDINGS_CA, \ _MALE_DETERMINANTS_CA, _MALE_ENDINGS_CA, _GENDERS_CA, \ _TENS_CA, _AFTER_TENS_CA, _HUNDREDS_CA, _BEFORE_HUNDREDS_CA -from lingua_franca.internal import resolve_resource_file +from lingua_franca.internal import resolve_resource_file, ConfigVar from lingua_franca.lang.parse_common import Normalizer import json import re -def is_fractional_ca(input_str, short_scale=True): +def is_fractional_ca(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -90,7 +90,7 @@ def is_fractional_ca(input_str, short_scale=True): return False -def extract_number_ca(text, short_scale=True, ordinals=False): +def extract_number_ca(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. 
diff --git a/lingua_franca/lang/parse_cs.py b/lingua_franca/lang/parse_cs.py index 0541cbfd..b791122e 100644 --- a/lingua_franca/lang/parse_cs.py +++ b/lingua_franca/lang/parse_cs.py @@ -27,6 +27,7 @@ import re import json from lingua_franca import resolve_resource_file +from lingua_franca.internal import ConfigVar from lingua_franca.time import now_local @@ -82,7 +83,7 @@ def generate_plurals_cs(originals): _STRING_LONG_ORDINAL_CS = invert_dict(_LONG_ORDINAL_CS) -def _convert_words_to_numbers_cs(text, short_scale=True, ordinals=False): +def _convert_words_to_numbers_cs(text, short_scale=ConfigVar, ordinals=False): """ Convert words in a string into their equivalent numbers. Args: @@ -118,7 +119,7 @@ def _convert_words_to_numbers_cs(text, short_scale=True, ordinals=False): return ' '.join(results) -def _extract_numbers_with_text_cs(tokens, short_scale=True, +def _extract_numbers_with_text_cs(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ Extract all numbers from a list of Tokens, with the words that @@ -160,7 +161,7 @@ def _extract_numbers_with_text_cs(tokens, short_scale=True, return results -def _extract_number_with_text_cs(tokens, short_scale=True, +def _extract_number_with_text_cs(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ This function extracts a number from a list of Tokens. @@ -184,7 +185,7 @@ def _extract_number_with_text_cs(tokens, short_scale=True, def _extract_number_with_text_cs_helper(tokens, - short_scale=True, ordinals=False, + short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ Helper for _extract_number_with_text_en. 
@@ -579,7 +580,7 @@ def _initialize_number_data(short_scale): return multiplies, string_num_ordinal_cs, string_num_scale_cs -def extract_number_cs(text, short_scale=True, ordinals=False): +def extract_number_cs(text, short_scale=ConfigVar, ordinals=False): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -1533,7 +1534,7 @@ def date_found(): return [extractedDate, resultStr] -def isFractional_cs(input_str, short_scale=True): +def isFractional_cs(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -1559,7 +1560,7 @@ def isFractional_cs(input_str, short_scale=True): return False -def extract_numbers_cs(text, short_scale=True, ordinals=False): +def extract_numbers_cs(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. diff --git a/lingua_franca/lang/parse_da.py b/lingua_franca/lang/parse_da.py index c6dc4353..433ea47b 100644 --- a/lingua_franca/lang/parse_da.py +++ b/lingua_franca/lang/parse_da.py @@ -15,13 +15,15 @@ # from datetime import datetime from dateutil.relativedelta import relativedelta + from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \ extract_numbers_generic, Normalizer from lingua_franca.lang.common_data_da import _DA_NUMBERS from lingua_franca.lang.format_da import pronounce_number_da +from lingua_franca.internal import ConfigVar -def extract_number_da(text, short_scale=True, ordinals=False): +def extract_number_da(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. @@ -768,7 +770,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_da(input_str, short_scale=True): +def is_fractional_da(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. 
@@ -864,7 +866,7 @@ def normalize_da(text, remove_articles=True): return normalized[1:] # strip the initial space -def extract_numbers_da(text, short_scale=True, ordinals=False): +def extract_numbers_da(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. diff --git a/lingua_franca/lang/parse_de.py b/lingua_franca/lang/parse_de.py index 5f7f7b32..193c4b26 100644 --- a/lingua_franca/lang/parse_de.py +++ b/lingua_franca/lang/parse_de.py @@ -21,6 +21,8 @@ from lingua_franca.lang.common_data_de import _DE_NUMBERS from lingua_franca.lang.format_de import pronounce_number_de +from lingua_franca.internal import ConfigVar + de_numbers = { 'null': 0, 'ein': 1, @@ -141,7 +143,7 @@ def repl(match): return (duration, text) -def extract_number_de(text, short_scale=True, ordinals=False): +def extract_number_de(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. @@ -894,7 +896,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_de(input_str, short_scale=True): +def is_fractional_de(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -997,7 +999,7 @@ def normalize_de(text, remove_articles=True): return normalized[1:] # strip the initial space -def extract_numbers_de(text, short_scale=True, ordinals=False): +def extract_numbers_de(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. 
diff --git a/lingua_franca/lang/parse_es.py b/lingua_franca/lang/parse_es.py index bd0ef0d2..d42295ed 100644 --- a/lingua_franca/lang/parse_es.py +++ b/lingua_franca/lang/parse_es.py @@ -20,8 +20,10 @@ from lingua_franca.lang.parse_common import * from lingua_franca.lang.common_data_es import _ARTICLES_ES, _STRING_NUM_ES +from lingua_franca.internal import ConfigVar -def is_fractional_es(input_str, short_scale=True): + +def is_fractional_es(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -55,7 +57,7 @@ def is_fractional_es(input_str, short_scale=True): return False -def extract_number_es(text, short_scale=True, ordinals=False): +def extract_number_es(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. @@ -267,7 +269,7 @@ def es_number(i): return es_number(i) -def extract_numbers_es(text, short_scale=True, ordinals=False): +def extract_numbers_es(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. diff --git a/lingua_franca/lang/parse_fr.py b/lingua_franca/lang/parse_fr.py index 895990f5..7f344a8a 100644 --- a/lingua_franca/lang/parse_fr.py +++ b/lingua_franca/lang/parse_fr.py @@ -22,6 +22,8 @@ from lingua_franca.lang.common_data_fr import _ARTICLES_FR, _NUMBERS_FR, \ _ORDINAL_ENDINGS_FR +from lingua_franca.internal import ConfigVar + def extract_duration_fr(text): """ Convert an french phrase into a number of seconds @@ -366,7 +368,7 @@ def _number_ordinal_fr(words, i): return None -def extract_number_fr(text, short_scale=True, ordinals=False): +def extract_number_fr(text, short_scale=ConfigVar, ordinals=False): """Takes in a string and extracts a number. 
Args: text (str): the string to extract a number from @@ -991,7 +993,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_fr(input_str, short_scale=True): +def is_fractional_fr(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. Args: @@ -1061,7 +1063,7 @@ def normalize_fr(text, remove_articles=True): return normalized[1:] # strip the initial space -def extract_numbers_fr(text, short_scale=True, ordinals=False): +def extract_numbers_fr(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. diff --git a/lingua_franca/lang/parse_hu.py b/lingua_franca/lang/parse_hu.py index ba4f523b..4e8f1049 100644 --- a/lingua_franca/lang/parse_hu.py +++ b/lingua_franca/lang/parse_hu.py @@ -16,6 +16,7 @@ from datetime import datetime, timedelta from lingua_franca.lang.parse_common import Normalizer +from lingua_franca.internal import ConfigVar class HungarianNormalizer(Normalizer): """ TODO implement language specific normalizer""" diff --git a/lingua_franca/lang/parse_it.py b/lingua_franca/lang/parse_it.py index ed05dd0b..5a444b99 100644 --- a/lingua_franca/lang/parse_it.py +++ b/lingua_franca/lang/parse_it.py @@ -28,8 +28,9 @@ from lingua_franca.lang.common_data_it import _SHORT_ORDINAL_STRING_IT, \ _ARTICLES_IT, _LONG_ORDINAL_STRING_IT, _STRING_NUM_IT +from lingua_franca.internal import ConfigVar -def is_fractional_it(input_str, short_scale=False): +def is_fractional_it(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. 
Updated to italian from en version 18.8.9 @@ -223,7 +224,7 @@ def _extract_number_long_it(word): return value -def extract_number_it(text, short_scale=False, ordinals=False): +def extract_number_it(text, short_scale=ConfigVar, ordinals=False): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -1147,7 +1148,7 @@ def get_gender_it(word, context=""): return gender -def extract_numbers_it(text, short_scale=False, ordinals=False): +def extract_numbers_it(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. diff --git a/lingua_franca/lang/parse_nl.py b/lingua_franca/lang/parse_nl.py index 79a1f4be..fdd482a9 100644 --- a/lingua_franca/lang/parse_nl.py +++ b/lingua_franca/lang/parse_nl.py @@ -26,8 +26,9 @@ _STRING_SHORT_ORDINAL_NL, _SUMS_NL import re +from lingua_franca.internal import ConfigVar -def _convert_words_to_numbers_nl(text, short_scale=True, ordinals=False): +def _convert_words_to_numbers_nl(text, short_scale=ConfigVar, ordinals=False): """Convert words in a string into their equivalent numbers. Args: text str: @@ -61,7 +62,7 @@ def _convert_words_to_numbers_nl(text, short_scale=True, ordinals=False): return ' '.join(results) -def _extract_numbers_with_text_nl(tokens, short_scale=True, +def _extract_numbers_with_text_nl(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """Extract all numbers from a list of _Tokens, with the representing words. @@ -100,7 +101,7 @@ def _extract_numbers_with_text_nl(tokens, short_scale=True, return results -def _extract_number_with_text_nl(tokens, short_scale=True, +def _extract_number_with_text_nl(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """This function extracts a number from a list of _Tokens. 
@@ -122,7 +123,7 @@ def _extract_number_with_text_nl(tokens, short_scale=True, def _extract_number_with_text_nl_helper(tokens, - short_scale=True, ordinals=False, + short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """Helper for _extract_number_with_text_nl. @@ -413,7 +414,7 @@ def _initialize_number_data_nl(short_scale): return multiplies, string_num_ordinal_nl, string_num_scale_nl -def extract_number_nl(text, short_scale=True, ordinals=False): +def extract_number_nl(text, short_scale=ConfigVar, ordinals=False): """Extract a number from a text string The function handles pronunciations in long scale and short scale @@ -1268,7 +1269,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_nl(input_str, short_scale=True): +def is_fractional_nl(input_str, short_scale=ConfigVar): """This function takes the given text and checks if it is a fraction. Args: @@ -1292,7 +1293,7 @@ def is_fractional_nl(input_str, short_scale=True): return False -def extract_numbers_nl(text, short_scale=True, ordinals=False): +def extract_numbers_nl(text, short_scale=ConfigVar, ordinals=False): """Takes in a string and extracts a list of numbers. Args: diff --git a/lingua_franca/lang/parse_pl.py b/lingua_franca/lang/parse_pl.py index e8df170b..ad9fca9e 100644 --- a/lingua_franca/lang/parse_pl.py +++ b/lingua_franca/lang/parse_pl.py @@ -26,6 +26,7 @@ import re +from lingua_franca.internal import ConfigVar def generate_plurals_pl(originals): """ @@ -109,7 +110,7 @@ def generate_fractions_pl(fractions): _REV_FRACTITONS = generate_fractions_pl(invert_dict(_FRACTION_STRING_PL)) -def _convert_words_to_numbers_pl(text, short_scale=True, ordinals=False): +def _convert_words_to_numbers_pl(text, short_scale=ConfigVar, ordinals=False): """ Convert words in a string into their equivalent numbers. 
Args: @@ -145,7 +146,7 @@ def _convert_words_to_numbers_pl(text, short_scale=True, ordinals=False): return ' '.join(results) -def _extract_numbers_with_text_pl(tokens, short_scale=True, +def _extract_numbers_with_text_pl(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ Extract all numbers from a list of Tokens, with the words that @@ -187,7 +188,7 @@ def _extract_numbers_with_text_pl(tokens, short_scale=True, return results -def _extract_number_with_text_pl(tokens, short_scale=True, +def _extract_number_with_text_pl(tokens, short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ This function extracts a number from a list of Tokens. @@ -209,7 +210,7 @@ def _extract_number_with_text_pl(tokens, short_scale=True, def _extract_number_with_text_pl_helper(tokens, - short_scale=True, ordinals=False, + short_scale=ConfigVar, ordinals=False, fractional_numbers=True): """ Helper for _extract_number_with_text_en. @@ -576,7 +577,7 @@ def _initialize_number_data(short_scale): return multiplies, _STRING_SHORT_ORDINAL_PL, string_num_scale -def extract_number_pl(text, short_scale=True, ordinals=False): +def extract_number_pl(text, short_scale=ConfigVar, ordinals=False): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -1314,7 +1315,7 @@ def date_found(): return [extractedDate, resultStr] -def isFractional_pl(input_str, short_scale=True): +def isFractional_pl(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -1332,7 +1333,7 @@ def isFractional_pl(input_str, short_scale=True): return False -def extract_numbers_pl(text, short_scale=True, ordinals=False): +def extract_numbers_pl(text, short_scale=ConfigVar, ordinals=False): """ Takes in a string and extracts a list of numbers. 
diff --git a/lingua_franca/lang/parse_pt.py b/lingua_franca/lang/parse_pt.py index a17ab107..875dcea7 100644 --- a/lingua_franca/lang/parse_pt.py +++ b/lingua_franca/lang/parse_pt.py @@ -26,13 +26,13 @@ from lingua_franca.lang.common_data_pt import _NUMBERS_PT, \ _FEMALE_DETERMINANTS_PT, _FEMALE_ENDINGS_PT, \ _MALE_DETERMINANTS_PT, _MALE_ENDINGS_PT, _GENDERS_PT -from lingua_franca.internal import resolve_resource_file +from lingua_franca.internal import resolve_resource_file, ConfigVar from lingua_franca.lang.parse_common import Normalizer import json import re -def is_fractional_pt(input_str, short_scale=True): +def is_fractional_pt(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. @@ -66,7 +66,7 @@ def is_fractional_pt(input_str, short_scale=True): return False -def extract_number_pt(text, short_scale=True, ordinals=False): +def extract_number_pt(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. diff --git a/lingua_franca/lang/parse_sv.py b/lingua_franca/lang/parse_sv.py index 8d66c63f..a075a3d4 100644 --- a/lingua_franca/lang/parse_sv.py +++ b/lingua_franca/lang/parse_sv.py @@ -16,9 +16,9 @@ from datetime import datetime from dateutil.relativedelta import relativedelta from .parse_common import is_numeric, look_for_fractions, Normalizer +from lingua_franca.internal import ConfigVar - -def extract_number_sv(text, short_scale=True, ordinals=False): +def extract_number_sv(text, short_scale=ConfigVar, ordinals=False): """ This function prepares the given text for parsing by making numbers consistent, getting rid of contractions, etc. @@ -725,7 +725,7 @@ def date_found(): return [extractedDate, resultStr] -def is_fractional_sv(input_str, short_scale=True): +def is_fractional_sv(input_str, short_scale=ConfigVar): """ This function takes the given text and checks if it is a fraction. 
diff --git a/lingua_franca/res/text/ca-es/config.json b/lingua_franca/res/text/ca-es/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/ca-es/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/cs-cz/config.json b/lingua_franca/res/text/cs-cz/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/cs-cz/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/da-dk/config.json b/lingua_franca/res/text/da-dk/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/da-dk/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/de-de/config.json b/lingua_franca/res/text/de-de/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/de-de/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/en-au/config.json b/lingua_franca/res/text/en-au/config.json index 54086600..d3e5b728 100644 --- a/lingua_franca/res/text/en-au/config.json +++ b/lingua_franca/res/text/en-au/config.json @@ -1,3 +1,6 @@ { - "short_scale": false + "short_scale": false, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false } \ No newline at end of file diff --git a/lingua_franca/res/text/es-es/config.json b/lingua_franca/res/text/es-es/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/es-es/config.json @@ -0,0 +1,6 @@ +{ 
+ "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/fr-fr/config.json b/lingua_franca/res/text/fr-fr/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/fr-fr/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/hu-hu/config.json b/lingua_franca/res/text/hu-hu/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/hu-hu/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/it-it/config.json b/lingua_franca/res/text/it-it/config.json new file mode 100644 index 00000000..d3e5b728 --- /dev/null +++ b/lingua_franca/res/text/it-it/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": false, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/nl-nl/config.json b/lingua_franca/res/text/nl-nl/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/nl-nl/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/pl-pl/config.json b/lingua_franca/res/text/pl-pl/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/pl-pl/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/pt-pt/config.json b/lingua_franca/res/text/pt-pt/config.json new file mode 100644 index 
00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/pt-pt/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/ru-ru/config.json b/lingua_franca/res/text/ru-ru/config.json new file mode 100644 index 00000000..d3e5b728 --- /dev/null +++ b/lingua_franca/res/text/ru-ru/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": false, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/sl-si/config.json b/lingua_franca/res/text/sl-si/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/sl-si/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/sv-se/config.json b/lingua_franca/res/text/sv-se/config.json new file mode 100644 index 00000000..e204bcf7 --- /dev/null +++ b/lingua_franca/res/text/sv-se/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": true, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/lingua_franca/res/text/tr-tr/config.json b/lingua_franca/res/text/tr-tr/config.json new file mode 100644 index 00000000..d3e5b728 --- /dev/null +++ b/lingua_franca/res/text/tr-tr/config.json @@ -0,0 +1,6 @@ +{ + "short_scale": false, + "remove_articles": true, + "use_24hour": false, + "use_ampm": false +} \ No newline at end of file diff --git a/test/test_localizer.py b/test/test_localizer.py index ed2779c6..cbc450ab 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -132,8 +132,6 @@ def test_load_language(self): unload_all_languages() def test_auto_default_language(self): - lingua_franca.load_language('en') - # Load two languages, ensure first is default lingua_franca.load_languages(['en', 
'es']) self.assertEqual(lingua_franca.get_default_lang(), 'en') diff --git a/test/test_parse_nl.py b/test/test_parse_nl.py index cff8c1c2..af29ebd1 100644 --- a/test/test_parse_nl.py +++ b/test/test_parse_nl.py @@ -34,6 +34,9 @@ def tearDownModule(): class TestParsing(unittest.TestCase): + def setUpClass(): + load_language('nl-nl') + set_default_lang('nl-nl') def test_articles(self): self.assertEqual( normalize("dit is de test", LANG, remove_articles=True), From 6d657ef54275acd0180f7efee74ddd3361340343 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 20:22:55 -0700 Subject: [PATCH 14/21] complete integration; + TimeVariant to ca-es conf --- lingua_franca/format.py | 2 +- lingua_franca/lang/__init__.py | 4 ++-- lingua_franca/lang/format_ca.py | 4 ++-- lingua_franca/lang/format_cs.py | 2 +- lingua_franca/lang/format_da.py | 2 +- lingua_franca/lang/format_de.py | 2 +- lingua_franca/lang/format_es.py | 2 +- lingua_franca/lang/format_fr.py | 2 +- lingua_franca/lang/format_hu.py | 2 +- lingua_franca/lang/format_it.py | 2 +- lingua_franca/lang/format_nl.py | 2 +- lingua_franca/lang/format_pl.py | 2 +- lingua_franca/lang/format_pt.py | 2 +- lingua_franca/lang/format_sl.py | 2 +- lingua_franca/lang/format_sv.py | 2 +- lingua_franca/res/text/ca-es/config.json | 3 ++- lingua_franca/res/text/cs-cz/config.json | 2 +- 17 files changed, 20 insertions(+), 19 deletions(-) diff --git a/lingua_franca/format.py b/lingua_franca/format.py index 8f4c6316..77db3f96 100755 --- a/lingua_franca/format.py +++ b/lingua_franca/format.py @@ -273,7 +273,7 @@ def nice_number(number, lang='', speech=True, denominators=None): @localized_function() def nice_time(dt, lang='', speech=True, use_24hour=ConfigVar, - use_ampm=ConfigVar, variant=ConfigVar): + use_ampm=ConfigVar, variant=ConfigVar("TimeVariant")): """ Format a time to a comfortable human format diff --git a/lingua_franca/lang/__init__.py b/lingua_franca/lang/__init__.py index a82a87d3..086a6fb7 100644 --- 
a/lingua_franca/lang/__init__.py +++ b/lingua_franca/lang/__init__.py @@ -44,7 +44,7 @@ def set_active_lang(lang_code): set_default_lang(lang_code=lang_code) -def get_primary_lang_code(lang=None): +def get_primary_lang_code(lang=''): """ Get the primary language code Args: @@ -58,7 +58,7 @@ def get_primary_lang_code(lang=None): return gplc(lang=lang) -def get_full_lang_code(lang=None): +def get_full_lang_code(lang=''): """ Get the full language code Args: diff --git a/lingua_franca/lang/format_ca.py b/lingua_franca/lang/format_ca.py index 0bd7b925..ff33994a 100644 --- a/lingua_franca/lang/format_ca.py +++ b/lingua_franca/lang/format_ca.py @@ -134,8 +134,8 @@ def pronounce_number_ca(number, places=2): "full_bell": TimeVariantCA.FULL_BELL, "spanish": TimeVariantCA.SPANISH_LIKE }) -def nice_time_ca(dt, speech=True, use_24hour=False, use_ampm=False, - variant=None): +def nice_time_ca(dt, speech=True, use_24hour=ConfigVar, use_ampm=False, + variant=ConfigVar("TimeVariant")): """ Format a time to a comfortable human format For example, generate 'cinc trenta' for speech or '5:30' for diff --git a/lingua_franca/lang/format_cs.py b/lingua_franca/lang/format_cs.py index e07139e5..8711d4c3 100644 --- a/lingua_franca/lang/format_cs.py +++ b/lingua_franca/lang/format_cs.py @@ -307,7 +307,7 @@ def _long_scale(n): return result -def nice_time_cs(dt, speech=True, use_24hour=True, use_ampm=False): +def nice_time_cs(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format For example, generate 'five thirty' for speech or '5:30' for diff --git a/lingua_franca/lang/format_da.py b/lingua_franca/lang/format_da.py index bb42586c..0b57230e 100644 --- a/lingua_franca/lang/format_da.py +++ b/lingua_franca/lang/format_da.py @@ -215,7 +215,7 @@ def pronounce_ordinal_da(number): return pronounce_number_da(number) + "ende" -def nice_time_da(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_da(dt, speech=True, use_24hour=ConfigVar, 
use_ampm=False): """ Format a time to a comfortable human format diff --git a/lingua_franca/lang/format_de.py b/lingua_franca/lang/format_de.py index 00f34550..1a15c2e7 100644 --- a/lingua_franca/lang/format_de.py +++ b/lingua_franca/lang/format_de.py @@ -208,7 +208,7 @@ def pronounce_ordinal_de(number): return pronounce_number_de(number) + "ste" -def nice_time_de(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_de(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format For example, generate 'five thirty' for speech or '5:30' for diff --git a/lingua_franca/lang/format_es.py b/lingua_franca/lang/format_es.py index c6c6922a..c72e7c27 100644 --- a/lingua_franca/lang/format_es.py +++ b/lingua_franca/lang/format_es.py @@ -155,7 +155,7 @@ def pronounce_number_es(number, places=2): return result -def nice_time_es(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_es(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format diff --git a/lingua_franca/lang/format_fr.py b/lingua_franca/lang/format_fr.py index 31cd1ac4..0365818c 100644 --- a/lingua_franca/lang/format_fr.py +++ b/lingua_franca/lang/format_fr.py @@ -152,7 +152,7 @@ def pronounce_number_fr(number, places=2): return result -def nice_time_fr(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_fr(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format diff --git a/lingua_franca/lang/format_hu.py b/lingua_franca/lang/format_hu.py index 84204a7d..02218a5f 100644 --- a/lingua_franca/lang/format_hu.py +++ b/lingua_franca/lang/format_hu.py @@ -231,7 +231,7 @@ def pronounce_ordinal_hu(number): return root + "edik" if vtype == 1 else root + "adik" -def nice_time_hu(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_hu(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human 
format diff --git a/lingua_franca/lang/format_it.py b/lingua_franca/lang/format_it.py index 9058eecf..daed230e 100644 --- a/lingua_franca/lang/format_it.py +++ b/lingua_franca/lang/format_it.py @@ -238,7 +238,7 @@ def _long_scale(n): return result -def nice_time_it(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_it(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format adapted to italian fron en version diff --git a/lingua_franca/lang/format_nl.py b/lingua_franca/lang/format_nl.py index 3b248475..a71e20aa 100644 --- a/lingua_franca/lang/format_nl.py +++ b/lingua_franca/lang/format_nl.py @@ -203,7 +203,7 @@ def pronounce_ordinal_nl(number): return pronounce_number_nl(number) + "ste" -def nice_time_nl(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_nl(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format diff --git a/lingua_franca/lang/format_pl.py b/lingua_franca/lang/format_pl.py index 5cf9540e..8be62d37 100644 --- a/lingua_franca/lang/format_pl.py +++ b/lingua_franca/lang/format_pl.py @@ -232,7 +232,7 @@ def _split_by(n, split=1000): return result -def nice_time_pl(dt, speech=True, use_24hour=True, use_ampm=False): +def nice_time_pl(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format For example, generate 'five thirty' for speech or '5:30' for diff --git a/lingua_franca/lang/format_pt.py b/lingua_franca/lang/format_pt.py index 4b8cd251..da2069c3 100644 --- a/lingua_franca/lang/format_pt.py +++ b/lingua_franca/lang/format_pt.py @@ -116,7 +116,7 @@ def pronounce_number_pt(number, places=2): return result -def nice_time_pt(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_pt(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format For example, generate 'cinco treinta' for speech or '5:30' for diff --git 
a/lingua_franca/lang/format_sl.py b/lingua_franca/lang/format_sl.py index 3b2fd23a..5316c527 100644 --- a/lingua_franca/lang/format_sl.py +++ b/lingua_franca/lang/format_sl.py @@ -333,7 +333,7 @@ def _long_scale(n): return result -def nice_time_sl(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_sl(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format For example, generate 'pet trideset' for speech or '5:30' for diff --git a/lingua_franca/lang/format_sv.py b/lingua_franca/lang/format_sv.py index ccd793a3..3bac1eb6 100644 --- a/lingua_franca/lang/format_sv.py +++ b/lingua_franca/lang/format_sv.py @@ -220,7 +220,7 @@ def pronounce_ordinal_sv(number): return result -def nice_time_sv(dt, speech=True, use_24hour=False, use_ampm=False): +def nice_time_sv(dt, speech=True, use_24hour=ConfigVar, use_ampm=False): """ Format a time to a comfortable human format diff --git a/lingua_franca/res/text/ca-es/config.json b/lingua_franca/res/text/ca-es/config.json index e204bcf7..10dd2367 100644 --- a/lingua_franca/res/text/ca-es/config.json +++ b/lingua_franca/res/text/ca-es/config.json @@ -2,5 +2,6 @@ "short_scale": true, "remove_articles": true, "use_24hour": false, - "use_ampm": false + "use_ampm": false, + "TimeVariant": "default" } \ No newline at end of file diff --git a/lingua_franca/res/text/cs-cz/config.json b/lingua_franca/res/text/cs-cz/config.json index e204bcf7..72e5387f 100644 --- a/lingua_franca/res/text/cs-cz/config.json +++ b/lingua_franca/res/text/cs-cz/config.json @@ -1,6 +1,6 @@ { "short_scale": true, "remove_articles": true, - "use_24hour": false, + "use_24hour": true, "use_ampm": false } \ No newline at end of file From fb93906395e8f8fc1c538ce45ccc900178cdeaa9 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 22:45:13 -0700 Subject: [PATCH 15/21] fix variant param/config integration --- lingua_franca/format.py | 3 +- lingua_franca/internal.py | 43 ++++++++++++++---------- 
lingua_franca/lang/format_ca.py | 7 ++-- lingua_franca/res/text/ca-es/config.json | 2 +- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/lingua_franca/format.py b/lingua_franca/format.py index 77db3f96..7060eb86 100755 --- a/lingua_franca/format.py +++ b/lingua_franca/format.py @@ -261,7 +261,6 @@ def nice_number(number, lang='', speech=True, denominators=None): try: denominators.__iter__ except (AttributeError, TypeError): - print("substituting") try: args[denominators] = range(*denominators) except TypeError: @@ -273,7 +272,7 @@ def nice_number(number, lang='', speech=True, denominators=None): @localized_function() def nice_time(dt, lang='', speech=True, use_24hour=ConfigVar, - use_ampm=ConfigVar, variant=ConfigVar("TimeVariant")): + use_ampm=ConfigVar, variant=ConfigVar): """ Format a time to a comfortable human format diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index d3b4f867..52dd1d15 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -674,18 +674,12 @@ def _call_localized_function(func, *args, **kwargs): # Now let's substitute any values that are supposed to come from # lingua_franca.config for kwarg in loc_signature.parameters: - default = loc_signature.parameters[kwarg].default - if default is ConfigVar or \ - isinstance(default, - ConfigVar): - if all((kwarg not in kwargs, - len(args) < - list(loc_signature.parameters).index(kwarg) + 1)): - config_var = config.get(kwarg, full_lang_code) if \ - default is ConfigVar else \ - config.get(default.name, full_lang_code) - if config_var is not None: - kwargs[kwarg] = config_var + if all((kwarg not in kwargs, + len(args) < + list(loc_signature.parameters).index(kwarg) + 1)): + config_var = config.get(kwarg, full_lang_code) + if config_var is not None: + kwargs[kwarg] = config_var # Now we call the function, ignoring any kwargs from the # wrapped function that aren't in the localized function. 
@@ -840,7 +834,7 @@ def resolve_resource_file(res_name, data_dir=None): return None # Resource cannot be resolved -def lookup_variant(mappings, key="variant"): +def lookup_variant(mappings, key="variant", config_name=None): """function decorator maps strings to Enums expected by language specific functions mappings can be used to translate values read from configuration files @@ -865,12 +859,25 @@ def lang_variant_function_decorator(func): @wraps(func) def call_function(*args, **kwargs): - if key in kwargs and isinstance(kwargs[key], str): - if kwargs[key] in mappings: - kwargs[key] = mappings[kwargs[key]] + from lingua_franca import config + get_from_config = False + if key not in kwargs: + get_from_config = True if config_name else False + elif isinstance(kwargs[key], ConfigVar): + get_from_config = True + if get_from_config: + if 'lang' in kwargs: + lang = kwargs['lang'] else: - raise ValueError("Unknown variant, mapping does not " - "exist for {v}".format(v=key)) + lang = get_default_loc() + kwargs[key] = config.get((config_name if config_name else key), lang) + if key in kwargs: + if isinstance(kwargs[key], str): + if kwargs[key] in mappings: + kwargs[key] = mappings[kwargs[key]] + else: + raise ValueError("Unknown variant, mapping does not " + "exist for {v}".format(v=key)) return func(*args, **kwargs) return call_function diff --git a/lingua_franca/lang/format_ca.py b/lingua_franca/lang/format_ca.py index ff33994a..ef9f0c62 100644 --- a/lingua_franca/lang/format_ca.py +++ b/lingua_franca/lang/format_ca.py @@ -14,6 +14,7 @@ # limitations under the License. 
# +from lingua_franca import config from lingua_franca.lang.format_common import convert_to_mixed_fraction from lingua_franca.lang.common_data_ca import _FRACTION_STRING_CA, \ _NUM_STRING_CA @@ -133,9 +134,11 @@ def pronounce_number_ca(number, places=2): "bell": TimeVariantCA.BELL, "full_bell": TimeVariantCA.FULL_BELL, "spanish": TimeVariantCA.SPANISH_LIKE -}) + }, + key="variant", + config_name="time_variant") def nice_time_ca(dt, speech=True, use_24hour=ConfigVar, use_ampm=False, - variant=ConfigVar("TimeVariant")): + variant=ConfigVar): """ Format a time to a comfortable human format For example, generate 'cinc trenta' for speech or '5:30' for diff --git a/lingua_franca/res/text/ca-es/config.json b/lingua_franca/res/text/ca-es/config.json index 10dd2367..c4c8d865 100644 --- a/lingua_franca/res/text/ca-es/config.json +++ b/lingua_franca/res/text/ca-es/config.json @@ -3,5 +3,5 @@ "remove_articles": true, "use_24hour": false, "use_ampm": false, - "TimeVariant": "default" + "time_variant": "default" } \ No newline at end of file From 5d3ef168ea219b70657ecdbe59ac845d04a025c3 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 23:34:06 -0700 Subject: [PATCH 16/21] use 1st loaded lang's full code as default loc --- lingua_franca/internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 52dd1d15..d317a941 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -208,7 +208,7 @@ def load_language(lang): if lang not in __loaded_langs: __loaded_langs.append(lang) if not __default_lang: - set_default_lang(lang) + set_default_lang(loc or lang) else: _set_active_langs(__loaded_langs, override_default=False) if lang not in config.keys(): From 1874671c28e61a31c3242aca8902ad47bc1af71d Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sat, 3 Apr 2021 23:53:00 -0700 Subject: [PATCH 17/21] properly unload all locs when load_langs_on_demand --- lingua_franca/internal.py 
| 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index d317a941..46cb3623 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -633,7 +633,8 @@ def _call_localized_function(func, *args, **kwargs): _module_name + " not recognized") if lang_code not in _localized_functions[_module_name].keys(): if load_langs_on_demand: - load_language(lang_code) + old_langs = __loaded_langs + __loaded_locs + load_language(full_lang_code) unload_language_afterward = True else: raise ModuleNotFoundError(_module_name + @@ -692,7 +693,10 @@ def _call_localized_function(func, *args, **kwargs): del localized_func del _module if unload_language_afterward: - unload_language(lang_code) + unload_language(full_lang_code) + unload_also = [language for language in \ + __loaded_langs + __loaded_locs] + unload_languages(unload_also) return r_val # Actual wrapper From 603ce7eeb96adff057456aa34cb5fe08e1c8bf86 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sun, 4 Apr 2021 00:35:29 -0700 Subject: [PATCH 18/21] fix overzealous unloading --- lingua_franca/internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 46cb3623..f7ca1d05 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -695,7 +695,7 @@ def _call_localized_function(func, *args, **kwargs): if unload_language_afterward: unload_language(full_lang_code) unload_also = [language for language in \ - __loaded_langs + __loaded_locs] + __loaded_langs + __loaded_locs if language not in old_langs] unload_languages(unload_also) return r_val From f21246e685a5abab6fb9f0d743dcb6c500cfcb9b Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sun, 4 Apr 2021 11:57:13 -0700 Subject: [PATCH 19/21] fix load_langs_on_demand() --- lingua_franca/internal.py | 21 ++++++++++++++++----- test/test_localizer.py | 2 +- 2 files changed, 17 insertions(+), 6 deletions(-) 
diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index f7ca1d05..7021afda 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -104,13 +104,21 @@ def get_supported_locs(): def get_active_langs(): - """ Get the list of currently-loaded language codes + """ Get the list of currently-loaded languages Returns: list(str) """ return __loaded_langs +def get_active_locs(): + """ Get the list of currently-loaded languages + + Returns: + list(str) + """ + return __loaded_locs + def _set_active_langs(langs=None, override_default=True): """ Set the list of languages to load. @@ -615,12 +623,14 @@ def _call_localized_function(func, *args, **kwargs): lang_code = get_default_lang() full_lang_code = get_full_lang_code() __use_tmp = False - if lang_code not in _SUPPORTED_LANGUAGES: - _raise_unsupported_language(lang_code) if __use_tmp: full_lang_code = tmp + if full_lang_code not in __loaded_locs: + if load_langs_on_demand: + load_language(full_lang_code) else: - full_lang_code = get_full_lang_code(lang_code) + full_lang_code = get_full_lang_code(lang_code) if lang_code \ + not in _SUPPORTED_FULL_LOCALIZATIONS else lang_code # Here comes the ugly business. _module_name = func.__module__.split('.')[-1] @@ -631,7 +641,8 @@ def _call_localized_function(func, *args, **kwargs): if _module_name not in _localized_functions.keys(): raise ModuleNotFoundError("Module lingua_franca." 
+ _module_name + " not recognized") - if lang_code not in _localized_functions[_module_name].keys(): + if lang_code not in get_active_langs() or full_lang_code not in \ + get_active_locs(): if load_langs_on_demand: old_langs = __loaded_langs + __loaded_locs load_language(full_lang_code) diff --git a/test/test_localizer.py b/test/test_localizer.py index cbc450ab..32557290 100644 --- a/test/test_localizer.py +++ b/test/test_localizer.py @@ -99,7 +99,7 @@ class TestLanguageLoading(unittest.TestCase): def test_load_on_demand(self): unload_all_languages() - lingua_franca.load_language("en") + lingua_franca.load_language("en-us") lingua_franca.config.set(setting='load_langs_on_demand', value=True) self.assertEqual(lingua_franca.parse.extract_number("one", lang="en"), 1) From d563fa54a8879cba3621268224ac727a5b994b19 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sun, 4 Apr 2021 12:27:44 -0700 Subject: [PATCH 20/21] fix introduced bug in loaded module checks --- lingua_franca/internal.py | 25 +++++++++++++++++++++---- test/test_parse_da.py | 5 ++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py index 7021afda..f2df6b26 100644 --- a/lingua_franca/internal.py +++ b/lingua_franca/internal.py @@ -588,6 +588,8 @@ def _call_localized_function(func, *args, **kwargs): elif lang_param in _SUPPORTED_LANGUAGES or \ lang_param in _SUPPORTED_FULL_LOCALIZATIONS: lang_code = args[lang_param_index] + else: + lang_code = lang_param args = args[:lang_param_index] + args[lang_param_index+1:] # Turns out, we aren't passing a lang code at all @@ -623,6 +625,9 @@ def _call_localized_function(func, *args, **kwargs): lang_code = get_default_lang() full_lang_code = get_full_lang_code() __use_tmp = False + # if the lang code is still invalid, abort directly + if lang_code not in _SUPPORTED_LANGUAGES: + raise UnsupportedLanguageError(lang_param) if __use_tmp: full_lang_code = tmp if full_lang_code not in __loaded_locs: @@ 
-641,17 +646,22 @@ def _call_localized_function(func, *args, **kwargs): if _module_name not in _localized_functions.keys(): raise ModuleNotFoundError("Module lingua_franca." + _module_name + " not recognized") - if lang_code not in get_active_langs() or full_lang_code not in \ - get_active_locs(): + + # Check if language/loc loaded, handle load_langs_on_demand + load_full = full_lang_code not in get_active_locs() + load_primary = lang_code not in get_active_langs() + if load_full or load_primary: if load_langs_on_demand: old_langs = __loaded_langs + __loaded_locs load_language(full_lang_code) unload_language_afterward = True else: - raise ModuleNotFoundError(_module_name + + if not load_full: + raise ModuleNotFoundError(_module_name + " module of language '" + lang_code + "' is not currently loaded.") + func_name = func.__name__.split('.')[-1] # At some point in the past, both the module and the language # were imported/loaded, respectively. @@ -664,7 +674,14 @@ def _call_localized_function(func, *args, **kwargs): # If we didn't find a localized function to correspond with # the wrapped function, we cached NotImplementedError in its # place. 
- loc_signature = _localized_functions[_module_name][lang_code][func_name] + try: + loc_signature = \ + _localized_functions[_module_name][lang_code][func_name] + except KeyError: + raise ModuleNotFoundError(_module_name + + " module of language '" + + lang_code + + "' is not currently loaded.") if isinstance(loc_signature, type(NotImplementedError())): raise loc_signature diff --git a/test/test_parse_da.py b/test/test_parse_da.py index 8d9de64d..2f31e343 100644 --- a/test/test_parse_da.py +++ b/test/test_parse_da.py @@ -16,7 +16,8 @@ import unittest from datetime import datetime, time -from lingua_franca import load_language, unload_language, set_default_lang +from lingua_franca import load_language, unload_language, set_default_lang, \ + get_active_langs, config from lingua_franca.parse import extract_datetime from lingua_franca.parse import extract_number from lingua_franca.parse import normalize @@ -170,6 +171,8 @@ def testExtract(text, expected_date, expected_leftover): def test_extractdatetime_no_time(self): """Check that None is returned if no time is found in sentence.""" + if 'da' not in get_active_langs(): + raise(ValueError(f"{get_active_langs()}")) with self.assertWarns(UserWarning): self.assertEqual(extract_datetime('ingen tid', lang='da-da'), None) From 4ba5a65e5ecf49c6b8e5ad6ee755320646388ca8 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sun, 4 Apr 2021 14:36:02 -0700 Subject: [PATCH 21/21] document usage of `ConfigVar` in project-structure --- project-structure.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/project-structure.md b/project-structure.md index eda6d2d5..b89125a8 100644 --- a/project-structure.md +++ b/project-structure.md @@ -17,6 +17,7 @@ ├─ res/ (fully localized data, 'en-us' vs 'en-au' and etc.) 
│ ├─ text/ │ │ ├─ / + │ │ │ ├─ config.json (locale-specific settings) │ │ │ ├─ date_time.json │ │ │ ├─ common words @@ -27,6 +28,9 @@ Ensure that all supported languages are registered in `lingua_franca.internal.py`, in the list `_SUPPORTED_LANGUAGES`. +All locales must have a `config.json` in their resource folder. This can be copied and pasted +from a similar locale, if Lingua Franca has already been localized in the same or a similar language, but must contain all of the settings present in that language's localized functions (see below). + ## Localizing functions If you are localizing an existing top-level function, there is no need to alter the top-level @@ -63,12 +67,28 @@ What you must do: - Name function arguments exactly as they are named in the top-level modules - You do not need to implement all arguments, but you must name them identically - All arguments must be keyword arguments (except the primary arguments) + - If an argument's default value should be read from `config`, rather than hardcoded, set its + default value to the *data type* `ConfigVar` (see below) - If you need to add new arguments, feel free, but MAKE SURE you add the argument to the top-level function, as a keyword arg. This is the only time you should need to modify the top-level functions while localizing. Ensure that any new arguments are at the end of the function signatures, both in the top-level function, and in your localized function. +If a function argument's default value should be read from config, it should be set +to the *data type* `ConfigVar` (not an instance). For example, here is `parse.extract_number()`'s +top-level signature: + +```python3 + def extract_number(text: str, + short_scale: bool = ConfigVar, + ordinals: bool = ConfigVar, + lang: str = '') +``` + +Again, take care to ensure that these arguments default to the data type `ConfigVar`, rather +than an instance of `ConfigVar`. 
+ ## Adding new functions Ensure that all functions which will have localized versions are registered in their module's