Skip to content

Commit

Permalink
Merge pull request #108 from ChanceNCounter/refactor/languages_cleanup
Browse files Browse the repository at this point in the history
Refactor: function discovery and introspection
  • Loading branch information
krisgesling authored Nov 2, 2020
2 parents 85fbc12 + 0d1c105 commit 903c64c
Show file tree
Hide file tree
Showing 64 changed files with 3,874 additions and 2,580 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,7 @@ venv.bak/
# mypy
.mypy_cache/

# vscode settings
# VSCod(e/ium)
.vscode/
vscode/
*.code-workspace
70 changes: 4 additions & 66 deletions lingua_franca/__init__.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,4 @@
import os
from os.path import join, expanduser


def _log_unsupported_language(language, supported_languages):
"""
Log a warning when a language is unsupported
Arguments:
language: str
The language that was supplied.
supported_languages: [str]
The list of supported languages.
"""
supported = ' '.join(supported_languages)
print('Language "{language}" not recognized! Please make sure your '
'language is one of the following: {supported}.'
.format(language=language, supported=supported))


def _candidate_resource_paths(res_name, data_dir=None):
    """Lazily yield the locations searched for res_name, in priority order."""
    # A fully qualified (or cwd-relative) file, e.g. a user setting
    yield res_name
    # The user's folder
    yield os.path.expanduser("~/.mycroft/" + res_name)
    # The system-wide data folder (or the caller-supplied replacement)
    yield os.path.expanduser(
        os.path.join(data_dir or "/opt/mycroft/res/", res_name))
    # The 'res' folder shipped next to this module
    yield os.path.abspath(os.path.normpath(
        os.path.join(os.path.dirname(__file__), 'res', res_name)))


def resolve_resource_file(res_name, data_dir=None):
    """Convert a resource name into the path of an existing file.

    Resource names are in the form: 'filename.ext'
    or 'path/filename.ext'

    The following locations are checked in order, and the first one
    that is an existing file is returned:
        1. res_name itself (e.g. a fully qualified user setting)
        2. ~/.mycroft/res_name (the user's folder)
        3. data_dir/res_name, where data_dir defaults to
           '/opt/mycroft/res/'
        4. res_name inside the 'res' folder located next to this module

    Example:
        With mycroft running as the user 'bob', if you called
            resolve_resource_file('snd/beep.wav')
        and no 'snd/beep.wav' exists relative to the working directory,
        it would return either '/home/bob/.mycroft/snd/beep.wav' or
        '/opt/mycroft/res/snd/beep.wav' or '.../res/snd/beep.wav',
        where the '...' is replaced by the path where the package has
        been installed.

    Args:
        res_name (str): a resource path/name
        data_dir (str): optional folder searched instead of
            '/opt/mycroft/res/'; a leading '~' is expanded

    Returns:
        str: path to resource or None if no resource found
    """
    # The generator is consumed lazily, so later candidates are only
    # computed when the earlier ones do not exist.
    for candidate in _candidate_resource_paths(res_name, data_dir):
        if os.path.isfile(candidate):
            return candidate

    return None  # Resource cannot be resolved
from .internal import get_default_lang, set_default_lang, get_default_loc, \
get_active_langs, _set_active_langs, get_primary_lang_code, \
get_full_lang_code, resolve_resource_file, load_language, \
load_languages, unload_language, unload_languages, get_supported_langs
205 changes: 68 additions & 137 deletions lingua_franca/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,59 +13,48 @@
# limitations under the License.
#

import datetime
import json
import os
import re
from collections import namedtuple
from inspect import signature
from warnings import warn
from os.path import join

from lingua_franca.lang import get_full_lang_code, get_primary_lang_code

from lingua_franca.lang.format_en import *
from lingua_franca.lang.format_pt import *
from lingua_franca.lang.format_it import *
from lingua_franca.lang.format_sv import *
from lingua_franca.lang.format_hu import *
from lingua_franca.lang.format_cs import *

from lingua_franca.lang.format_es import nice_number_es
from lingua_franca.lang.format_es import nice_time_es
from lingua_franca.lang.format_es import pronounce_number_es
from lingua_franca.lang.format_de import nice_number_de
from lingua_franca.lang.format_de import nice_time_de
from lingua_franca.lang.format_de import pronounce_number_de
from lingua_franca.lang.format_fr import nice_number_fr
from lingua_franca.lang.format_fr import nice_time_fr
from lingua_franca.lang.format_fr import pronounce_number_fr
from lingua_franca.lang.format_nl import nice_time_nl
from lingua_franca.lang.format_nl import pronounce_number_nl
from lingua_franca.lang.format_nl import nice_number_nl
from lingua_franca.lang.format_da import nice_number_da
from lingua_franca.lang.format_da import nice_time_da
from lingua_franca.lang.format_da import pronounce_number_da
from lingua_franca.lang.format_cs import nice_number_cs
from lingua_franca.lang.format_cs import nice_time_cs
from lingua_franca.lang.format_cs import pronounce_number_cs

from lingua_franca.bracket_expansion import SentenceTreeParser
from lingua_franca import _log_unsupported_language
from lingua_franca.internal import localized_function, \
populate_localized_function_dict, get_active_langs, \
get_full_lang_code, get_default_lang, get_default_loc, \
is_supported_full_lang, _raise_unsupported_language, \
UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning

from collections import namedtuple
import json
import os
import datetime
import re

# Names of the functions in this module that are decorated with
# @localized_function.
# NOTE(review): presumably read by lingua_franca.internal when it looks up
# the per-language implementations -- confirm against that module.
_REGISTERED_FUNCTIONS = ("nice_number",
"nice_time",
"pronounce_number",
"nice_response")

# Runs once, at import time, for the "format" module, using whatever
# languages get_active_langs() reports at that moment.
populate_localized_function_dict("format", langs=get_active_langs())


def _translate_word(name, lang):
""" Helper to get word tranlations
def _translate_word(name, lang=None):
""" Helper to get word translations
Args:
name (str): Word name. Returned as the default value if not translated.
name (str): Word name. Returned as the default value if not translated
lang (str): Language code, e.g. "en-us"
Returns:
str: translated version of resource name
"""
from lingua_franca import resolve_resource_file
from lingua_franca.internal import resolve_resource_file
if not lang:
lang = get_default_loc()

lang_code = get_full_lang_code(lang)
lang_code = lang if is_supported_full_lang(lang) else \
get_full_lang_code(lang)

filename = resolve_resource_file(join("text", lang_code, name+".word"))
if filename:
Expand Down Expand Up @@ -126,12 +115,12 @@ def _number_strings(self, number, lang):
str(int(number % 100 / 10))) or str(int(number % 100 / 10))
x0 = (self.lang_config[lang]['number'].get(
str(int(number % 100 / 10) * 10)) or
str(int(number % 100 / 10) * 10))
str(int(number % 100 / 10) * 10))
xxx = (self.lang_config[lang]['number'].get(str(number % 1000)) or
str(number % 1000))
x00 = (self.lang_config[lang]['number'].get(str(int(
number % 1000 / 100) * 100)) or
str(int(number % 1000 / 100) * 100))
str(int(number % 1000 / 100) * 100))
x_in_x00 = self.lang_config[lang]['number'].get(str(int(
number % 1000 / 100))) or str(int(number % 1000 / 100))
xx00 = self.lang_config[lang]['number'].get(str(int(
Expand All @@ -141,7 +130,7 @@ def _number_strings(self, number, lang):
number % 10000 / 100))) or str(int(number % 10000 / 100))
x000 = (self.lang_config[lang]['number'].get(str(int(
number % 10000 / 1000) * 1000)) or
str(int(number % 10000 / 1000) * 1000))
str(int(number % 10000 / 1000) * 1000))
x_in_x000 = self.lang_config[lang]['number'].get(str(int(
number % 10000 / 1000))) or str(int(number % 10000 / 1000))
x0_in_x000 = self.lang_config[lang]['number'].get(str(int(
Expand Down Expand Up @@ -246,9 +235,10 @@ def year_format(self, dt, lang, bc):


date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__),
'res/text'))
'res/text'))


@localized_function(run_own_code_on=[UnsupportedLanguageError])
def nice_number(number, lang=None, speech=True, denominators=None):
"""Format a float to human readable fractions
Expand All @@ -262,39 +252,10 @@ def nice_number(number, lang=None, speech=True, denominators=None):
Returns:
(str): The formatted string.
"""
# Convert to spoken representation in appropriate language
lang_code = get_primary_lang_code(lang)
if lang_code == "en":
return nice_number_en(number, speech, denominators)
elif lang_code == "es":
return nice_number_es(number, speech, denominators)
elif lang_code == "pt":
return nice_number_pt(number, speech, denominators)
elif lang_code == "it":
return nice_number_it(number, speech, denominators)
elif lang_code == "fr":
return nice_number_fr(number, speech, denominators)
elif lang_code == "sv":
return nice_number_sv(number, speech, denominators)
elif lang_code == "de":
return nice_number_de(number, speech, denominators)
elif lang_code == "hu":
return nice_number_hu(number, speech, denominators)
elif lang_code == "nl":
return nice_number_nl(number, speech, denominators)
elif lang_code == "da":
return nice_number_da(number, speech, denominators)
elif lang_code == "cs":
return nice_number_cs(number, speech, denominators)

# Default to the raw number for unsupported languages,
# hopefully the TTS engine will pronounce it understandably.
# TODO: nice_number_XX for other languages
_log_unsupported_language(lang_code, ['en', 'es', 'pt', 'it', 'fr',
'sv', 'de', 'hu', 'nl', 'da', 'cs'])
return str(number)


@localized_function()
def nice_time(dt, lang=None, speech=True, use_24hour=False,
use_ampm=False):
"""
Expand All @@ -312,35 +273,9 @@ def nice_time(dt, lang=None, speech=True, use_24hour=False,
Returns:
(str): The formatted time string
"""
lang_code = get_primary_lang_code(lang)
if lang_code == "en":
return nice_time_en(dt, speech, use_24hour, use_ampm)
elif lang_code == "es":
return nice_time_es(dt, speech, use_24hour, use_ampm)
elif lang_code == "it":
return nice_time_it(dt, speech, use_24hour, use_ampm)
elif lang_code == "fr":
return nice_time_fr(dt, speech, use_24hour, use_ampm)
elif lang_code == "de":
return nice_time_de(dt, speech, use_24hour, use_ampm)
elif lang_code == "hu":
return nice_time_hu(dt, speech, use_24hour, use_ampm)
elif lang_code == "nl":
return nice_time_nl(dt, speech, use_24hour, use_ampm)
elif lang_code == "da":
return nice_time_da(dt, speech, use_24hour, use_ampm)
elif lang_code == "pt":
return nice_time_pt(dt, speech, use_24hour, use_ampm)
elif lang_code == "sv":
return nice_time_sv(dt, speech, use_24hour, use_ampm)
elif lang_code == "cs":
return nice_time_cs(dt, speech, use_24hour, use_ampm)
# TODO: Other languages
_log_unsupported_language(lang_code, ['en', 'es', 'pt', 'it', 'fr',
'sv', 'de', 'hu', 'nl', 'da','cs'])
return str(dt)


@localized_function()
def pronounce_number(number, lang=None, places=2, short_scale=True,
scientific=False, ordinals=False):
"""
Expand All @@ -357,43 +292,6 @@ def pronounce_number(number, lang=None, places=2, short_scale=True,
Returns:
(str): The pronounced number
"""
lang_code = get_primary_lang_code(lang)
if lang_code == "en":
return pronounce_number_en(number, places=places,
short_scale=short_scale,
scientific=scientific,
ordinals=ordinals)
elif lang_code == "it":
return pronounce_number_it(number, places=places,
short_scale=short_scale,
scientific=scientific)
elif lang_code == "es":
return pronounce_number_es(number, places=places)
elif lang_code == "fr":
return pronounce_number_fr(number, places=places)
elif lang_code == "de":
return pronounce_number_de(number, places=places)
elif lang_code == "hu":
return pronounce_number_hu(number, places=places)
elif lang_code == "nl":
return pronounce_number_nl(number, places=places)
elif lang_code == "da":
return pronounce_number_da(number, places=places)
elif lang_code == "pt":
return pronounce_number_pt(number, places=places)
elif lang_code == "sv":
return pronounce_number_sv(number, places=places)
elif lang_code == "cs":
return pronounce_number_cs(number, places=places,
short_scale=short_scale,
scientific=scientific,
ordinals=ordinals)

# Default to just returning the numeric value
# TODO: Other languages
_log_unsupported_language(lang_code, ['en', 'es', 'pt', 'it', 'fr',
'sv', 'de', 'hu', 'nl', 'da','cs'])
return str(number)


def nice_date(dt, lang=None, now=None):
Expand Down Expand Up @@ -483,7 +381,18 @@ def nice_duration(duration, lang=None, speech=True):
Returns:
str: timespan as a string
"""
if type(duration) is datetime.timedelta:
if not lang:
warn(NoneLangWarning)
lang = get_default_loc()
if not is_supported_full_lang(lang):
# TODO deprecated; delete when 'lang=None' and 'lang=invalid' are removed
try:
lang = get_full_lang_code(lang)
except UnsupportedLanguageError:
warn(InvalidLangWarning)
lang = get_default_loc()

if isinstance(duration, datetime.timedelta):
duration = duration.total_seconds()

# Do traditional rounding: 2.5->3, 3.5->4, plus this
Expand Down Expand Up @@ -606,3 +515,25 @@ def expand_options(parentheses_line: str) -> list:
# 'a(this|that)b' -> [['a', 'this', 'b'], ['a', 'that', 'b']]
options = expand_parentheses(re.split(r'([(|)])', parentheses_line))
return [re.sub(r'\s+', ' ', ' '.join(i)).strip() for i in options]


@localized_function()
def nice_response(text, lang=None):
    """
    Sanitize certain numeric input for TTS, in the languages that need it.

    Formatters which might need this function will usually call it
    themselves. It is exposed here just in case you have a clever use
    for it.

    As of July 2020, some dates and "x ^ y"-formatted exponents are
    sanitized in the following primary language codes:
        da de nl sv

    Example:
        assertEqual(nice_response_de("dies ist der 31. mai"),
                    "dies ist der einunddreißigste mai")
        assertEqual(nice_response_de("10 ^ 2"),
                    "10 hoch 2")
    """
Loading

0 comments on commit 903c64c

Please sign in to comment.