Skip to content

Commit

Permalink
replace comma-decimal handling with param
Browse files Browse the repository at this point in the history
Alternate decimal points now specified with function parameter
  • Loading branch information
ChanceNCounter committed Feb 2, 2020
1 parent 246855d commit 402c1f2
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 12 deletions.
32 changes: 24 additions & 8 deletions lingua_franca/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,20 @@ def match_one(query, choices):
return best


def normalize_decimals(text, decimal):
    """
    Replace 'decimal' with decimal periods so Python can floatify them

    Only a separator that sits directly between two runs of digits, with a
    word boundary on either side of the whole number, is rewritten; any
    other occurrence of 'decimal' in the text is left untouched.

    Args:
        text (str): the string to normalize
        decimal (str): the decimal separator used in 'text', e.g. ','
    Returns:
        str: 'text' with each '<digits><decimal><digits>' rewritten as
             '<digits>.<digits>'
    """
    # An empty separator would glue the two digit groups together and split
    # plain integers ("12" -> "1.2"); a '.' separator is already normalized.
    if not decimal or decimal == '.':
        return text

    # re.escape: the separator is caller-supplied and may be a regex
    # metacharacter ('|', '+', '*', ...), which must match literally.
    sanitize_decimals = re.compile(
        r"\b(\d+)" + re.escape(decimal) + r"(\d+)\b")

    # Substitute in place at each match position, rather than str.replace on
    # the matched substring, so identical digit sequences that did not
    # satisfy the word-boundary check are not rewritten as a side effect.
    return sanitize_decimals.sub(r"\1.\2", text)


def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
def extract_numbers(text, short_scale=True, ordinals=False, lang=None,
decimal='.'):
"""
Takes in a string and extracts a list of numbers.
Expand All @@ -102,10 +105,16 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
See https://en.wikipedia.org/wiki/Names_of_large_numbers
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
lang (str): the BCP-47 code for the language to use, None uses default
decimal (str): character to use as decimal point. defaults to '.'
Returns:
list: list of extracted numbers as floats, or empty list if none found
Note:
will always extract numbers formatted with a decimal dot/full stop,
such as '3.5', even if 'decimal' is specified.
"""
text = normalize_decimals(text)
if decimal != '.':
text = normalize_decimals(text, decimal)

lang_code = get_primary_lang_code(lang)
if lang_code == "en":
return extract_numbers_en(text, short_scale, ordinals)
Expand All @@ -125,7 +134,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
return []


def extract_number(text, short_scale=True, ordinals=False, lang=None):
def extract_number(text, short_scale=True, ordinals=False, lang=None,
decimal='.'):
"""Takes in a string and extracts a number.
Args:
Expand All @@ -136,11 +146,17 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
See https://en.wikipedia.org/wiki/Names_of_large_numbers
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
lang (str): the BCP-47 code for the language to use, None uses default
decimal (str): character to use as decimal point. defaults to '.'
Returns:
(int, float or False): The number extracted or False if the input
text contains no numbers
Note:
will always extract numbers formatted with a decimal dot/full stop,
such as '3.5', even if 'decimal' is specified.
"""
text = normalize_decimals(text)
if decimal != '.':
text = normalize_decimals(text, decimal)

lang_code = get_primary_lang_code(lang)
if lang_code == "en":
return extractnumber_en(text, short_scale=short_scale,
Expand Down
11 changes: 7 additions & 4 deletions test/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,9 @@ def test_extract_number(self):
short_scale=False), 1e-12)

# Test decimal normalization
self.assertEqual(extract_number("4,4"), 4.4)
self.assertEqual(extract_number("we have 3,5 kilometers to go"), 3.5)
self.assertEqual(extract_number("4,4", decimal=','), 4.4)
self.assertEqual(extract_number("we have 3,5 kilometers to go",
decimal=','), 3.5)

# TODO handle this case
# self.assertEqual(
Expand Down Expand Up @@ -708,9 +709,11 @@ def test_multiple_numbers(self):
self.assertEqual(extract_numbers("this is a seven eight nine and a"
" half test"),
[7.0, 8.0, 9.5])
self.assertEqual(extract_numbers("this is a seven eight 9,5 test"),
self.assertEqual(extract_numbers("this is a seven eight 9,5 test",
decimal=','),
[7.0, 8.0, 9.5])
self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test"), [7.0, 8.0, 9.6])
self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test",
decimal=','), [7.0, 8.0, 9.6])

def test_contractions(self):
self.assertEqual(normalize("ain't"), "is not")
Expand Down

0 comments on commit 402c1f2

Please sign in to comment.