From 12453ab0bb99666f2df63072a83542dc9ac5b4e9 Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Fri, 10 Jan 2020 17:40:05 -0800
Subject: [PATCH 1/6] Add logic to normalize comma-delimited decimals

---
 lingua_franca/parse.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index 303baedd..76e580ac 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 #
 from difflib import SequenceMatcher
+import re
+
 from lingua_franca.time import now_local
 from lingua_franca.lang import get_primary_lang_code
 
@@ -93,6 +95,13 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
     """
+    # Replace decimal commas with decimal periods so Python can floatify them
+    sanitize_decimals = re.compile(r".*\d+,{1}\d+")
+    match = sanitize_decimals.match(text)
+    while match:
+        text = text.replace(match[0], match[0].replace(',', '.'))
+        match = sanitize_decimals.match(text)
+
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extract_numbers_en(text, short_scale, ordinals)

From b7c8ad6feaf6cc2894ef16bf337e26289a465fa0 Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Sat, 11 Jan 2020 02:13:38 -0800
Subject: [PATCH 2/6] spin off normalize_decimals logic

create function for both extract_number and extract_numbers to call
---
 lingua_franca/parse.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index 76e580ac..800e5a3f 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -80,6 +80,18 @@ def match_one(query, choices):
         return best
 
 
+def normalize_decimals(text):
+    """
+        Replace decimal commas with decimal periods so Python can floatify them
+    """
+    sanitize_decimals = re.compile(r".*\d+,{1}\d+")
+    match = sanitize_decimals.match(text)
+    while match:
+        text = text.replace(match[0], match[0].replace(',', '.'))
+        match = sanitize_decimals.match(text)
+    return text
+
+
 def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
     """
         Takes in a string and extracts a list of numbers.
@@ -95,13 +107,7 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
     """
-    # Replace decimal commas with decimal periods so Python can floatify them
-    sanitize_decimals = re.compile(r".*\d+,{1}\d+")
-    match = sanitize_decimals.match(text)
-    while match:
-        text = text.replace(match[0], match[0].replace(',', '.'))
-        match = sanitize_decimals.match(text)
-
+    text = normalize_decimals(text)
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extract_numbers_en(text, short_scale, ordinals)
@@ -136,6 +142,7 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
         (int, float or False): The number extracted or False if the input
                                text contains no numbers
     """
+    text = normalize_decimals(text)
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extractnumber_en(text, short_scale=short_scale,

From f7e8f5b4b2c46be5d22d7cae8939b0a03fef4499 Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Sat, 11 Jan 2020 09:30:54 -0800
Subject: [PATCH 3/6] iterate over regex matches with re.finditer

---
 lingua_franca/parse.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index 800e5a3f..950d80d6 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -84,11 +84,9 @@ def normalize_decimals(text):
     """
         Replace decimal commas with decimal periods so Python can floatify them
     """
-    sanitize_decimals = re.compile(r".*\d+,{1}\d+")
-    match = sanitize_decimals.match(text)
-    while match:
+    sanitize_decimals = re.compile(r"\b\d+,{1}\d+\b")
+    for _, match in enumerate(re.finditer(sanitize_decimals, text)):
         text = text.replace(match[0], match[0].replace(',', '.'))
-        match = sanitize_decimals.match(text)
     return text
 
 

From 9375e550c00ebc7a58d6b2cc88afa38c5b5dbc48 Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Sat, 11 Jan 2020 09:38:06 -0800
Subject: [PATCH 4/6] add tests for decimal normalization

---
 test/test_parse.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/test_parse.py b/test/test_parse.py
index 01aec528..915e9b8a 100644
--- a/test/test_parse.py
+++ b/test/test_parse.py
@@ -123,6 +123,11 @@ def test_extract_number(self):
                                         short_scale=False), 1e12)
         self.assertEqual(extract_number("this is the billionth test",
                                         short_scale=False), 1e-12)
+        
+        # Test decimal normalization
+        self.assertEqual(extract_number("4,4"), 4.4)
+        self.assertEqual(extract_number("we have 3,5 kilometers to go"), 3.5)
+        
         # TODO handle this case
         # self.assertEqual(
         #    extract_number("6 dot six six six"),
@@ -703,6 +708,9 @@ def test_multiple_numbers(self):
         self.assertEqual(extract_numbers("this is a seven eight nine and a"
                                          " half test"),
                          [7.0, 8.0, 9.5])
+        self.assertEqual(extract_numbers("this is a seven eight 9,5 test"),
+                         [7.0, 8.0, 9.5])
+        self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test"), [7.0, 8.0, 9.6])
 
     def test_contractions(self):
         self.assertEqual(normalize("ain't"), "is not")

From 246855d5d6b3cf567bfa09754f9b9388852a3a35 Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Sat, 11 Jan 2020 10:36:52 -0800
Subject: [PATCH 5/6] use match.group(0) instead of match[0] to support py3.5

---
 lingua_franca/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index 950d80d6..b906e729 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -86,7 +86,7 @@ def normalize_decimals(text):
     """
     sanitize_decimals = re.compile(r"\b\d+,{1}\d+\b")
     for _, match in enumerate(re.finditer(sanitize_decimals, text)):
-        text = text.replace(match[0], match[0].replace(',', '.'))
+        text = text.replace(match.group(0), match.group(0).replace(',', '.'))
     return text
 
 

From 402c1f271e70b1a354018bb296751762d216f2eb Mon Sep 17 00:00:00 2001
From: ChanceNCounter <ChanceNCounter@icloud.com>
Date: Sun, 2 Feb 2020 10:15:49 -0800
Subject: [PATCH 6/6] replace comma-decimal handling with param

Alternate decimal points now specified with function parameter
---
 lingua_franca/parse.py | 32 ++++++++++++++++++++++++--------
 test/test_parse.py     | 11 +++++++----
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index b906e729..cf5a6fb0 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -80,17 +80,20 @@ def match_one(query, choices):
         return best
 
 
-def normalize_decimals(text):
+def normalize_decimals(text, decimal):
     """
-        Replace decimal commas with decimal periods so Python can floatify them
+        Replace 'decimal' between digits with '.' so Python can floatify them
     """
-    sanitize_decimals = re.compile(r"\b\d+,{1}\d+\b")
+    regex = r"\b\d+" + re.escape(decimal) + r"\d+\b"
+    sanitize_decimals = re.compile(regex)
     for _, match in enumerate(re.finditer(sanitize_decimals, text)):
-        text = text.replace(match.group(0), match.group(0).replace(',', '.'))
+        text = text.replace(match.group(
+            0), match.group(0).replace(decimal, '.'))
     return text
 
 
-def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
+def extract_numbers(text, short_scale=True, ordinals=False, lang=None,
+                    decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -102,10 +105,16 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str): the BCP-47 code for the language to use, None uses default
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
-    text = normalize_decimals(text)
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
+
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extract_numbers_en(text, short_scale, ordinals)
@@ -125,7 +134,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
     return []
 
 
-def extract_number(text, short_scale=True, ordinals=False, lang=None):
+def extract_number(text, short_scale=True, ordinals=False, lang=None,
+                   decimal='.'):
     """Takes in a string and extracts a number.
 
     Args:
@@ -136,11 +146,17 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str): the BCP-47 code for the language to use, None uses default
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int, float or False): The number extracted or False if the input
                                text contains no numbers
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
-    text = normalize_decimals(text)
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
+
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extractnumber_en(text, short_scale=short_scale,
diff --git a/test/test_parse.py b/test/test_parse.py
index 915e9b8a..afc1fddc 100644
--- a/test/test_parse.py
+++ b/test/test_parse.py
@@ -125,8 +125,9 @@ def test_extract_number(self):
                                         short_scale=False), 1e-12)
         
         # Test decimal normalization
-        self.assertEqual(extract_number("4,4"), 4.4)
-        self.assertEqual(extract_number("we have 3,5 kilometers to go"), 3.5)
+        self.assertEqual(extract_number("4,4", decimal=','), 4.4)
+        self.assertEqual(extract_number("we have 3,5 kilometers to go",
+                                        decimal=','), 3.5)
         
         # TODO handle this case
         # self.assertEqual(
@@ -708,9 +709,11 @@ def test_multiple_numbers(self):
         self.assertEqual(extract_numbers("this is a seven eight nine and a"
                                          " half test"),
                          [7.0, 8.0, 9.5])
-        self.assertEqual(extract_numbers("this is a seven eight 9,5 test"),
+        self.assertEqual(extract_numbers("this is a seven eight 9,5 test",
+                                         decimal=','),
                          [7.0, 8.0, 9.5])
-        self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test"), [7.0, 8.0, 9.6])
+        self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test",
+                                         decimal=','), [7.0, 8.0, 9.6])
 
     def test_contractions(self):
         self.assertEqual(normalize("ain't"), "is not")