diff --git a/lingua_franca/lang/parse_cs.py b/lingua_franca/lang/parse_cs.py index 15f0b29a..8f73e281 100644 --- a/lingua_franca/lang/parse_cs.py +++ b/lingua_franca/lang/parse_cs.py @@ -633,31 +633,29 @@ def extract_duration_cs(text): # Czech inflection for time: minuta,minuty,minut - safe to use minut as pattern # For day: den, dny, dnů - short patern not applicable, list all - time_units_en = { - 'microseconds': None, - 'milliseconds': None, - 'seconds': None, - 'minutes': None, - 'hours': None, - 'days': None, - 'weeks': None - } + time_units = { + 'microseconds': 0, + 'milliseconds': 0, + 'seconds': 0, + 'minutes': 0, + 'hours': 0, + 'days': 0, + 'weeks': 0 + } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}[ay]?" text = _convert_words_to_numbers_cs(text) - for unit in _TIME_UNITS_CONVERSION: - unit_pattern = pattern.format(unit=unit) - matches = re.findall(unit_pattern, text) - value = sum(map(float, matches)) - unit_en= _TIME_UNITS_CONVERSION.get(unit) # Find unit in english - # Check if is neccesary to write value - handle (days, weeks) - if time_units_en[unit_en] is None or time_units_en.get(unit_en) == 0: - time_units_en[unit_en] = value # Write value to english unit - text = re.sub(unit_pattern, '', text) + for (unit_cs, unit_en) in _TIME_UNITS_CONVERSION.items(): + unit_pattern = pattern.format(unit=unit_cs) + + def repl(match): + time_units[unit_en] += float(match.group(1)) + return '' + text = re.sub(unit_pattern, repl, text) text = text.strip() - duration = timedelta(**time_units_en) if any(time_units_en.values()) else None + duration = timedelta(**time_units) if any(time_units.values()) else None return (duration, text) diff --git a/lingua_franca/lang/parse_de.py b/lingua_franca/lang/parse_de.py index c6686662..620de389 100644 --- a/lingua_franca/lang/parse_de.py +++ b/lingua_franca/lang/parse_de.py @@ -124,13 +124,14 @@ def extract_duration_de(text): #### TODO Einstiegspunkt für Text-zu-Zahlen Konversion #text = _convert_words_to_numbers_de(text) - for unit in time_units: - unit_de = time_units[unit] + for (unit_en, unit_de) in time_units.items(): unit_pattern = pattern.format(unit=unit_de[:-1]) # remove 'n'/'e' from unit - matches = re.findall(unit_pattern, text) - value = sum(map(float, matches)) - time_units[unit] = value - text = re.sub(unit_pattern, '', text) + time_units[unit_en] = 0 + + def repl(match): + time_units[unit_en] += float(match.group(1)) + return '' + text = re.sub(unit_pattern, repl, text) text = text.strip() duration = timedelta(**time_units) if any(time_units.values()) else None diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index a10eaccf..35a4a6e5 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -610,24 +610,25 @@ def extract_duration_en(text): return None time_units = { - 'microseconds': None, - 'milliseconds': None, - 'seconds': None, - 'minutes': None, - 'hours': None, - 'days': None, - 'weeks': None + 'microseconds': 0, + 'milliseconds': 0, + 'seconds': 0, + 'minutes': 0, + 'hours': 0, + 'days': 0, + 'weeks': 0 } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}s?" text = _convert_words_to_numbers_en(text) - for unit in time_units: - unit_pattern = pattern.format(unit=unit[:-1]) # remove 's' from unit - matches = re.findall(unit_pattern, text) - value = sum(map(float, matches)) - time_units[unit] = value - text = re.sub(unit_pattern, '', text) + for unit_en in time_units: + unit_pattern = pattern.format(unit=unit_en[:-1]) # remove 's' from unit + + def repl(match): + time_units[unit_en] += float(match.group(1)) + return '' + text = re.sub(unit_pattern, repl, text) text = text.strip() duration = timedelta(**time_units) if any(time_units.values()) else None