Skip to content

Commit

Permalink
ref: add zwo as alternative for 2
Browse files Browse the repository at this point in the history
  • Loading branch information
souvikg10 committed Sep 6, 2023
1 parent 7520daa commit dd372c7
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
15 changes: 15 additions & 0 deletions Duckling/Numeral/DE/Corpus.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ allExamples = concat
, "Eine"
, "einen"
]
, examples (NumeralValue 2)
[ "2"
, "Zwei"
, "Zwo"
]
, examples (NumeralValue 3)
[ "3"
, "Drei"
Expand Down Expand Up @@ -77,6 +82,16 @@ allExamples = concat
[ "18"
, "achtzehn"
]
, examples (NumeralValue 82)
[ "82"
, "zwoundachtzig"
, "zweiundachtzig"
]
, examples (NumeralValue 182)
[ "182"
, "einhundertzwoundachtzig"
, "einhundertzweiundachtzig"
]
, examples (NumeralValue 200)
[ "200"
, "zweihundert"
Expand Down
5 changes: 4 additions & 1 deletion Duckling/Numeral/DE/NumParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ two =
, times10 = [assign 20 "zwanzig"]
}

-- | Alternative lexicon item for the value 2, spelled @"zwo"@.
--
-- Built directly with 'defaultNumItem'; no record fields are overridden here.
two_alternative :: NumItem
two_alternative = 2 `defaultNumItem` "zwo"

three :: NumItem
three =
(defaultNumItem 3 "drei")
Expand Down Expand Up @@ -139,7 +142,7 @@ nine :: NumItem
-- Lexicon item built by 'defaultNumItem' from the value 9 and the word "neun".
nine = defaultNumItem 9 "neun"

-- | The digit items ('one' … 'nine', plus 'two_alternative') that 'from1to9'
-- folds over.
--
-- NOTE(review): this diff view shows two definition lines for the same name.
-- The first looks like the pre-change line and the second (which adds
-- 'two_alternative') the post-change line — confirm that only one of them
-- exists in the actual module.
digitLexicon :: [NumItem]
digitLexicon = [one, two, three, four, five, six, seven, eight, nine]
digitLexicon = [one, two_alternative, two, three, four, five, six, seven, eight, nine]

-- | Combines the 'base' form of every 'digitLexicon' entry into a single
-- 'NumParser'.
--
-- The entries are chained with '<|>' in list order, with 'empty' as the
-- final fallback.
from1to9 :: NumParser
from1to9 = foldr orElse empty digitLexicon
  where
    -- Put the current item's base parser in front of the remaining chain.
    orElse item rest = base item <|> rest
Expand Down
2 changes: 1 addition & 1 deletion Duckling/Numeral/DE/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ ruleIntegerWithThousandsSeparator = Rule
ruleAllNumeralWords :: Rule
ruleAllNumeralWords = Rule
{ name = "simple and complex numerals written as one word"
, pattern = [regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"]
, pattern = [regex "(ein|zwei|zwo|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch matches) : _) ->
(parseNumeral $ concat $ Text.unpack . Text.toLower <$> matches)
Expand Down

0 comments on commit dd372c7

Please sign in to comment.