From 51c3ce7c564d4cf18dfc1594bd3a8efb29d5dc18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=B6rje=20Karlsson?= Date: Tue, 17 Dec 2019 17:50:04 +0800 Subject: [PATCH] .NET adding internal cache prototype (perf improvement) (#1998) * .NET adding internal cache prototype for speed (#1) * Initial config normalization in English * Add internal cache prototype * Updated new Swedish recognizers and fixed Date cache experiment. * Removing support for .NET Framework 4.5 and 4.5.2. * Adding flag to disable internal cache. * Fixing TurkishNumberRangeParserConfiguration to use Turkish number extractors. * Making sure English number range passes Options internally. * Moving cache keys to tuples. Making sure all datetime extractions receive reference time. * Further improvements with cache reuse and key changes. * Adding ISO 639-2 language codes to relevant pattern files * Moving to standardize configs. Needs to be propagated to other languages. * Reducing number of cache instances and propagating to Dutch. * Centralizing number resultscache code and propagating configs to Chinese. * Propagated configs and cache to Spanish and Portuguese numbers. * Patch for other platforms after the language marker changes to match ISO * Minor cleanup --- .../Chinese/DateTimeDefinitions.cs | 1 + .../Chinese/NumbersDefinitions.cs | 2 +- .../Dutch/DateTimeDefinitions.cs | 1 + .../Dutch/NumberDefinitions.cs | 2 +- .../English/DateTimeDefinitions.cs | 1 + .../French/DateTimeDefinitions.cs | 1 + .../French/NumbersDefinitions.cs | 2 +- .../German/DateTimeDefinitions.cs | 1 + .../Hindi/DateTimeDefinitions.cs | 1 + .../Italian/DateTimeDefinitions.cs | 3 +- .../Japanese/DateTimeDefinitions.cs | 1 + ...soft.Recognizers.Definitions.Common.csproj | 8 +- .../Portuguese/DateTimeDefinitions.cs | 1 + .../Spanish/DateTimeDefinitions.cs | 1 + .../Turkish/DateTimeDefinitions.cs | 3 +- .../Turkish/NumbersDefinitions.cs | 2 +- .../Microsoft.Recognizers.Definitions.csproj | 2 +- .../Microsoft.Recognizers.Text.Choice.csproj | 9 +- .../Microsoft.Recognizers.Text.Choice.nuspec | 2 - ...ft.Recognizers.Text.DataDrivenTests.csproj | 7 +- .../Number/LongFormTestConfiguration.cs | 2 +- .../TestNumberRecognizerInitialization.cs | 8 +- .../Number/TestParserFactory.cs | 20 +- .../TestBase.cs | 5 +- .../TestHelpers.cs | 4 +- ...zers.Text.DataTypes.DataDrivenTests.csproj | 4 +- ...zers.Text.DataTypes.TimexExpression.csproj | 8 +- ...zers.Text.DataTypes.TimexExpression.nuspec | 2 - .../ChineseDateExtractorConfiguration.cs | 3 +- ...ChineseDatePeriodExtractorConfiguration.cs | 23 +- ...eseDateTimePeriodExtractorConfiguration.cs | 20 +- .../ChineseMergedExtractorConfiguration.cs | 12 +- .../Parsers/ChineseDateParserConfiguration.cs | 20 +- .../ChineseDatePeriodParserConfiguration.cs | 72 ++--- .../Chinese/Parsers/ChineseDateTimeParser.cs | 15 +- ...hineseDateTimePeriodParserConfiguration.cs | 29 +- .../ChineseHolidayParserConfiguration.cs | 250 +++++++++--------- .../BaseDateTimeOptionsConfiguration.cs | 2 + .../Config/IDateTimeOptionsConfiguration.cs | 2 + .../DateTimeOptions.cs | 5 + .../DutchDateExtractorConfiguration.cs | 16 +- .../DutchDatePeriodExtractorConfiguration.cs | 16 +- .../DutchDateTimeExtractorConfiguration.cs | 13 +- ...tchDateTimePeriodExtractorConfiguration.cs | 12 +- .../DutchDurationExtractorConfiguration.cs | 12 +- .../DutchMergedExtractorConfiguration.cs | 12 +- .../DutchTimePeriodExtractorConfiguration.cs | 13 +- .../DutchCommonDateTimeParserConfiguration.cs | 17 +- .../DutchDurationParserConfiguration.cs | 2 +- .../EnglishDateExtractorConfiguration.cs | 15 +- ...EnglishDatePeriodExtractorConfiguration.cs | 17 +- .../EnglishDateTimeExtractorConfiguration.cs | 13 +- ...ishDateTimePeriodExtractorConfiguration.cs | 12 +- .../EnglishDurationExtractorConfiguration.cs | 13 +- .../EnglishMergedExtractorConfiguration.cs | 12 +- ...EnglishTimePeriodExtractorConfiguration.cs | 13 +- ...nglishCommonDateTimeParserConfiguration.cs | 18 +- .../EnglishDurationParserConfiguration.cs | 2 +- .../Extractors/AbstractYearExtractor.cs | 3 +- .../Extractors/BaseDateExtractor.cs | 63 +++-- .../Extractors/BaseDatePeriodExtractor.cs | 47 +++- .../Extractors/BaseDateTimeAltExtractor.cs | 4 +- .../Extractors/BaseDateTimeExtractor.cs | 2 + .../Extractors/BaseDateTimePeriodExtractor.cs | 11 +- .../Extractors/BaseDurationExtractor.cs | 1 - .../Extractors/BaseSetExtractor.cs | 5 +- .../Extractors/BaseTimeExtractor.cs | 42 ++- .../Extractors/BaseTimePeriodExtractor.cs | 25 ++ .../Extractors/IDateTimeZoneExtractor.cs | 1 - .../FrenchDateExtractorConfiguration.cs | 12 +- .../FrenchDatePeriodExtractorConfiguration.cs | 14 +- ...nchDateTimePeriodExtractorConfiguration.cs | 12 +- .../FrenchTimePeriodExtractorConfiguration.cs | 13 +- ...FrenchCommonDateTimeParserConfiguration.cs | 11 +- .../FrenchDurationParserConfiguration.cs | 2 +- .../GermanDateExtractorConfiguration.cs | 12 +- .../GermanDatePeriodExtractorConfiguration.cs | 14 +- ...GermanCommonDateTimeParserConfiguration.cs | 11 +- .../GermanDurationParserConfiguration.cs | 2 +- .../HindiDateExtractorConfiguration.cs | 12 +- .../HindiDatePeriodExtractorConfiguration.cs | 14 +- .../HindiCommonDateTimeParserConfiguration.cs | 12 +- .../HindiDurationParserConfiguration.cs | 2 +- .../ItalianDateExtractorConfiguration.cs | 12 +- ...ItalianDatePeriodExtractorConfiguration.cs | 14 +- ...ianDateTimePeriodExtractorConfiguration.cs | 12 +- ...talianCommonDateTimeParserConfiguration.cs | 11 +- .../ItalianDurationParserConfiguration.cs | 2 +- .../JapaneseDateParserConfiguration.cs | 11 +- .../JapaneseDatePeriodParserConfiguration.cs | 58 ++-- .../Parsers/JapaneseDateTimeParser.cs | 12 +- ...paneseDateTimePeriodParserConfiguration.cs | 23 +- .../JapaneseHolidayParserConfiguration.cs | 236 +++++++++-------- ...Microsoft.Recognizers.Text.DateTime.csproj | 11 +- ...Microsoft.Recognizers.Text.DateTime.nuspec | 2 - .../Microsoft.Recognizers.Text.DateTime.xml | 5 + .../Parsers/BaseDateParser.cs | 3 +- .../Parsers/BaseDatePeriodParser.cs | 4 +- .../Parsers/BaseDateTimePeriodParser.cs | 10 +- .../Parsers/BaseDurationParser.cs | 2 +- .../Parsers/BaseMergedDateTimeParser.cs | 101 +++---- .../Parsers/BaseTimePeriodParser.cs | 2 +- .../Parsers/FullDateTimeParser.cs | 55 ++-- .../Parsers/IDurationParserConfiguration.cs | 2 +- .../PortugueseDateExtractorConfiguration.cs | 16 +- ...tugueseDatePeriodExtractorConfiguration.cs | 16 +- ...ortugueseDateTimeExtractorConfiguration.cs | 12 +- ...eseDateTimePeriodExtractorConfiguration.cs | 11 +- ...ortugueseDurationExtractorConfiguration.cs | 12 +- .../PortugueseMergedExtractorConfiguration.cs | 12 +- ...tugueseTimePeriodExtractorConfiguration.cs | 13 +- ...ugueseCommonDateTimeParserConfiguration.cs | 17 +- .../PortugueseDurationParserConfiguration.cs | 2 +- .../SpanishDateExtractorConfiguration.cs | 16 +- ...SpanishDatePeriodExtractorConfiguration.cs | 16 +- .../SpanishDateTimeExtractorConfiguration.cs | 18 +- ...ishDateTimePeriodExtractorConfiguration.cs | 11 +- .../SpanishDurationExtractorConfiguration.cs | 12 +- .../SpanishMergedExtractorConfiguration.cs | 12 +- ...SpanishTimePeriodExtractorConfiguration.cs | 13 +- ...panishCommonDateTimeParserConfiguration.cs | 17 +- .../SpanishDurationParserConfiguration.cs | 2 +- .../TurkishDateExtractorConfiguration.cs | 11 +- ...TurkishDatePeriodExtractorConfiguration.cs | 14 +- ...urkishCommonDateTimeParserConfiguration.cs | 12 +- .../TurkishDurationParserConfiguration.cs | 2 +- .../Utilities/DurationParsingUtil.cs | 1 + .../Utilities/TimeFunctions.cs | 13 +- .../Utilities/Token.cs | 13 +- .../Chinese/Extractors/CardinalExtractor.cs | 9 +- .../Chinese/Extractors/DoubleExtractor.cs | 2 +- .../Chinese/Extractors/FractionExtractor.cs | 2 +- .../Chinese/Extractors/IntegerExtractor.cs | 2 +- .../Chinese/Extractors/NumberExtractor.cs | 6 +- .../Extractors/NumberRangeExtractor.cs | 5 +- .../Chinese/Extractors/OrdinalExtractor.cs | 2 +- .../Chinese/Extractors/PercentageExtractor.cs | 2 +- .../ChineseNumberParserConfiguration.cs | 2 +- .../ChineseNumberRangeParserConfiguration.cs | 6 +- .../Config/BaseNumberOptionsConfiguration.cs | 12 +- .../Config/INumberOptionsConfiguration.cs | 2 + .../Constants.cs | 10 +- .../Dutch/Extractors/CardinalExtractor.cs | 37 ++- .../Dutch/Extractors/DoubleExtractor.cs | 26 +- .../Dutch/Extractors/FractionExtractor.cs | 27 +- .../Dutch/Extractors/IntegerExtractor.cs | 39 ++- .../Dutch/Extractors/NumberExtractor.cs | 45 ++-- .../Dutch/Extractors/NumberRangeExtractor.cs | 4 +- .../Dutch/Extractors/OrdinalExtractor.cs | 28 +- .../Dutch/Extractors/PercentageExtractor.cs | 6 +- .../Parsers/DutchNumberParserConfiguration.cs | 2 +- .../DutchNumberRangeParserConfiguration.cs | 6 +- .../English/Extractors/CardinalExtractor.cs | 35 ++- .../English/Extractors/DoubleExtractor.cs | 27 +- .../English/Extractors/FractionExtractor.cs | 26 +- .../English/Extractors/IntegerExtractor.cs | 37 ++- .../Extractors/MergedNumberExtractor.cs | 20 +- .../English/Extractors/NumberExtractor.cs | 51 ++-- .../Extractors/NumberRangeExtractor.cs | 9 +- .../English/Extractors/OrdinalExtractor.cs | 30 ++- .../English/Extractors/PercentageExtractor.cs | 14 +- .../EnglishNumberParserConfiguration.cs | 2 +- .../EnglishNumberRangeParserConfiguration.cs | 7 +- .../Extractors/BaseMergedNumberExtractor.cs | 4 +- .../Extractors/BaseNumberExtractor.cs | 10 +- .../Extractors/BaseNumberRangeExtractor.cs | 5 +- .../Extractors/BasePercentageExtractor.cs | 20 +- .../Extractors/CachedNumberExtractor.cs | 35 +++ .../French/Extractors/FractionExtractor.cs | 4 +- .../French/Extractors/NumberExtractor.cs | 4 +- .../FrenchNumberParserConfiguration.cs | 2 +- .../GermanNumberParserConfiguration.cs | 2 +- .../Hindi/Extractors/FractionExtractor.cs | 4 +- .../Hindi/Extractors/NumberExtractor.cs | 4 +- .../Hindi/Extractors/OrdinalExtractor.cs | 1 + .../Parsers/HindiNumberParserConfiguration.cs | 2 +- .../HindiNumberRangeParserConfiguration.cs | 7 +- .../Italian/Extractors/FractionExtractor.cs | 4 +- .../Italian/Extractors/NumberExtractor.cs | 4 +- .../ItalianNumberParserConfiguration.cs | 2 +- .../JapaneseNumberParserConfiguration.cs | 2 +- .../KoreanNumberParserConfiguration.cs | 2 +- .../Microsoft.Recognizers.Text.Number.csproj | 4 +- .../Microsoft.Recognizers.Text.Number.nuspec | 2 - .../Microsoft.Recognizers.Text.Number.xml | 7 +- .../NumberOptions.cs | 5 + .../NumberRecognizer.cs | 31 +-- .../Parsers/BaseCJKNumberParser.cs | 9 +- .../BaseIndianNumberParserConfiguration.cs | 2 +- .../Parsers/BaseNumberParser.cs | 6 +- .../Parsers/BasePercentageParser.cs | 2 +- .../Parsers/INumberParserConfiguration.cs | 6 +- .../Extractors/CardinalExtractor.cs | 32 ++- .../Portuguese/Extractors/DoubleExtractor.cs | 25 +- .../Extractors/FractionExtractor.cs | 25 +- .../Portuguese/Extractors/IntegerExtractor.cs | 36 ++- .../Portuguese/Extractors/NumberExtractor.cs | 45 ++-- .../Portuguese/Extractors/OrdinalExtractor.cs | 27 +- .../Extractors/PercentageExtractor.cs | 6 +- .../PortugueseNumberParserConfiguration.cs | 2 +- .../Spanish/Extractors/CardinalExtractor.cs | 33 ++- .../Spanish/Extractors/DoubleExtractor.cs | 25 +- .../Spanish/Extractors/FractionExtractor.cs | 25 +- .../Spanish/Extractors/IntegerExtractor.cs | 35 ++- .../Spanish/Extractors/NumberExtractor.cs | 45 ++-- .../Extractors/NumberRangeExtractor.cs | 4 +- .../Spanish/Extractors/OrdinalExtractor.cs | 27 +- .../Spanish/Extractors/PercentageExtractor.cs | 8 +- .../SpanishNumberParserConfiguration.cs | 2 +- .../SpanishNumberRangeParserConfiguration.cs | 6 +- .../Swedish/Extractors/FractionExtractor.cs | 15 +- .../Swedish/Extractors/NumberExtractor.cs | 15 +- .../Swedish/Extractors/OrdinalExtractor.cs | 14 +- .../SwedishNumberParserConfiguration.cs | 2 +- .../Turkish/Extractors/FractionExtractor.cs | 4 +- .../Turkish/Extractors/NumberExtractor.cs | 4 +- .../Extractors/NumberRangeExtractor.cs | 3 +- .../TurkishNumberParserConfiguration.cs | 2 +- .../TurkishNumberRangeParserConfiguration.cs | 6 +- ...eseNumberWithUnitExtractorConfiguration.cs | 7 +- ...hineseNumberWithUnitParserConfiguration.cs | 9 +- ...tchNumberWithUnitExtractorConfiguration.cs | 5 +- .../DutchNumberWithUnitParserConfiguration.cs | 9 +- ...ishNumberWithUnitExtractorConfiguration.cs | 5 +- ...nglishNumberWithUnitParserConfiguration.cs | 8 +- .../Extractors/BaseMergedUnitExtractor.cs | 14 +- .../Extractors/NumberWithUnitExtractor.cs | 45 ++-- ...FrenchNumberWithUnitParserConfiguration.cs | 7 +- ...GermanNumberWithUnitParserConfiguration.cs | 7 +- .../HindiNumberWithUnitParserConfiguration.cs | 6 +- ...talianNumberWithUnitParserConfiguration.cs | 7 +- ...paneseNumberWithUnitParserConfiguration.cs | 7 +- ...oft.Recognizers.Text.NumberWithUnit.csproj | 8 +- ...oft.Recognizers.Text.NumberWithUnit.nuspec | 2 - ...rosoft.Recognizers.Text.NumberWithUnit.xml | 10 + .../NumberWithUnitOptions.cs | 10 + ...eseNumberWithUnitExtractorConfiguration.cs | 5 +- ...ugueseNumberWithUnitParserConfiguration.cs | 9 +- ...ishNumberWithUnitExtractorConfiguration.cs | 6 +- ...panishNumberWithUnitParserConfiguration.cs | 9 +- ...urkishNumberWithUnitParserConfiguration.cs | 7 +- ...Microsoft.Recognizers.Text.Sequence.csproj | 7 +- ...Microsoft.Recognizers.Text.Sequence.nuspec | 2 - .NET/Microsoft.Recognizers.Text.sln | 3 +- ...Microsoft.Recognizers.Text.sln.DotSettings | 2 + .../Extractors/IExtractor.cs | 10 +- .../Extractors/Metadata.cs | 5 + .../InternalCache/ICloneableType.cs | 7 + .../InternalCache/ResultsCache.cs | 50 ++++ .../Microsoft.Recognizers.Text.csproj | 11 +- .../Microsoft.Recognizers.Text.nuspec | 5 +- .NET/Microsoft.Recognizers.Text/Recognizer.cs | 1 + .NET/test-pack.sh | 18 ++ .../datetime/resources/ChineseDateTime.java | 2 + .../datetime/resources/EnglishDateTime.java | 2 + .../datetime/resources/FrenchDateTime.java | 2 + .../resources/PortugueseDateTime.java | 2 + .../datetime/resources/SpanishDateTime.java | 2 + .../chinese/extractors/DoubleExtractor.java | 4 +- .../chinese/extractors/FractionExtractor.java | 2 +- .../chinese/extractors/IntegerExtractor.java | 8 +- .../chinese/extractors/OrdinalExtractor.java | 4 +- .../extractors/PercentageExtractor.java | 4 +- .../french/extractors/DoubleExtractor.java | 2 +- .../french/extractors/FractionExtractor.java | 6 +- .../french/extractors/IntegerExtractor.java | 4 +- .../french/extractors/OrdinalExtractor.java | 2 +- .../text/number/resources/ChineseNumeric.java | 2 +- .../text/number/resources/FrenchNumeric.java | 2 +- .../src/resources/chineseDateTime.ts | 1 + .../src/resources/englishDateTime.ts | 1 + .../src/resources/frenchDateTime.ts | 1 + .../src/resources/portugueseDateTime.ts | 1 + .../src/resources/spanishDateTime.ts | 1 + .../src/number/chinese/extractors.ts | 24 +- .../src/number/french/extractors.ts | 14 +- .../src/resources/chineseNumeric.ts | 2 +- .../src/resources/frenchNumeric.ts | 2 +- Patterns/Bulgarian/Bulgarian-Numbers.yaml | 1 + Patterns/Chinese/Chinese-DateTime.yaml | 2 + Patterns/Chinese/Chinese-Numbers.yaml | 3 +- Patterns/Dutch/Dutch-DateTime.yaml | 2 + Patterns/Dutch/Dutch-Numbers.yaml | 3 +- Patterns/English/English-DateTime.yaml | 2 + Patterns/English/English-Numbers.yaml | 1 + Patterns/French/French-DateTime.yaml | 2 + Patterns/French/French-Numbers.yaml | 3 +- Patterns/German/German-DateTime.yaml | 2 + Patterns/German/German-Numbers.yaml | 1 + Patterns/Hindi/Hindi-DateTime.yaml | 2 + Patterns/Hindi/Hindi-Numbers.yaml | 1 + Patterns/Italian/Italian-DateTime.yaml | 4 +- Patterns/Italian/Italian-Numbers.yaml | 1 + Patterns/Japanese/Japanese-DateTime.yaml | 2 + Patterns/Japanese/Japanese-Numbers.yaml | 3 +- Patterns/Korean/Korean-Numbers.yaml | 2 +- Patterns/Portuguese/Portuguese-DateTime.yaml | 2 + Patterns/Portuguese/Portuguese-Numbers.yaml | 1 + Patterns/Spanish/Spanish-DateTime.yaml | 2 + Patterns/Spanish/Spanish-Numbers.yaml | 1 + Patterns/Swedish/Swedish-Numbers.yaml | 1 + Patterns/Turkish/Turkish-DateTime.yaml | 4 +- Patterns/Turkish/Turkish-Numbers.yaml | 3 +- .../resources/chinese_date_time.py | 1 + .../resources/english_date_time.py | 1 + .../resources/french_date_time.py | 1 + .../resources/portuguese_date_time.py | 1 + .../resources/spanish_date_time.py | 1 + .../number/chinese/extractors.py | 24 +- .../number/french/extractors.py | 14 +- .../resources/chinese_numeric.py | 2 +- .../resources/french_numeric.py | 2 +- Specs/DateTime/Dutch/DateTimeExtractor.json | 2 +- Specs/DateTime/English/DateTimeModel.json | 23 ++ .../English/DateTimeModelComplexCalendar.json | 23 ++ Specs/DateTime/German/DateTimeModel.json | 1 - Specs/DateTime/Italian/TimeExtractor.json | 6 +- Specs/DateTime/Turkish/MergedParser.json | 4 +- Specs/Number/English/NumberModel.json | 15 ++ .../NumberRangeModelExperimentalMode.json | 12 +- 320 files changed, 2616 insertions(+), 1207 deletions(-) create mode 100644 .NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs create mode 100644 .NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs create mode 100644 .NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs create mode 100644 .NET/test-pack.sh diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs index e360498463..1960f2dcf9 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Chinese public static class DateTimeDefinitions { + public const string LangMarker = @"Chi"; public const string MonthRegex = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)"; public const string DayRegex = @"(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)"; public const string DateDayRegexInChinese = @"(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs index bbe2e62f3e..ee1738cf1a 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/NumbersDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Chinese public static class NumbersDefinitions { - public const string LangMarker = @"Chs"; + public const string LangMarker = @"Chi"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; public const char DecimalSeparatorChar = '.'; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs index c348cbc5d6..dc3d92c1a7 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Dutch public static class DateTimeDefinitions { + public const string LangMarker = @"Dut"; public const bool CheckBothBeforeAfter = false; public static readonly string TillRegex = $@"(?\b(tot|totdat|gedurende|tijdens|ten tijde van)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; public static readonly string RangeConnectorRegex = $@"(?\b(en|tot en met|t/m|tot|tot aan)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs index 952135cc65..5bcbd1d506 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumberDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Dutch public static class NumbersDefinitions { - public const string LangMarker = @"Nl"; + public const string LangMarker = @"Dut"; public const bool CompoundNumberLanguage = true; public const bool MultiDecimalSeparatorCulture = false; public const string RoundNumberIntegerRegex = @"(honderd|duizend|miljoen|miljard|biljoen)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index 4f0a8c550b..1e858a1ade 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.English public static class DateTimeDefinitions { + public const string LangMarker = @"Eng"; public const bool CheckBothBeforeAfter = false; public static readonly string TillRegex = $@"(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; public static readonly string RangeConnectorRegex = $@"(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs index c69a30bef8..62c39c9ac7 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.French public static class DateTimeDefinitions { + public const string LangMarker = @"Fre"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?au|et|(jusqu')?[aà]|avant|--|-|—|——)"; public const string RangeConnectorRegex = @"(?de la|au|[aà]|et(\s*la)?|--|-|—|——)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs index dc34d49fa5..b0ffd66740 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/French/NumbersDefinitions.cs @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.French public static class NumbersDefinitions { - public const string LangMarker = @"Fr"; + public const string LangMarker = @"Fre"; public const bool CompoundNumberLanguage = false; public const bool MultiDecimalSeparatorCulture = true; public const string RoundNumberIntegerRegex = @"(cent|mille|millions|million|milliard|milliards|billion|billions)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs index 071fbf823b..6aa6435344 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.German public static class DateTimeDefinitions { + public const string LangMarker = @"Ger"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?zu|bis\s*zum|zum|bis|bis\s*hin(\s*zum)?|--|-|—|——)"; public const string RangeConnectorRegex = @"(?und|--|-|—|——)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs index 423ad2bc2b..6ebcb04b4d 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Hindi/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Hindi public static class DateTimeDefinitions { + public const string LangMarker = @"Hin"; public const bool CheckBothBeforeAfter = true; public static readonly string TillRegex = $@"(?\b(और|तक|द्वारा|से|to)|{BaseDateTime.RangeConnectorSymbolRegex})"; public static readonly string RangeConnectorRegex = $@"(?\b(और|तक|द्वारा|से|to)|{BaseDateTime.RangeConnectorSymbolRegex})"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs index 1a1c0d7a30..1405d8a148 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Italian public static class DateTimeDefinitions { + public const string LangMarker = @"Ita"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?\b(fino\s+a(l(l[aoe'])?|gli|i)?|a(l(l[aoe'])?|gli|i)?|e\s+(il?|l[aoe']|gli))\b|--|-|—|——|~)"; public const string RestrictedTillRegex = @"(?\b(fino\s+a(l(l[aoe'])?|gli|i)?)\b|--|-|—|——|~)"; @@ -157,7 +158,7 @@ public static class DateTimeDefinitions public const string AmbiguousRangeModifierPrefix = @"^[.]"; public static readonly string NumberEndingPattern = $@"^(\s+(?riunione|appuntamento|conferenza|chiamata|chiamata skype)\s+all['e]\s*(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; public static readonly string TimeRegex1 = $@"(((((?<=\b(da|al)?(le|l'|ore)\s*)({EngTimeRegex}))|((?<=\b(da|al)?(le|l'|ore)\s*)({HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex}))))|(({TimePrefix}\s+)({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}))|(({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s+{TimePrefix}))((\s*{DescRegex})|\b))"; - public static readonly string TimeRegex2 = $@"({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; + public static readonly string TimeRegex2 = $@"(t)?({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)"; public static readonly string TimeRegex3 = $@"\b{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})(\s+{TimePrefix})?"; public static readonly string TimeRegex4 = $@"\b({BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?\s+{TimeSuffix}|{OclockPrefix}\s+{BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?)\b"; public static readonly string TimeRegex5 = $@"\b(({BasicTime}\s*{DescRegex}(\s+{TimePrefix})?)|({BasicTime}(\s+{TimePrefix})((\s*{DescRegex})|\b))|((?<=\b(da|al)?(le|l'|ore)\s*)(\b(?{EngTimeRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)|\b(?{HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex})\b))))"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs index 7fd0558829..ffc5c46e9c 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Japanese public static class DateTimeDefinitions { + public const string LangMarker = @"Jpn"; public const string MonthRegex = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)"; public const string MonthRegexForPeriod = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)(?=\b|t|まで|から)?"; public const string MonthNumRegexForPeriod = @"(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(?=\b|t|まで|から)?"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj b/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj index b881bd9fcd..907609fc4e 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Microsoft.Recognizers.Definitions.Common.csproj @@ -1,14 +1,15 @@  - net462 + net462;netstandard2.0 false false ../Recognizers-Text.ruleset OnOutputUpdated - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs index c815bd06e1..bde09845dc 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Portuguese public static class DateTimeDefinitions { + public const string LangMarker = @"Por"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?ate|as|às|até|ateh|a|ao|--|-|—|——)(\s+(o|[aà](s)?))?"; public const string AndRegex = @"(?e|e\s*o|--|-|—|——)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs index 9ceca361a3..3b20131999 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Spanish public static class DateTimeDefinitions { + public const string LangMarker = @"Spa"; public const bool CheckBothBeforeAfter = false; public const string TillRegex = @"(?hasta|al|a|--|-|—|——)(\s+(el|la(s)?))?"; public const string AndRegex = @"(?y|y\s*el|--|-|—|——)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs index dbfec77a53..05e6039eda 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Turkish/DateTimeDefinitions.cs @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Turkish public static class DateTimeDefinitions { + public const string LangMarker = @"Tur"; public const bool CheckBothBeforeAfter = true; public static readonly string TillRegex = $@"(?\b(kadar|dek\b|değin)|{BaseDateTime.RangeConnectorSymbolRegex})"; public static readonly string TillConnectorRegex = $@"(?('?tan|'?ten|'?den|'?dan|ile)\b|{BaseDateTime.RangeConnectorSymbolRegex})"; @@ -207,7 +208,7 @@ public static class DateTimeDefinitions public static readonly string ConnectNumRegex = $@"\b({DescRegex}\s+){HourRegex}(?00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)(?!\d)"; public static readonly string TimeRegexWithDotConnector = $@"({HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; public static readonly string TimeRegex1 = $@"\b(({TimePrefix}\s+)(saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex}))(?!(\s+saat|\d+))"; - public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)"; + public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?(t)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)"; public static readonly string TimeRegex3 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex})"; public static readonly string TimeRegex4 = $@"\b({TimePrefix}\s+)?(saat\s)?{TimeSuffix}(?=(\b|dan))"; public static readonly string TimeRegex5 = $@"\b({TimePrefix}\s+)?(saat\s)?(? - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj index 13c54f1fc5..39ef7af5a8 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj +++ b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.csproj @@ -1,7 +1,7 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false @@ -9,7 +9,8 @@ $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all @@ -44,4 +46,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec index ce6e08edfb..d5c01b51d6 100644 --- a/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec @@ -19,8 +19,6 @@ - - diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj index c2e048e92d..2cf4396864 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Microsoft.Recognizers.Text.DataDrivenTests.csproj @@ -5,6 +5,7 @@ false © Microsoft Corporation. All rights reserved. + true full @@ -15,6 +16,7 @@ 4 ../Recognizers-Text.ruleset + pdbonly true @@ -24,6 +26,7 @@ 4 ../Recognizers-Text.ruleset + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + @@ -54,4 +58,5 @@ + \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs index 4562fd2d56..49857f6141 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/LongFormTestConfiguration.cs @@ -46,7 +46,7 @@ public LongFormTestConfiguration(char decimalSep, char nonDecimalSep) public string HalfADozenText { get; } - public string LangMarker { get; } = "SelfDefined"; + public string LanguageMarker { get; } = "SelfDefined"; public char NonDecimalSeparatorChar { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs index cd5ccaf122..0be68d727f 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestNumberRecognizerInitialization.cs @@ -18,10 +18,12 @@ public class TestNumberRecognizerInitialization public TestNumberRecognizerInitialization() { + var numConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None); + var pureNumConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None, NumberMode.PureNumber); + controlModel = new NumberModel( - AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, - new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(EnglishCulture))), - NumberExtractor.GetInstance(NumberMode.PureNumber)); + AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration(numConfig)), + NumberExtractor.GetInstance(pureNumConfig)); } [TestMethod] diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs index c215fb6286..39be5d3b66 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/Number/TestParserFactory.cs @@ -18,7 +18,7 @@ public class TestParserFactory [TestMethod] public void TestEnglishParser() { - var config = new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.English)); + var config = new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -32,7 +32,7 @@ public void TestEnglishParser() [TestMethod] public void TestSpanishParser() { - var config = new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Spanish)); + var config = new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Spanish, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -46,7 +46,7 @@ public void TestSpanishParser() [TestMethod] public void TestPortugueseParser() { - var config = new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Portuguese)); + var config = new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -60,7 +60,7 @@ public void TestPortugueseParser() [TestMethod] public void TestChineseParser() { - var config = new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese)); + var config = new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -74,7 +74,7 @@ public void TestChineseParser() [TestMethod] public void TestJapaneseParser() { - var config = new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese)); + var config = new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parserCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -88,7 +88,7 @@ public void TestJapaneseParser() [TestMethod] public void TestKoreanParser() { - var config = new KoreanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Korean)); + var config = new KoreanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Korean, NumberOptions.None)); IParser parserNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); @@ -98,7 +98,7 @@ public void TestKoreanParser() [TestMethod] public void TestFrenchParser() { - var config = new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.French)); + var config = new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -112,7 +112,7 @@ public void TestFrenchParser() [TestMethod] public void TestGermanParser() { - var config = new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.German)); + var config = new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.German, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -126,7 +126,7 @@ public void TestGermanParser() [TestMethod] public void TestItalianParser() { - var config = new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Italian)); + var config = new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); @@ -140,7 +140,7 @@ public void TestItalianParser() [TestMethod] public void TestTurkishParser() { - var config = new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Turkish)); + var config = new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None)); IParser parseNumber = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, config); IParser parseCardinal = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Cardinal, config); diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs index 79a3fa87c9..088791fbbc 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestBase.cs @@ -1,5 +1,4 @@ using System; -using System.Collections; using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; @@ -343,10 +342,8 @@ private void ValidateResults(TestModel testSpec, IEnumerable testResolut Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(testSpec)); - foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) + foreach (var (expected, actual) in Enumerable.Zip(expectedResults, actualResults, Tuple.Create)) { - var expected = tuple.Item1; - var actual = tuple.Item2; Assert.AreEqual(expected.TypeName, actual.TypeName, GetMessage(testSpec)); Assert.AreEqual(expected.Text, actual.Text, GetMessage(testSpec)); diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs index fd30e05f1f..6b0e3a0d1d 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs @@ -428,13 +428,13 @@ public static IDateTimeExtractor GetChineseExtractor(DateTimeExtractors extracto case DateTimeExtractors.Time: return new DateTime.Chinese.ChineseTimeExtractorConfiguration(); case DateTimeExtractors.DatePeriod: - return new DateTime.Chinese.ChineseDatePeriodExtractorConfiguration(); + return new DateTime.Chinese.ChineseDatePeriodExtractorConfiguration(defaultConfig); case DateTimeExtractors.TimePeriod: return new DateTime.Chinese.ChineseTimePeriodExtractorChsConfiguration(); case DateTimeExtractors.DateTime: return new DateTime.Chinese.ChineseDateTimeExtractorConfiguration(); case DateTimeExtractors.DateTimePeriod: - return new DateTime.Chinese.ChineseDateTimePeriodExtractorConfiguration(); + return new DateTime.Chinese.ChineseDateTimePeriodExtractorConfiguration(defaultConfig); case DateTimeExtractors.Duration: return new DateTime.Chinese.ChineseDurationExtractorConfiguration(); case DateTimeExtractors.Holiday: diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj index 2425fedb66..8bb3046dc2 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests/Microsoft.Recognizers.Text.DataTypes.DataDrivenTests.csproj @@ -16,7 +16,7 @@ --> $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + true @@ -28,6 +28,7 @@ 4 ../Recognizers-Text.ruleset + pdbonly true @@ -51,4 +52,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj index bea2935f68..187464792e 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.csproj @@ -1,12 +1,13 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false ../Recognizers-Text.ruleset - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + diff --git a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec index 8c8bedcb1f..c3f332dfcb 100644 --- a/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec +++ b/.NET/Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec @@ -14,8 +14,6 @@ nlp entity-extraction parser-library recognizer timex datatime netstandard2.0 - - diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs index 359f87a0b0..18b356d637 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs @@ -54,7 +54,8 @@ public class ChineseDateExtractorConfiguration : AbstractYearExtractor, IDateTim public static readonly Regex UnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags); - public static readonly IParser NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); + public static readonly IParser NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration( + new BaseNumberOptionsConfiguration(Culture.Chinese, NumberOptions.None))); public static readonly Regex[] DateRegexList = { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs index 2a2d4fc06a..dcca99b6e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; -using Microsoft.Recognizers.Text; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -85,8 +85,6 @@ public class ChineseDatePeriodExtractorConfiguration : IDateTimeExtractor private static readonly ChineseDateExtractorConfiguration DatePointExtractor = new ChineseDateExtractorConfiguration(); - private static readonly IntegerExtractor IntegerExtractor = new IntegerExtractor(); - private static readonly Regex[] SimpleCasesRegexes = { SimpleCasesRegex, @@ -105,6 +103,21 @@ public class ChineseDatePeriodExtractorConfiguration : IDateTimeExtractor DecadeRegex, }; + private readonly IntegerExtractor integerExtractor; + + public ChineseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + { + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerExtractor = new IntegerExtractor(numConfig); + } + public List Extract(string text) { return Extract(text, DateObject.Now); @@ -184,12 +197,12 @@ private static List MergeTwoTimePoints(string text, DateObject referenceT } // extract case like "前两年" "前三个月" - private static List MatchNumberWithUnit(string text) + private List MatchNumberWithUnit(string text) { var ret = new List(); var durations = new List(); - var ers = IntegerExtractor.Extract(text); + var ers = integerExtractor.Extract(text); foreach (var er in ers) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs index 56dbdd9d47..e827c4b6c3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateTimePeriodExtractorConfiguration.cs @@ -3,6 +3,7 @@ using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -51,10 +52,23 @@ public class ChineseDateTimePeriodExtractorConfiguration : IDateTimeExtractor private static readonly ChineseDateExtractorConfiguration SingleDateExtractor = new ChineseDateExtractorConfiguration(); - private static readonly CardinalExtractor CardinalExtractor = new CardinalExtractor(); - private static readonly ChineseTimePeriodExtractorChsConfiguration TimePeriodExtractor = new ChineseTimePeriodExtractorChsConfiguration(); + private readonly CardinalExtractor cardinalExtractor; + + public ChineseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config) + { + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + cardinalExtractor = new CardinalExtractor(numConfig); + } + public List Extract(string text) { return Extract(text, DateObject.Now); @@ -259,7 +273,7 @@ private List MatchNumberWithUnit(string text) var ret = new List(); var durations = new List(); - var ers = CardinalExtractor.Extract(text); + var ers = cardinalExtractor.Extract(text); foreach (var er in ers) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs index cdeb004f26..d061df8362 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseMergedExtractorConfiguration.cs @@ -23,12 +23,13 @@ public class ChineseMergedExtractorConfiguration : IDateTimeExtractor private static readonly ChineseDateExtractorConfiguration DateExtractor = new ChineseDateExtractorConfiguration(); private static readonly ChineseTimeExtractorConfiguration TimeExtractor = new ChineseTimeExtractorConfiguration(); private static readonly ChineseDateTimeExtractorConfiguration DateTimeExtractor = new ChineseDateTimeExtractorConfiguration(); - private static readonly ChineseDatePeriodExtractorConfiguration DatePeriodExtractor = new ChineseDatePeriodExtractorConfiguration(); private static readonly ChineseTimePeriodExtractorChsConfiguration TimePeriodExtractor = new ChineseTimePeriodExtractorChsConfiguration(); - private static readonly ChineseDateTimePeriodExtractorConfiguration DateTimePeriodExtractor = new ChineseDateTimePeriodExtractorConfiguration(); private static readonly ChineseDurationExtractorConfiguration DurationExtractor = new ChineseDurationExtractorConfiguration(); private static readonly ChineseSetExtractorConfiguration SetExtractor = new ChineseSetExtractorConfiguration(); + private readonly ChineseDateTimePeriodExtractorConfiguration dateTimePeriodExtractor; + private readonly ChineseDatePeriodExtractorConfiguration datePeriodExtractor; + private readonly IDateTimeOptionsConfiguration config; public ChineseMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -37,6 +38,9 @@ public ChineseMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); HolidayExtractor = new BaseHolidayExtractor(new ChineseHolidayExtractorConfiguration(config)); + + dateTimePeriodExtractor = new ChineseDateTimePeriodExtractorConfiguration(config); + datePeriodExtractor = new ChineseDatePeriodExtractorConfiguration(config); } public Dictionary AmbiguityFiltersDict { get; } @@ -55,10 +59,10 @@ public List Extract(string text, DateObject referenceTime) // the order is important, since there is a problem in merging AddTo(ret, TimeExtractor.Extract(text, referenceTime)); AddTo(ret, DurationExtractor.Extract(text, referenceTime)); - AddTo(ret, DatePeriodExtractor.Extract(text, referenceTime)); + AddTo(ret, datePeriodExtractor.Extract(text, referenceTime)); AddTo(ret, DateTimeExtractor.Extract(text, referenceTime)); AddTo(ret, TimePeriodExtractor.Extract(text, referenceTime)); - AddTo(ret, DateTimePeriodExtractor.Extract(text, referenceTime)); + AddTo(ret, dateTimePeriodExtractor.Extract(text, referenceTime)); AddTo(ret, SetExtractor.Extract(text, referenceTime)); AddTo(ret, HolidayExtractor.Extract(text, referenceTime)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs index 4a3261301c..2b79cc3690 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; @@ -23,10 +24,21 @@ public class ChineseDateParserConfiguration : IDateTimeParser public ChineseDateParserConfiguration(ChineseDateTimeParserConfiguration configuration) { config = configuration; - integerExtractor = new IntegerExtractor(); - ordinalExtractor = new OrdinalExtractor(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerExtractor = new IntegerExtractor(numConfig); + ordinalExtractor = new OrdinalExtractor(numConfig); + + numberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(numConfig)); + durationExtractor = new ChineseDurationExtractorConfiguration(); - numberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(configuration.Culture))); } public ParseResult Parse(ExtractResult extResult) @@ -514,7 +526,7 @@ protected DateTimeResolutionResult Match2Date(Match match, DateObject referenceD day = this.config.DayOfMonth[dayStr] > 31 ? this.config.DayOfMonth[dayStr] % 31 : this.config.DayOfMonth[dayStr]; if (!string.IsNullOrEmpty(yearStr)) { - year = int.Parse(yearStr); + year = int.Parse(yearStr, CultureInfo.InvariantCulture); if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) { year += 1900; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs index 9085c04eef..82cbfd0b25 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Globalization; using System.Linq; using Microsoft.Recognizers.Definitions.Chinese; using Microsoft.Recognizers.Text.Number; @@ -16,19 +15,30 @@ public class ChineseDatePeriodParserConfiguration : IDateTimeParser private static readonly IDateTimeExtractor SingleDateExtractor = new ChineseDateExtractorConfiguration(); - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); - private static readonly IDateTimeExtractor DurationExtractor = new ChineseDurationExtractorConfiguration(); - private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; + private readonly IExtractor integerExtractor; + + private readonly IParser integerParser; private readonly IFullDateTimeParserConfiguration config; public ChineseDatePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) { config = configuration; + + var numOptions = NumberOptions.None; + + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerExtractor = new IntegerExtractor(numConfig); + integerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(numConfig)); + } public ParseResult Parse(ExtractResult extResult) @@ -157,16 +167,33 @@ public List FilterResults(string query, List= 0) { - build.Append(timeResult.Hour.ToString("D2")); + build.Append(timeResult.Hour.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Minute >= 0) { - build.Append(":" + timeResult.Minute.ToString("D2")); + build.Append(":" + timeResult.Minute.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Second >= 0) { - build.Append(":" + timeResult.Second.ToString("D2")); + build.Append(":" + timeResult.Second.ToString("D2", CultureInfo.InvariantCulture)); } return build.ToString(); @@ -537,10 +550,10 @@ private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject ref string unitStr; // if there are spaces between number and unit - var ers = CardinalExtractor.Extract(text); + var ers = cardinalExtractor.Extract(text); if (ers.Count == 1) { - var pr = CardinalParser.Parse(ers[0]); + var pr = cardinalParser.Parse(ers[0]); var srcUnit = text.Substring(ers[0].Start + ers[0].Length ?? 0).Trim(); if (srcUnit.StartsWith("个")) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs index 23d8c0c5b0..6882b80a43 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseHolidayParserConfiguration.cs @@ -59,15 +59,27 @@ public class ChineseHolidayParserConfiguration : IDateTimeParser public static readonly Dictionary NoFixedTimex = DateTimeDefinitions.HolidayNoFixedTimex; - private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); + private readonly IExtractor integerExtractor; - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Chinese))); + private readonly IParser integerParser; private readonly IFullDateTimeParserConfiguration config; public ChineseHolidayParserConfiguration(IFullDateTimeParserConfiguration configuration) { config = configuration; + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerExtractor = new IntegerExtractor(numConfig); + integerParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(numConfig)); + } public ParseResult Parse(ExtractResult extResult) @@ -80,7 +92,7 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refDate) var referenceDate = refDate; object value = null; - if (er.Type.Equals(ParserName)) + if (er.Type.Equals(ParserName, StringComparison.InvariantCulture)) { var innerResult = ParseHolidayRegexMatch(er.Text, referenceDate); @@ -121,115 +133,6 @@ public List FilterResults(string query, List= 90) - { - year += 1900; - } - else if (year < 20) - { - year += 2000; - } - - if (!string.IsNullOrEmpty(holidayStr)) - { - DateObject value; - string timexStr; - if (FixedHolidaysDict.ContainsKey(holidayStr)) - { - value = FixedHolidaysDict[holidayStr](year); - timexStr = $"-{value.Month:D2}-{value.Day:D2}"; - } - else - { - if (HolidayFuncDict.ContainsKey(holidayStr)) - { - value = HolidayFuncDict[holidayStr](year); - timexStr = NoFixedTimex[holidayStr]; - } - else - { - return ret; - } - } - - if (hasYear) - { - ret.Timex = year.ToString("D4") + timexStr; - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); - ret.Success = true; - return ret; - } - - ret.Timex = "XXXX" + timexStr; - ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); - ret.PastValue = GetPastValue(value, referenceDate, holidayStr); - ret.Success = true; - return ret; - } - - return ret; - } - private static DateObject GetFutureValue(DateObject value, DateObject referenceDate, string holiday) { if (value < referenceDate) @@ -375,17 +278,126 @@ where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfW select day).ElementAt(3)); } - private static int ConvertChineseToInteger(string yearChsStr) + private DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) + { + foreach (var regex in ChineseHolidayExtractorConfiguration.HolidayRegexList) + { + var match = regex.Match(text); + + if (match.Success) + { + // Value string will be set in Match2Date method + var ret = Match2Date(match, referenceDate); + return ret; + } + } + + return new DateTimeResolutionResult(); + } + + private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var holidayStr = match.Groups["holiday"].Value; + + var year = referenceDate.Year; + var hasYear = false; + var yearNum = match.Groups["year"].Value; + var yearChs = match.Groups["yearchs"].Value; + var yearRel = match.Groups["yearrel"].Value; + if (!string.IsNullOrEmpty(yearNum)) + { + hasYear = true; + if (yearNum.EndsWith("年")) + { + yearNum = yearNum.Substring(0, yearNum.Length - 1); + } + + year = int.Parse(yearNum); + } + else if (!string.IsNullOrEmpty(yearChs)) + { + hasYear = true; + if (yearChs.EndsWith("年")) + { + yearChs = yearChs.Substring(0, yearChs.Length - 1); + } + + year = ConvertChineseToInteger(yearChs); + } + else if (!string.IsNullOrEmpty(yearRel)) + { + hasYear = true; + if (yearRel.EndsWith("去年")) + { + year--; + } + else if (yearRel.EndsWith("明年")) + { + year++; + } + } + + if (year < 100 && year >= 90) + { + year += 1900; + } + else if (year < 20) + { + year += 2000; + } + + if (!string.IsNullOrEmpty(holidayStr)) + { + DateObject value; + string timexStr; + if (FixedHolidaysDict.ContainsKey(holidayStr)) + { + value = FixedHolidaysDict[holidayStr](year); + timexStr = $"-{value.Month:D2}-{value.Day:D2}"; + } + else + { + if (HolidayFuncDict.ContainsKey(holidayStr)) + { + value = HolidayFuncDict[holidayStr](year); + timexStr = NoFixedTimex[holidayStr]; + } + else + { + return ret; + } + } + + if (hasYear) + { + ret.Timex = year.ToString("D4") + timexStr; + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); + ret.Success = true; + return ret; + } + + ret.Timex = "XXXX" + timexStr; + ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); + ret.PastValue = GetPastValue(value, referenceDate, holidayStr); + ret.Success = true; + return ret; + } + + return ret; + } + + private int ConvertChineseToInteger(string yearChsStr) { var year = 0; var num = 0; - var er = IntegerExtractor.Extract(yearChsStr); + var er = integerExtractor.Extract(yearChsStr); if (er.Count != 0) { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER)) + if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); + num = Convert.ToInt32((double)(integerParser.Parse(er[0]).Value ?? 0)); } } @@ -395,12 +407,12 @@ private static int ConvertChineseToInteger(string yearChsStr) foreach (var ch in yearChsStr) { num *= 10; - er = IntegerExtractor.Extract(ch.ToString()); + er = integerExtractor.Extract(ch.ToString()); if (er.Count != 0) { - if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER)) + if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); + num += Convert.ToInt32((double)(integerParser.Parse(er[0]).Value ?? 0)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs index 4e8df3fa67..bdc8bf14b7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Config/BaseDateTimeOptionsConfiguration.cs @@ -20,6 +20,8 @@ public BaseDateTimeOptionsConfiguration(IDateTimeOptionsConfiguration config) public bool DmyDateFormat { get; } + public string LanguageMarker { get; set; } + public string Culture { get; } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs index 3227e463e8..972795fb8c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Config/IDateTimeOptionsConfiguration.cs @@ -7,5 +7,7 @@ public interface IDateTimeOptionsConfiguration : IConfiguration DateTimeOptions Options { get; } bool DmyDateFormat { get; } + + string LanguageMarker { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs index ed9f3acb77..025b3b0d1b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs @@ -30,6 +30,11 @@ public enum DateTimeOptions /// ExtendedTypes = 8, + /// + /// NoProtoCache + /// + NoProtoCache = 16, + /// /// FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. /// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs index f2d8410cf3..d33d7fb8dc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateExtractorConfiguration.cs @@ -125,10 +125,20 @@ public class DutchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, public DutchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); UtilityConfiguration = new DutchDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs index 9f87bebfea..461cea427a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs @@ -241,10 +241,20 @@ public DutchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi : base(config) { DatePointExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs index cf208ddf9e..b0c44efb06 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimeExtractorConfiguration.cs @@ -2,6 +2,7 @@ using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Dutch.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -66,7 +67,17 @@ public class DutchDateTimeExtractorConfiguration : BaseDateTimeOptionsConfigurat public DutchDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new DutchDurationExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs index cee7b1d6d2..3d8a91a10f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDateTimePeriodExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Dutch @@ -79,7 +80,16 @@ public DutchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration c { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new DutchDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs index a40b6a37e4..cc992811ed 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDurationExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Dutch; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -59,7 +60,16 @@ public class DutchDurationExtractorConfiguration : BaseDateTimeOptionsConfigurat public DutchDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs index b926be6d53..1036821287 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -72,7 +73,16 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) HolidayExtractor = new BaseHolidayExtractor(new DutchHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new DutchTimeZoneExtractorConfiguration(this)); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new DutchDateTimeAltExtractorConfiguration(this)); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs index 52dd13e689..ae6cbeefa0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchTimePeriodExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.Dutch; using Microsoft.Recognizers.Text.DateTime.Dutch.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Dutch { @@ -68,7 +69,17 @@ public DutchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); UtilityConfiguration = new DutchDatetimeUtilityConfiguration(); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new DutchTimeZoneExtractorConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs index 5e464d0c6b..58fd2c7740 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchCommonDateTimeParserConfiguration.cs @@ -26,12 +26,21 @@ public DutchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration conf WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Dutch.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Dutch.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Dutch.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(numConfig)); TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new DutchTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new DutchDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs index 8a4f71abfd..eddb6620c0 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchDurationParserConfiguration.cs @@ -35,7 +35,7 @@ public DutchDurationParserConfiguration(ICommonDateTimeParserConfiguration confi public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs index 5bb3865bb9..ea255a0a90 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateExtractorConfiguration.cs @@ -126,10 +126,19 @@ public class EnglishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public EnglishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); UtilityConfiguration = new EnglishDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs index f481beaa2b..0c4d6bcbd3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDatePeriodExtractorConfiguration.cs @@ -241,10 +241,21 @@ public EnglishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs index 573d7b7636..4765780432 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimeExtractorConfiguration.cs @@ -2,6 +2,7 @@ using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.English.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { @@ -66,7 +67,17 @@ public class EnglishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfigur public EnglishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new EnglishDurationExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs index 7f5db11ef5..78676c2f1c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDateTimePeriodExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.English @@ -79,7 +80,16 @@ public EnglishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new EnglishDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs index 4c712ce4a0..ecc995c61e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishDurationExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.English; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { @@ -59,7 +60,17 @@ public class EnglishDurationExtractorConfiguration : BaseDateTimeOptionsConfigur public EnglishDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs index ad44e0325b..9f714bed39 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { @@ -78,7 +79,16 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) HolidayExtractor = new BaseHolidayExtractor(new EnglishHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new EnglishTimeZoneExtractorConfiguration(this)); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new EnglishDateTimeAltExtractorConfiguration(this)); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); AmbiguityFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityFiltersDict); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs index 88ac210c37..28b8a10fbf 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.English; using Microsoft.Recognizers.Text.DateTime.English.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.English { @@ -68,7 +69,17 @@ public EnglishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); UtilityConfiguration = new EnglishDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new EnglishTimeZoneExtractorConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs index a926b8d677..4fb306c914 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishCommonDateTimeParserConfiguration.cs @@ -26,12 +26,22 @@ public EnglishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.English.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(numConfig)); TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + DateExtractor = new BaseDateExtractor(new EnglishDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new EnglishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new EnglishDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs index 95aae678aa..ec9244ded8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishDurationParserConfiguration.cs @@ -35,7 +35,7 @@ public EnglishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs index c5ee718275..5b5b3721d8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/AbstractYearExtractor.cs @@ -6,7 +6,8 @@ namespace Microsoft.Recognizers.Text.DateTime { public abstract class AbstractYearExtractor : IDateExtractor { - public AbstractYearExtractor(IDateExtractorConfiguration config) + + protected AbstractYearExtractor(IDateExtractorConfiguration config) { this.Config = config; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs index fb6e81682e..74ee2fb504 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs @@ -2,6 +2,8 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -11,9 +13,16 @@ public class BaseDateExtractor : AbstractYearExtractor, IDateExtractor { public static readonly string ExtractorName = Constants.SYS_DATETIME_DATE; // "Date"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly string keyPrefix; + public BaseDateExtractor(IDateExtractorConfiguration config) : base(config) { + + keyPrefix = string.Intern(Config.Options + "_" + Config.LanguageMarker); + } public static bool IsOverlapWithExistExtractions(Token er, List existErs) @@ -36,13 +45,21 @@ public override List Extract(string text) public override List Extract(string text, DateObject reference) { - var tokens = new List(); - tokens.AddRange(BasicRegexMatch(text)); - tokens.AddRange(ImplicitDate(text)); - tokens.AddRange(NumberWithMonth(text, reference)); - tokens.AddRange(ExtractRelativeDurationDate(text, reference)); - return Token.MergeAllTokens(tokens, text, ExtractorName); + List results; + + if ((this.Config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; } // "In 3 days/weeks/months/years" = "3 days/weeks/months/years from now" @@ -117,10 +134,25 @@ private static bool IsInequalityDuration(ExtractResult er) return er.Data != null && (er.Data.ToString() == Constants.MORE_THAN_MOD || er.Data.ToString() == Constants.LESS_THAN_MOD); } + private List ExtractImpl(string text, DateObject reference) + { + var tokens = new List(); + tokens.AddRange(BasicRegexMatch(text)); + tokens.AddRange(ImplicitDate(text)); + tokens.AddRange(NumberWithMonth(text, reference)); + tokens.AddRange(ExtractRelativeDurationDate(text, reference)); + + var results = Token.MergeAllTokens(tokens, text, ExtractorName); + + return results; + } + // match basic patterns in DateRegexList private List BasicRegexMatch(string text) { - var ret = new List(); + + var results = new List(); + foreach (var regex in this.Config.DateRegexList) { var matches = regex.Matches(text); @@ -132,20 +164,21 @@ private List BasicRegexMatch(string text) // Cases that the relative term is before the detected date entity, like "this 5/12", "next friday 5/12" var preText = text.Substring(0, match.Index); var relativeRegex = this.Config.StrictRelativeRegex.MatchEnd(preText, trim: true); + if (relativeRegex.Success) { - ret.Add(new Token(relativeRegex.Index, match.Index + match.Length)); + results.Add(new Token(relativeRegex.Index, match.Index + match.Length)); } else { - ret.Add(new Token(match.Index, match.Index + match.Length)); + results.Add(new Token(match.Index, match.Index + match.Length)); } } } } - return ret; + return results; } // this method is to validate whether the match is part of date range and is a correct split @@ -160,7 +193,7 @@ private bool ValidateMatch(Match match, string text) var yearGroup = match.Groups["year"]; // If the "year" part is not at the end of the match, it's a valid match - if (!(yearGroup.Index + yearGroup.Length == match.Index + match.Length)) + if (yearGroup.Index + yearGroup.Length != match.Index + match.Length) { isValidMatch = true; } @@ -187,7 +220,7 @@ private bool ValidateMatch(Match match, string text) return isValidMatch; } - // TODO: Simplify this method to improve the performance + // TODO: Simplify this method to improve its performance private string TrimStartRangeConnectorSymbols(string text) { var rangeConnectorSymbolMatches = Config.RangeConnectorSymbolRegex.Matches(text); @@ -210,7 +243,7 @@ private string TrimStartRangeConnectorSymbols(string text) return text.Trim(); } - // TODO: Simplify this method to improve the performance + // TODO: Simplify this method to improve its performance private bool StartsWithBasicDate(string text) { foreach (var regex in this.Config.DateRegexList) @@ -253,9 +286,9 @@ private List NumberWithMonth(string text, DateObject reference) foreach (var result in er) { - int.TryParse((this.Config.NumberParser.Parse(result).Value ?? 0).ToString(), out int num); + var parsed = int.TryParse((this.Config.NumberParser.Parse(result).Value ?? 0).ToString(), out int num); - if (num < 1 || num > 31) + if (!parsed || (num < 1 || num > 31)) { continue; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs index 1731ae438d..a5fced2bad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs @@ -1,6 +1,8 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -10,11 +12,16 @@ public class BaseDatePeriodExtractor : IDateTimeExtractor { private const string ExtractorName = Constants.SYS_DATETIME_DATEPERIOD; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + private readonly IDatePeriodExtractorConfiguration config; + private readonly string keyPrefix; + public BaseDatePeriodExtractor(IDatePeriodExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } public List Extract(string text) @@ -24,20 +31,20 @@ public List Extract(string text) public List Extract(string text, DateObject reference) { - var tokens = new List(); - tokens.AddRange(MatchSimpleCases(text)); + List results; - var simpleCasesResults = Token.MergeAllTokens(tokens, text, ExtractorName); - var ordinalExtractions = config.OrdinalExtractor.Extract(text); + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); - tokens.AddRange(MergeTwoTimePoints(text, reference)); - tokens.AddRange(MatchDuration(text, reference)); - tokens.AddRange(SingleTimePointWithPatterns(text, new List(ordinalExtractions), reference)); - tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference)); - tokens.AddRange(MatchYearPeriod(text, reference)); - tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List(ordinalExtractions))); + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } - return Token.MergeAllTokens(tokens, text, ExtractorName); + return results; } public List MatchDuration(string text, DateObject reference) @@ -316,6 +323,24 @@ private static bool HasDigitNumberAfterDash(string source, int dashSuffixIndex, return hasDigitNumberAfterDash; } + private List ExtractImpl(string text, DateObject reference) + { + var tokens = new List(); + tokens.AddRange(MatchSimpleCases(text)); + + var simpleCasesResults = Token.MergeAllTokens(tokens, text, ExtractorName); + var ordinalExtractions = config.OrdinalExtractor.Extract(text); + + tokens.AddRange(MergeTwoTimePoints(text, reference)); + tokens.AddRange(MatchDuration(text, reference)); + tokens.AddRange(SingleTimePointWithPatterns(text, new List(ordinalExtractions), reference)); + tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference)); + tokens.AddRange(MatchYearPeriod(text, reference)); + tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List(ordinalExtractions))); + + return Token.MergeAllTokens(tokens, text, ExtractorName); + } + // Cases like "21st century" private List MatchOrdinalNumberWithCenturySuffix(string text, List ordinalExtractions) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs index 980b61d12d..cb7f2c1789 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeAltExtractor.cs @@ -242,8 +242,8 @@ private static bool ApplyParentTextMetadata(List extractResults, private static void ApplyMetadata(List ers, Dictionary metadata, string parentText) { // Share the timeZone info - var metaDataOrigin = ers[0].Data as Dictionary; - if (metaDataOrigin != null && metaDataOrigin.ContainsKey(Constants.SYS_DATETIME_TIMEZONE)) + if (ers[0].Data is Dictionary metaDataOrigin && + metaDataOrigin.ContainsKey(Constants.SYS_DATETIME_TIMEZONE)) { metadata.Add(Constants.SYS_DATETIME_TIMEZONE, metaDataOrigin[Constants.SYS_DATETIME_TIMEZONE]); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs index 97ab5fb0bd..bc7eded323 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimeExtractor.cs @@ -87,6 +87,7 @@ public List MergeDateAndTime(string text, DateObject reference) Text = match.Value, Type = Number.Constants.SYS_NUM_INTEGER, }; + numErs.Add(node); } @@ -116,6 +117,7 @@ public List MergeDateAndTime(string text, DateObject reference) (ers[i].Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) && ers[j].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal))) { + var middleBegin = ers[i].Start + ers[i].Length ?? 0; var middleEnd = ers[j].Start ?? 0; if (middleBegin > middleEnd) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs index c1d1adb3ee..ff70c619b4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateTimePeriodExtractor.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -108,6 +109,7 @@ private IEnumerable MergeDateWithTimePeriodSuffix(string text, List MergeDateWithTimePeriodSuffix(string text, List MergeTwoTimePoints(string text, DateObject reference, List tokenListBeforeDate = config.TokenBeforeDate.Split('|').ToList(); foreach (string token in tokenListBeforeDate.Where(n => !string.IsNullOrEmpty(n))) { - if (midStr.Trim().Equals(token)) + if (midStr.Trim().Equals(token, StringComparison.OrdinalIgnoreCase)) { isMatchTokenBeforeDate = true; break; @@ -393,7 +396,7 @@ private List MergeTwoTimePoints(string text, DateObject reference, List MatchTimeOfDay(string text, DateObject reference, List 0) { foreach (var tp in timeErs) @@ -539,7 +542,7 @@ private List MatchTimeOfDay(string text, DateObject reference, List 0) { foreach (var tp in timeErs) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs index d9d8239703..d9e67df2f5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDurationExtractor.cs @@ -1,5 +1,4 @@ using System.Collections.Generic; -using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs index e50dfd5ba5..8d1aff8b93 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseSetExtractor.cs @@ -87,6 +87,7 @@ public virtual List TimeEveryday(string text, DateObject reference) { var ret = new List(); var ers = this.config.TimeExtractor.Extract(text, reference); + foreach (var er in ers) { var afterStr = text.Substring(er.Start + er.Length ?? 0); @@ -116,6 +117,7 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje { var ret = new List(); var matches = config.SetEachRegex.Matches(text); + foreach (Match match in matches) { if (match.Success) @@ -141,6 +143,7 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje Tuple weekdayTuple = config.WeekDayGroupMatchTuple(match); string weekday = weekdayTuple.Item1; int del = weekdayTuple.Item2; + var trimmedText = text.Remove(match.Index, match.Length); trimmedText = trimmedText.Insert(match.Index, weekday); @@ -150,7 +153,7 @@ public List MatchEach(IDateTimeExtractor extractor, string text, DateObje if (er.Start <= match.Index && er.Text.Contains(match.Groups["weekday"].Value)) { var len = (er.Length ?? 0) + del; - if (match.Groups[Constants.PrefixGroupName].ToString() != string.Empty) + if (match.Groups[Constants.PrefixGroupName].ToString().Length > 0) { len += match.Groups[Constants.PrefixGroupName].ToString().Length; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs index b77e42fdaa..162b79343c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimeExtractor.cs @@ -1,6 +1,10 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Text.RegularExpressions; + using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.InternalCache; + using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime @@ -8,21 +12,26 @@ namespace Microsoft.Recognizers.Text.DateTime public class BaseTimeExtractor : IDateTimeExtractor { public static readonly Regex HourRegex = - new Regex(BaseDateTime.HourRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.HourRegex, RegexOptions.Singleline | RegexOptions.Compiled); public static readonly Regex MinuteRegex = - new Regex(BaseDateTime.MinuteRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.MinuteRegex, RegexOptions.Singleline | RegexOptions.Compiled); public static readonly Regex SecondRegex = - new Regex(BaseDateTime.SecondRegex, RegexOptions.Singleline); + new Regex(BaseDateTime.SecondRegex, RegexOptions.Singleline | RegexOptions.Compiled); private const string ExtractorName = Constants.SYS_DATETIME_TIME; // "Time"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + + private readonly string keyPrefix; + private readonly ITimeExtractorConfiguration config; public BaseTimeExtractor(ITimeExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } public virtual List Extract(string text) @@ -31,6 +40,25 @@ public virtual List Extract(string text) } public virtual List Extract(string text, DateObject reference) + { + + List results; + + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; + } + + public virtual List ExtractImpl(string text, DateObject reference) { var tokens = new List(); tokens.AddRange(BasicRegexMatch(text)); @@ -50,7 +78,7 @@ public virtual List Extract(string text, DateObject reference) private List BasicRegexMatch(string text) { - var result = new List(); + var results = new List(); foreach (var regex in this.config.TimeRegexList) { @@ -58,12 +86,12 @@ private List BasicRegexMatch(string text) foreach (Match match in matches) { - result.Add(new Token(match.Index, match.Index + match.Length)); + results.Add(new Token(match.Index, match.Index + match.Length)); } } - return result; + return results; } private List AtRegexMatch(string text) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs index e464c3368e..b913dcdee6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs @@ -1,6 +1,8 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; + +using Microsoft.Recognizers.Text.InternalCache; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -10,11 +12,16 @@ public class BaseTimePeriodExtractor : IDateTimeExtractor { public static readonly string ExtractorName = Constants.SYS_DATETIME_TIMEPERIOD; // "TimePeriod"; + private static readonly ResultsCache ResultsCache = new ResultsCache(); + private readonly ITimePeriodExtractorConfiguration config; + private readonly string keyPrefix; + public BaseTimePeriodExtractor(ITimePeriodExtractorConfiguration config) { this.config = config; + keyPrefix = string.Intern(config.Options + "_" + config.LanguageMarker); } public List Extract(string text) @@ -23,6 +30,24 @@ public List Extract(string text) } public List Extract(string text, DateObject reference) + { + List results; + + if ((this.config.Options & DateTimeOptions.NoProtoCache) != 0) + { + results = ExtractImpl(text, reference); + } + else + { + var key = (keyPrefix, text, reference); + + results = ResultsCache.GetOrCreate(key, () => ExtractImpl(text, reference)); + } + + return results; + } + + private List ExtractImpl(string text, DateObject reference) { var tokens = new List(); tokens.AddRange(MatchSimpleCases(text)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs index dc223f3bc6..6b37ef1800 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IDateTimeZoneExtractor.cs @@ -1,5 +1,4 @@ using System.Collections.Generic; -using DateObject = System.DateTime; namespace Microsoft.Recognizers.Text.DateTime { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs index 9b95539446..4bff16d34e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateExtractorConfiguration.cs @@ -139,9 +139,19 @@ public class FrenchDateExtractorConfiguration : BaseDateTimeOptionsConfiguration public FrenchDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); UtilityConfiguration = new FrenchDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs index ca747b230d..6e1135e6d1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDatePeriodExtractorConfiguration.cs @@ -220,10 +220,20 @@ public FrenchDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf : base(config) { DatePointExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.French.CardinalExtractor.GetInstance(); OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); - DurationExtractor = new BaseDurationExtractor(new FrenchDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs index ab4e7a0586..af5d9281a6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchDateTimePeriodExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.French; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -84,7 +85,16 @@ public FrenchDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new FrenchDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs index b2312292fc..89f13cfb53 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchTimePeriodExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.French; using Microsoft.Recognizers.Text.DateTime.French.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.French { @@ -79,7 +80,17 @@ public FrenchTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); UtilityConfiguration = new FrenchDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new FrenchTimeZoneExtractorConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs index e4cddd2032..c260c4128d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchCommonDateTimeParserConfiguration.cs @@ -26,11 +26,20 @@ public FrenchCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.French.CardinalExtractor.GetInstance(); IntegerExtractor = Number.French.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.French.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new FrenchNumberParserConfiguration(numConfig)); + DateExtractor = new BaseDateExtractor(new FrenchDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new FrenchTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new FrenchDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs index 7cc9da64fc..31e76c202d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDurationParserConfiguration.cs @@ -33,7 +33,7 @@ public FrenchDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs index 125c58bb87..97083661f1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDateExtractorConfiguration.cs @@ -131,9 +131,19 @@ public class GermanDateExtractorConfiguration : BaseDateTimeOptionsConfiguration public GermanDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(numConfig))); + DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); UtilityConfiguration = new GermanDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs index fe02ea252e..e2d2a3eaa6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanDatePeriodExtractorConfiguration.cs @@ -198,10 +198,20 @@ public GermanDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration conf : base(config) { DatePointExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); - DurationExtractor = new BaseDurationExtractor(new GermanDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs index 435f41cce2..b20c2e5c59 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanCommonDateTimeParserConfiguration.cs @@ -26,11 +26,20 @@ public GermanCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration con WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.German.CardinalExtractor.GetInstance(); IntegerExtractor = Number.German.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.German.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new GermanNumberParserConfiguration(numConfig)); + DateExtractor = new BaseDateExtractor(new GermanDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new GermanTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new GermanDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs index 03477dc895..949efc9d04 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Parsers/GermanDurationParserConfiguration.cs @@ -34,7 +34,7 @@ public GermanDurationParserConfiguration(ICommonDateTimeParserConfiguration conf public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs index 75af7fc69a..c52cdcb43e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDateExtractorConfiguration.cs @@ -125,10 +125,20 @@ public class HindiDateExtractorConfiguration : BaseDateTimeOptionsConfiguration, public HindiDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); - NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); UtilityConfiguration = new HindiDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs index 645111d81b..d44a63649b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiDatePeriodExtractorConfiguration.cs @@ -241,10 +241,20 @@ public HindiDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration confi : base(config) { DatePointExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Hindi.CardinalExtractor.GetInstance(); OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); - DurationExtractor = new BaseDurationExtractor(new HindiDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new HindiNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + NumberParser = new BaseNumberParser(new HindiNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs index 814e272927..161a59d322 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiCommonDateTimeParserConfiguration.cs @@ -26,12 +26,22 @@ public HindiCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration conf WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Hindi.CardinalExtractor.GetInstance(); IntegerExtractor = Number.Hindi.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Hindi.OrdinalExtractor.GetInstance(); + NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(numConfig)); + TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseIndianNumberParser(new HindiNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + DateExtractor = new BaseDateExtractor(new HindiDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new HindiTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new HindiDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs index cc22022807..34f3e035af 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Parsers/HindiDurationParserConfiguration.cs @@ -34,7 +34,7 @@ public HindiDurationParserConfiguration(ICommonDateTimeParserConfiguration confi public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs index e9ae145f5f..eb8066168c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateExtractorConfiguration.cs @@ -135,9 +135,19 @@ public class ItalianDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public ItalianDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.Italian.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); UtilityConfiguration = new ItalianDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs index fe8be3fbce..e3e5cf5018 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDatePeriodExtractorConfiguration.cs @@ -235,10 +235,20 @@ public ItalianDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Italian.CardinalExtractor.GetInstance(); OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); - DurationExtractor = new BaseDurationExtractor(new ItalianDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs index 2148a6fe84..0e08b3e30c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianDateTimePeriodExtractorConfiguration.cs @@ -2,6 +2,7 @@ using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Italian; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Utilities; namespace Microsoft.Recognizers.Text.DateTime.Italian @@ -85,7 +86,16 @@ public ItalianDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.English.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.English.CardinalExtractor.GetInstance(numConfig); + SingleDateExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new ItalianTimeExtractorConfiguration(this)); SingleDateTimeExtractor = new BaseDateTimeExtractor(new ItalianDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs index 8e7389562d..c9818d4021 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianCommonDateTimeParserConfiguration.cs @@ -25,11 +25,20 @@ public ItalianCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Italian.CardinalExtractor.GetInstance(); IntegerExtractor = Number.Italian.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Italian.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new ItalianNumberParserConfiguration(numConfig)); + DateExtractor = new BaseDateExtractor(new ItalianDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new ItalianTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new ItalianDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs index 668f11ed6d..6d91812ccc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Parsers/ItalianDurationParserConfiguration.cs @@ -39,7 +39,7 @@ public ItalianDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs index 7ed5c1c0b6..c26ec73a7d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDateParserConfiguration.cs @@ -23,9 +23,18 @@ public class JapaneseDateParserConfiguration : IDateTimeParser public JapaneseDateParserConfiguration(JapaneseDateTimeParserConfiguration configuration) { config = configuration; + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + integerExtractor = new IntegerExtractor(); durationExtractor = new JapaneseDurationExtractorConfiguration(); - numberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + numberParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(numConfig)); } public ParseResult Parse(ExtractResult extResult) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs index 98d67a1058..7d46bbcba7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseDatePeriodParserConfiguration.cs @@ -18,17 +18,27 @@ public class JapaneseDatePeriodParserConfiguration : IDateTimeParser private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); - private static readonly IDateTimeExtractor DurationExtractor = new JapaneseDurationExtractorConfiguration(); private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; + private readonly IParser integerParser; + private readonly IFullDateTimeParserConfiguration config; public JapaneseDatePeriodParserConfiguration(IFullDateTimeParserConfiguration configuration) { config = configuration; + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(numConfig)); } public ParseResult Parse(ExtractResult extResult) @@ -157,8 +167,25 @@ public List FilterResults(string query, List= 0) { - build.Append(timeResult.Hour.ToString("D2")); + build.Append(timeResult.Hour.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Minute >= 0) { - build.Append(":" + timeResult.Minute.ToString("D2")); + build.Append(":" + timeResult.Minute.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Second >= 0) { - build.Append(":" + timeResult.Second.ToString("D2")); + build.Append(":" + timeResult.Second.ToString("D2", CultureInfo.InvariantCulture)); } return build.ToString(); @@ -539,7 +550,7 @@ private DateTimeResolutionResult ParseNumberWithUnit(string text, DateObject ref var ers = CardinalExtractor.Extract(text); if (ers.Count == 1) { - var pr = CardinalParser.Parse(ers[0]); + var pr = cardinalParser.Parse(ers[0]); var srcUnit = text.Substring(ers[0].Start + ers[0].Length ?? 0).Trim(); if (srcUnit.StartsWith("个")) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs index 09e0d8da3c..a71012ded5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Parsers/JapaneseHolidayParserConfiguration.cs @@ -60,13 +60,23 @@ public class JapaneseHolidayParserConfiguration : IDateTimeParser private static readonly IExtractor IntegerExtractor = new IntegerExtractor(); - private static readonly IParser IntegerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(new BaseNumberOptionsConfiguration(Culture.Japanese))); + private readonly IParser integerParser; private readonly IFullDateTimeParserConfiguration config; public JapaneseHolidayParserConfiguration(IFullDateTimeParserConfiguration configuration) { config = configuration; + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + integerParser = new BaseCJKNumberParser(new JapaneseNumberParserConfiguration(numConfig)); } public ParseResult Parse(ExtractResult extResult) @@ -120,115 +130,6 @@ public List FilterResults(string query, List= 90) - { - year += 1900; - } - else if (year < 20) - { - year += 2000; - } - - if (!string.IsNullOrEmpty(holidayStr)) - { - DateObject value; - string timexStr; - if (FixedHolidaysDict.ContainsKey(holidayStr)) - { - value = FixedHolidaysDict[holidayStr](year); - timexStr = $"-{value.Month:D2}-{value.Day:D2}"; - } - else - { - if (HolidayFuncDict.ContainsKey(holidayStr)) - { - value = HolidayFuncDict[holidayStr](year); - timexStr = NoFixedTimex[holidayStr]; - } - else - { - return ret; - } - } - - if (hasYear) - { - ret.Timex = year.ToString("D4") + timexStr; - ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); - ret.Success = true; - return ret; - } - - ret.Timex = "XXXX" + timexStr; - ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); - ret.PastValue = GetPastValue(value, referenceDate, holidayStr); - ret.Success = true; - return ret; - } - - return ret; - } - private static DateObject GetFutureValue(DateObject value, DateObject referenceDate, string holiday) { if (value < referenceDate) @@ -372,7 +273,116 @@ where DateObject.MinValue.SafeCreateFromValue(year, 11, day).DayOfWeek == DayOfW select day).ElementAt(3)); } - private static int ConvertJapaneseToInteger(string yearJapStr) + private DateTimeResolutionResult ParseHolidayRegexMatch(string text, DateObject referenceDate) + { + foreach (var regex in JapaneseHolidayExtractorConfiguration.HolidayRegexList) + { + var match = regex.MatchExact(text, trim: true); + + if (match.Success) + { + // Value string will be set in Match2Date method + var ret = Match2Date(match.Match, referenceDate); + return ret; + } + } + + return new DateTimeResolutionResult(); + } + + private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDate) + { + var ret = new DateTimeResolutionResult(); + var holidayStr = match.Groups["holiday"].Value; + + var year = referenceDate.Year; + var hasYear = false; + var yearNum = match.Groups["year"].Value; + var yearJap = match.Groups["yearJap"].Value; + var yearRel = match.Groups["yearrel"].Value; + if (!string.IsNullOrEmpty(yearNum)) + { + hasYear = true; + if (yearNum.EndsWith("年")) + { + yearNum = yearNum.Substring(0, yearNum.Length - 1); + } + + year = int.Parse(yearNum); + } + else if (!string.IsNullOrEmpty(yearJap)) + { + hasYear = true; + if (yearJap.EndsWith("年")) + { + yearJap = yearJap.Substring(0, yearJap.Length - 1); + } + + year = ConvertJapaneseToInteger(yearJap); + } + else if (!string.IsNullOrEmpty(yearRel)) + { + hasYear = true; + if (yearRel.EndsWith("前年") || yearRel.EndsWith("先年")) + { + year--; + } + else if (yearRel.EndsWith("来年")) + { + year++; + } + } + + if (year < 100 && year >= 90) + { + year += 1900; + } + else if (year < 20) + { + year += 2000; + } + + if (!string.IsNullOrEmpty(holidayStr)) + { + DateObject value; + string timexStr; + if (FixedHolidaysDict.ContainsKey(holidayStr)) + { + value = FixedHolidaysDict[holidayStr](year); + timexStr = $"-{value.Month:D2}-{value.Day:D2}"; + } + else + { + if (HolidayFuncDict.ContainsKey(holidayStr)) + { + value = HolidayFuncDict[holidayStr](year); + timexStr = NoFixedTimex[holidayStr]; + } + else + { + return ret; + } + } + + if (hasYear) + { + ret.Timex = year.ToString("D4") + timexStr; + ret.FutureValue = ret.PastValue = DateObject.MinValue.SafeCreateFromValue(year, value.Month, value.Day); + ret.Success = true; + return ret; + } + + ret.Timex = "XXXX" + timexStr; + ret.FutureValue = GetFutureValue(value, referenceDate, holidayStr); + ret.PastValue = GetPastValue(value, referenceDate, holidayStr); + ret.Success = true; + return ret; + } + + return ret; + } + + private int ConvertJapaneseToInteger(string yearJapStr) { var year = 0; var num = 0; @@ -382,7 +392,7 @@ private static int ConvertJapaneseToInteger(string yearJapStr) { if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) { - num = Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); + num = Convert.ToInt32((double)(integerParser.Parse(er[0]).Value ?? 0)); } } @@ -397,7 +407,7 @@ private static int ConvertJapaneseToInteger(string yearJapStr) { if (er[0].Type.Equals(Number.Constants.SYS_NUM_INTEGER, StringComparison.Ordinal)) { - num += Convert.ToInt32((double)(IntegerParser.Parse(er[0]).Value ?? 0)); + num += Convert.ToInt32((double)(integerParser.Parse(er[0]).Value ?? 0)); } } } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj index eb8be9a7c0..f8d566b66a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.csproj @@ -1,13 +1,14 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false ../Recognizers-Text.ruleset - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all @@ -34,7 +36,7 @@ - + @@ -44,4 +46,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec index 0f2bbbe910..caecf56a7b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec @@ -21,8 +21,6 @@ - - diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml index 66cc7b9d1b..c75e80923f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml @@ -79,6 +79,11 @@ ExtendedTypes + + + NoProtoCache + + FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs index dacb75e677..903c2b7d2d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateParser.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Collections.Immutable; +using System.Globalization; using System.Linq; using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; @@ -737,7 +738,7 @@ private DateTimeResolutionResult Match2Date(Match match, DateObject referenceDat day = this.config.DayOfMonth[dayStr]; if (!string.IsNullOrEmpty(yearStr)) { - year = int.Parse(yearStr); + year = int.Parse(yearStr, CultureInfo.InvariantCulture); if (year < 100 && year >= Constants.MinTwoDigitYearPastNum) { year += 1900; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs index c621d9861f..b6482440a6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs @@ -186,7 +186,7 @@ private static ModAndDateResult GetModAndDate(DateObject beginDate, DateObject e { DateObject beginDateResult = beginDate; DateObject endDateResult = endDate; - var isBusinessDay = timex.EndsWith(Constants.TimexBusinessDay); + var isBusinessDay = timex.EndsWith(Constants.TimexBusinessDay, StringComparison.Ordinal); var businessDayCount = 0; List dateList = null; @@ -607,7 +607,7 @@ private DateTimeResolutionResult ParseDatePointWithAgoAndLater(string text, Date } var pr = this.config.DateParser.Parse(er, referenceDate); - var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text).FirstOrDefault(); + var durationExtractionResult = this.config.DurationExtractor.Extract(er.Text, referenceDate).FirstOrDefault(); if (durationExtractionResult != null) { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs index aafc2cd8bf..954872c2a2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs @@ -283,14 +283,14 @@ protected virtual DateTimeResolutionResult ParseSpecificTimeOfDay(string text, D var afterStr = trimmedText.Substring(match.Index + match.Length).Trim(); // Eliminate time period, if any - var timePeriodErs = this.Config.TimePeriodExtractor.Extract(beforeStr); + var timePeriodErs = this.Config.TimePeriodExtractor.Extract(beforeStr, referenceTime); if (timePeriodErs.Count > 0) { beforeStr = beforeStr.Remove(timePeriodErs[0].Start ?? 0, timePeriodErs[0].Length ?? 0).Trim(); } else { - timePeriodErs = this.Config.TimePeriodExtractor.Extract(afterStr); + timePeriodErs = this.Config.TimePeriodExtractor.Extract(afterStr, referenceTime); if (timePeriodErs.Count > 0) { afterStr = afterStr.Remove(timePeriodErs[0].Start ?? 0, timePeriodErs[0].Length ?? 0).Trim(); @@ -420,8 +420,8 @@ private DateTimeResolutionResult ParseDateWithTimePeriodSuffix(string text, Date { var ret = new DateTimeResolutionResult(); - var dateEr = this.Config.DateExtractor.Extract(text).FirstOrDefault(); - var timeEr = this.Config.TimeExtractor.Extract(text).FirstOrDefault(); + var dateEr = this.Config.DateExtractor.Extract(text, referenceTime).FirstOrDefault(); + var timeEr = this.Config.TimeExtractor.Extract(text, referenceTime).FirstOrDefault(); if (dateEr != null && timeEr != null) { @@ -511,7 +511,7 @@ private DateTimeResolutionResult ParseDateWithPeriodPrefix(string text, DateObje { var ret = new DateTimeResolutionResult(); - var dateResult = this.Config.DateExtractor.Extract(text); + var dateResult = this.Config.DateExtractor.Extract(text, referenceTime); if (dateResult.Count > 0) { var beforeString = text.Substring(0, (int)dateResult.Last().Start).TrimEnd(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs index 43f930f986..917ac0db01 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDurationParser.cs @@ -393,7 +393,7 @@ private DateTimeResolutionResult ParseMergedDuration(string text, DateObject ref var durationExtractor = this.config.DurationExtractor; // DurationExtractor without parameter will not extract merged duration - var ers = durationExtractor.Extract(text); + var ers = durationExtractor.Extract(text, referenceTime); // only handle merged duration cases like "1 month 21 days" if (ers.Count <= 1) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs index d805fef4bb..cd30ad49f8 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseMergedDateTimeParser.cs @@ -13,7 +13,6 @@ public class BaseMergedDateTimeParser : IDateTimeParser public static readonly string DateMinString = DateTimeFormatUtil.FormatDate(DateObject.MinValue); public static readonly string DateTimeMinString = DateTimeFormatUtil.FormatDateTime(DateObject.MinValue); - private static readonly Calendar Cal = DateTimeFormatInfo.InvariantInfo.Calendar; public BaseMergedDateTimeParser(IMergedParserConfiguration configuration) { @@ -39,8 +38,10 @@ public static void AddAltSingleDateTimeToResolution(Dictionary r } } - public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, Dictionary res) + public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, + Dictionary res) { + // If an "invalid" Date or DateTime is extracted, it should not have an assigned resolution. // Only valid entities should pass this condition. if (resolutionDic.ContainsKey(type) && @@ -77,7 +78,8 @@ public static void AddSingleDateTimeToResolution(Dictionary reso } } - public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, Dictionary res) + public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, + Dictionary res) { var start = string.Empty; var end = string.Empty; @@ -150,8 +152,10 @@ public static void AddPeriodToResolution(Dictionary resolutionDi } } - public static string GenerateEndInclusiveTimex(string originalTimex, DatePeriodTimexType datePeriodTimexType, DateObject startDate, DateObject endDate) + public static string GenerateEndInclusiveTimex(string originalTimex, DatePeriodTimexType datePeriodTimexType, + DateObject startDate, DateObject endDate) { + var timexEndInclusive = TimexUtility.GenerateDatePeriodTimex(startDate, endDate, datePeriodTimexType); // Sometimes the original timex contains fuzzy part like "XXXX-05-31" @@ -200,10 +204,11 @@ public static DateTimeParseResult SetInclusivePeriodEnd(DateTimeParseResult slot if (values.ContainsKey(DateTimeResolutionKey.Start) && values.ContainsKey(DateTimeResolutionKey.End) && values.ContainsKey(DateTimeResolutionKey.Timex)) { - var startDate = DateObject.Parse(values[DateTimeResolutionKey.Start]); - var endDate = DateObject.Parse(values[DateTimeResolutionKey.End]); + var startDate = DateObject.Parse(values[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + var endDate = DateObject.Parse(values[DateTimeResolutionKey.End], CultureInfo.InvariantCulture); var durationStr = timexComponents[2]; var datePeriodTimexType = TimexUtility.GetDatePeriodTimexType(durationStr); + endDate = TimexUtility.OffsetDateObject(endDate, offset: 1, timexType: datePeriodTimexType); values[DateTimeResolutionKey.End] = DateTimeFormatUtil.LuisDate(endDate); values[DateTimeResolutionKey.Timex] = @@ -245,7 +250,28 @@ public static void AddAltPeriodToResolution(Dictionary resolutio public static bool AreUnresolvedDates(string startDate, string endDate) { return string.IsNullOrEmpty(startDate) || string.IsNullOrEmpty(endDate) || - startDate.StartsWith(DateMinString, StringComparison.Ordinal) || endDate.StartsWith(DateMinString, StringComparison.Ordinal); + startDate.StartsWith(DateMinString, StringComparison.Ordinal) || + endDate.StartsWith(DateMinString, StringComparison.Ordinal); + } + + public static string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) + { + if (!hasMod) + { + return null; + } + + if (!newType.Equals(sourceType, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPOINT; + } + + if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPERIOD; + } + + return null; } public ParseResult Parse(ExtractResult er) @@ -365,9 +391,10 @@ public DateTimeParseResult Parse(ExtractResult er, DateObject refTime) er.Text = matchIsAfter ? er.Text.Substring(0, (int)er.Length) : er.Text.Substring(equalMatch.Length); modStr = equalMatch.Value; } - else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && Config.YearRegex.Match(er.Text).Success) || - er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) || - er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) + else if ((er.Type.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal) && + Config.YearRegex.Match(er.Text).Success) || + er.Type.Equals(Constants.SYS_DATETIME_DATE, StringComparison.Ordinal) || + er.Type.Equals(Constants.SYS_DATETIME_TIME, StringComparison.Ordinal)) { // This has to be put at the end of the if, or cases like "before 2012" and "after 2012" would fall into this // 2012 or after/above @@ -552,26 +579,6 @@ public string DetermineDateTimeType(string type, bool hasMod) return type; } - public string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) - { - if (!hasMod) - { - return null; - } - - if (!newType.Equals(sourceType, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPOINT; - } - - if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) - { - return Constants.SYS_DATETIME_DATETIMEPERIOD; - } - - return null; - } - public List DateTimeResolutionForSplit(DateTimeParseResult slot) { var results = new List(); @@ -637,7 +644,7 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s AddResolutionFields(res, Constants.Comment, comment); AddResolutionFields(res, DateTimeResolutionKey.Mod, mod); AddResolutionFields(res, ResolutionKey.Type, typeOutput); - AddResolutionFields(res, DateTimeResolutionKey.IsLunar, isLunar ? isLunar.ToString() : string.Empty); + AddResolutionFields(res, DateTimeResolutionKey.IsLunar, isLunar ? isLunar.ToString(CultureInfo.InvariantCulture) : string.Empty); var hasTimeZone = false; @@ -651,7 +658,7 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s AddResolutionFields(res, Constants.ResolveTimeZone, new Dictionary { { ResolutionKey.Value, val.TimeZoneResolution.Value }, - { Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString() }, + { Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture) }, }); } else @@ -660,7 +667,8 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s hasTimeZone = true; AddResolutionFields(res, Constants.TimeZone, val.TimeZoneResolution.Value); AddResolutionFields(res, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); - AddResolutionFields(res, Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString()); + AddResolutionFields(res, Constants.UtcOffsetMinsKey, + val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture)); } } @@ -727,7 +735,8 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s AddResolutionFields(value, DateTimeResolutionKey.Timex, timex); AddResolutionFields(value, DateTimeResolutionKey.Mod, mod); AddResolutionFields(value, ResolutionKey.Type, typeOutput); - AddResolutionFields(value, DateTimeResolutionKey.IsLunar, isLunar ? isLunar.ToString() : string.Empty); + AddResolutionFields(value, DateTimeResolutionKey.IsLunar, + isLunar ? isLunar.ToString(CultureInfo.InvariantCulture) : string.Empty); AddResolutionFields(value, DateTimeResolutionKey.List, list); AddResolutionFields(value, DateTimeResolutionKey.SourceEntity, sourceEntity); @@ -735,19 +744,13 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s { AddResolutionFields(value, Constants.TimeZone, val.TimeZoneResolution.Value); AddResolutionFields(value, Constants.TimeZoneText, val.TimeZoneResolution.TimeZoneText); - AddResolutionFields(value, Constants.UtcOffsetMinsKey, val.TimeZoneResolution.UtcOffsetMins.ToString()); + AddResolutionFields(value, Constants.UtcOffsetMinsKey, + val.TimeZoneResolution.UtcOffsetMins.ToString(CultureInfo.InvariantCulture)); } foreach (var q in dictionary) { - if (value.ContainsKey(q.Key)) - { - value[q.Key] = q.Value; - } - else - { - value.Add(q.Key, q.Value); - } + value[q.Key] = q.Value; } resolutions.Add(value); @@ -838,16 +841,18 @@ internal static void ResolveAmpm(Dictionary resolutionDic, strin case Constants.SYS_DATETIME_DATETIMEPERIOD: if (resolution.ContainsKey(DateTimeResolutionKey.Start)) { - var start = Convert.ToDateTime(resolution[DateTimeResolutionKey.Start]); - start = start.Hour == Constants.HalfDayHourCount ? start.AddHours(-Constants.HalfDayHourCount) : start.AddHours(Constants.HalfDayHourCount); + var start = Convert.ToDateTime(resolution[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); + start = start.Hour == Constants.HalfDayHourCount ? + start.AddHours(-Constants.HalfDayHourCount) : start.AddHours(Constants.HalfDayHourCount); resolutionPm[DateTimeResolutionKey.Start] = DateTimeFormatUtil.FormatDateTime(start); } if (resolution.ContainsKey(DateTimeResolutionKey.End)) { - var end = Convert.ToDateTime(resolution[DateTimeResolutionKey.End]); - end = end.Hour == Constants.HalfDayHourCount ? end.AddHours(-Constants.HalfDayHourCount) : end.AddHours(Constants.HalfDayHourCount); + var end = Convert.ToDateTime(resolution[DateTimeResolutionKey.End], CultureInfo.InvariantCulture); + end = end.Hour == Constants.HalfDayHourCount ? + end.AddHours(-Constants.HalfDayHourCount) : end.AddHours(Constants.HalfDayHourCount); resolutionPm[DateTimeResolutionKey.End] = DateTimeFormatUtil.FormatDateTime(end); } @@ -866,7 +871,7 @@ internal static void ResolveWeekOf(Dictionary resolutionDic, str { var resolution = (Dictionary)resolutionDic[keyName]; - var monday = DateObject.Parse(resolution[DateTimeResolutionKey.Start]); + var monday = DateObject.Parse(resolution[DateTimeResolutionKey.Start], CultureInfo.InvariantCulture); resolution[DateTimeResolutionKey.Timex] = DateTimeFormatUtil.ToIsoWeekTimex(monday); resolutionDic.Remove(keyName); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs index 10157912bf..e108ac4980 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs @@ -339,7 +339,7 @@ private DateTimeResolutionResult ParsePureNumCases(string text, DateObject refer } // Try to get the timezone resolution - var timeErs = config.TimeExtractor.Extract(trimmedText); + var timeErs = config.TimeExtractor.Extract(trimmedText, referenceTime); foreach (var er in timeErs) { var pr = config.TimeParser.Parse(er, referenceTime); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs index 985742d1a1..827d7fc0b6 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/FullDateTimeParser.cs @@ -17,7 +17,8 @@ public FullDateTimeParser(IFullDateTimeParserConfiguration configuration) config = configuration; } - public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, string mod, Dictionary res) + public static void AddSingleDateTimeToResolution(Dictionary resolutionDic, string type, + string mod, Dictionary res) { if (resolutionDic.ContainsKey(type)) { @@ -40,7 +41,8 @@ public static void AddSingleDateTimeToResolution(Dictionary reso } } - public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, string mod, Dictionary res) + public static void AddPeriodToResolution(Dictionary resolutionDic, string startType, string endType, + string mod, Dictionary res) { var start = string.Empty; var end = string.Empty; @@ -116,6 +118,26 @@ public static string DetermineDateTimeType(string type, bool hasRangeChangingMod return type; } + public static string DetermineSourceEntityType(string sourceType, string newType, bool hasMod) + { + if (!hasMod) + { + return null; + } + + if (!newType.Equals(sourceType, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPOINT; + } + + if (newType.Equals(Constants.SYS_DATETIME_DATEPERIOD, StringComparison.Ordinal)) + { + return Constants.SYS_DATETIME_DATETIMEPERIOD; + } + + return null; + } + public ParseResult Parse(ExtractResult extResult) { return Parse(extResult, DateObject.Now); @@ -401,14 +423,7 @@ public SortedDictionary DateTimeResolution(DateTimeParseResult s foreach (var q in dictionary) { - if (value.ContainsKey(q.Key)) - { - value[q.Key] = q.Value; - } - else - { - value.Add(q.Key, q.Value); - } + value[q.Key] = q.Value; } resolutions.Add(value); @@ -441,26 +456,6 @@ public List FilterResults(string query, List resolutionDic, string keyName) { if (resolutionDic.ContainsKey(keyName)) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs index ebf4841dc3..b09bdf683c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/IDurationParserConfiguration.cs @@ -7,7 +7,7 @@ public interface IDurationParserConfiguration : IDateTimeOptionsConfiguration { IExtractor CardinalExtractor { get; } - IExtractor DurationExtractor { get; } + IDateTimeExtractor DurationExtractor { get; } IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs index f12be3acdd..29fec51694 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateExtractorConfiguration.cs @@ -127,9 +127,19 @@ public class PortugueseDateExtractorConfiguration : BaseDateTimeOptionsConfigura public PortugueseDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); UtilityConfiguration = new PortugueseDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs index ea9057717c..58569faceb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDatePeriodExtractorConfiguration.cs @@ -203,10 +203,20 @@ public PortugueseDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration : base(config) { DatePointExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs index b901d63e28..3c2f61be07 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimeExtractorConfiguration.cs @@ -3,6 +3,7 @@ using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -69,7 +70,16 @@ public class PortugueseDateTimeExtractorConfiguration : BaseDateTimeOptionsConfi public PortugueseDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new PortugueseDurationExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs index b8d5c1fabd..1a740df7e7 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDateTimePeriodExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -67,7 +68,15 @@ public PortugueseDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfigurat { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); SingleDateExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs index fb2c71138e..04508bfc3f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseDurationExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Portuguese; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -63,7 +64,16 @@ public class PortugueseDurationExtractorConfiguration : BaseDateTimeOptionsConfi public PortugueseDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs index 793c3b63e0..01a65c2db9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs @@ -3,6 +3,7 @@ using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -64,8 +65,17 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf SetExtractor = new BaseSetExtractor(new PortugueseSetExtractorConfiguration(this)); HolidayExtractor = new BaseHolidayExtractor(new PortugueseHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new PortugueseTimeZoneExtractorConfiguration(this)); - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); DateTimeAltExtractor = new BaseDateTimeAltExtractor(new PortugueseDateTimeAltExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); } public IDateExtractor DateExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs index 4b052e70fb..38880f81f1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseTimePeriodExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.Portuguese; using Microsoft.Recognizers.Text.DateTime.Portuguese.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Portuguese { @@ -62,7 +63,17 @@ public PortugueseTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); UtilityConfiguration = new PortugueseDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new PortugueseTimeZoneExtractorConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs index 1c20072293..2f9a69f3e5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseCommonDateTimeParserConfiguration.cs @@ -26,11 +26,20 @@ public PortugueseCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Portuguese.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Portuguese.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Portuguese.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new PortugueseNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new PortugueseDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new PortugueseTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new PortugueseDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs index 495ddf56ea..562120554b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Parsers/PortugueseDurationParserConfiguration.cs @@ -37,7 +37,7 @@ public PortugueseDurationParserConfiguration(ICommonDateTimeParserConfiguration public IParser NumberParser { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public Regex NumberCombinedWithUnit { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs index f678c40728..e44819c60a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateExtractorConfiguration.cs @@ -127,9 +127,19 @@ public class SpanishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public SpanishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); + DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); UtilityConfiguration = new SpanishDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs index 9b84f67af1..d595824f37 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDatePeriodExtractorConfiguration.cs @@ -205,10 +205,20 @@ public SpanishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs index a68d55a162..3abef437d2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimeExtractorConfiguration.cs @@ -3,6 +3,7 @@ using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Spanish.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -69,7 +70,16 @@ public class SpanishDateTimeExtractorConfiguration : BaseDateTimeOptionsConfigur public SpanishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + DatePointExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); TimePointExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); DurationExtractor = new BaseDurationExtractor(new SpanishDurationExtractorConfiguration(this)); @@ -119,9 +129,9 @@ public SpanishDateTimeExtractorConfiguration(IDateTimeOptionsConfiguration confi public bool IsConnector(string text) { text = text.Trim(); - return string.IsNullOrEmpty(text) - || PrepositionRegex.IsMatch(text) - || ConnectorRegex.IsMatch(text); + return string.IsNullOrEmpty(text) || + PrepositionRegex.IsMatch(text) || + ConnectorRegex.IsMatch(text); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs index d0878ad429..0835e25fcb 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDateTimePeriodExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -68,7 +69,15 @@ public SpanishDateTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration { TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); SingleDateExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); SingleTimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs index a8dd368966..6d24759df5 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishDurationExtractorConfiguration.cs @@ -1,6 +1,7 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -63,7 +64,16 @@ public class SpanishDurationExtractorConfiguration : BaseDateTimeOptionsConfigur public SpanishDurationExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + UnitMap = DateTimeDefinitions.UnitMap.ToImmutableDictionary(); UnitValueMap = DateTimeDefinitions.UnitValueMap.ToImmutableDictionary(); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs index fd17c5b50d..6e759efa09 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs @@ -3,6 +3,7 @@ using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.Matcher; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -65,7 +66,16 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) DateTimeAltExtractor = new BaseDateTimeAltExtractor(new SpanishDateTimeAltExtractorConfiguration(this)); HolidayExtractor = new BaseHolidayExtractor(new SpanishHolidayExtractorConfiguration(this)); TimeZoneExtractor = new BaseTimeZoneExtractor(new SpanishTimeZoneExtractorConfiguration(this)); - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); } public IDateExtractor DateExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs index 6f8b029f1b..9791bed008 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishTimePeriodExtractorConfiguration.cs @@ -4,6 +4,7 @@ using Microsoft.Recognizers.Definitions.Spanish; using Microsoft.Recognizers.Text.DateTime.Spanish.Utilities; using Microsoft.Recognizers.Text.DateTime.Utilities; +using Microsoft.Recognizers.Text.Number; namespace Microsoft.Recognizers.Text.DateTime.Spanish { @@ -62,7 +63,17 @@ public SpanishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con TokenBeforeDate = DateTimeDefinitions.TokenBeforeDate; SingleTimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); UtilityConfiguration = new SpanishDatetimeUtilityConfiguration(); - IntegerExtractor = Number.English.IntegerExtractor.GetInstance(); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + IntegerExtractor = Number.English.IntegerExtractor.GetInstance(numConfig); + TimeZoneExtractor = new BaseTimeZoneExtractor(new SpanishTimeZoneExtractorConfiguration(this)); } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs index 5111b7baee..097b8509bc 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishCommonDateTimeParserConfiguration.cs @@ -26,11 +26,20 @@ public SpanishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); - CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(); - IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(); - OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + + CardinalExtractor = Number.Spanish.CardinalExtractor.GetInstance(numConfig); + IntegerExtractor = Number.Spanish.IntegerExtractor.GetInstance(numConfig); + OrdinalExtractor = Number.Spanish.OrdinalExtractor.GetInstance(numConfig); + + NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(numConfig)); - NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); DateExtractor = new BaseDateExtractor(new SpanishDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new SpanishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new SpanishDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs index 4fb3225917..737ccdf6f2 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDurationParserConfiguration.cs @@ -36,7 +36,7 @@ public SpanishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IParser NumberParser { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public Regex NumberCombinedWithUnit { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs index 5e7ad94c6d..4220b4da1c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDateExtractorConfiguration.cs @@ -126,10 +126,19 @@ public class TurkishDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio public TurkishDateExtractorConfiguration(IDateTimeOptionsConfiguration config) : base(config) { + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + IntegerExtractor = Number.Turkish.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(numConfig))); DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); UtilityConfiguration = new TurkishDatetimeUtilityConfiguration(); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs index c1908a7304..bd5a9b5570 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishDatePeriodExtractorConfiguration.cs @@ -250,10 +250,20 @@ public TurkishDatePeriodExtractorConfiguration(IDateTimeOptionsConfiguration con : base(config) { DatePointExtractor = new BaseDateExtractor(new TurkishDateExtractorConfiguration(this)); + DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); + + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); - DurationExtractor = new BaseDurationExtractor(new TurkishDurationExtractorConfiguration(this)); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(numConfig)); } public IDateExtractor DatePointExtractor { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs index 7c28345ec0..bf5e3c4c7d 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishCommonDateTimeParserConfiguration.cs @@ -26,12 +26,22 @@ public TurkishCommonDateTimeParserConfiguration(IDateTimeOptionsConfiguration co WrittenDecades = DateTimeDefinitions.WrittenDecades.ToImmutableDictionary(); SpecialDecadeCases = DateTimeDefinitions.SpecialDecadeCases.ToImmutableDictionary(); + var numOptions = NumberOptions.None; + if ((config.Options & DateTimeOptions.NoProtoCache) != 0) + { + numOptions = NumberOptions.NoProtoCache; + } + + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, numOptions); + CardinalExtractor = Number.Turkish.CardinalExtractor.GetInstance(); IntegerExtractor = Number.Turkish.IntegerExtractor.GetInstance(); OrdinalExtractor = Number.Turkish.OrdinalExtractor.GetInstance(); + NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(numConfig)); + TimeZoneParser = new BaseTimeZoneParser(); - NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(new BaseNumberOptionsConfiguration(config.Culture))); + DateExtractor = new BaseDateExtractor(new TurkishDateExtractorConfiguration(this)); TimeExtractor = new BaseTimeExtractor(new TurkishTimeExtractorConfiguration(this)); DateTimeExtractor = new BaseDateTimeExtractor(new TurkishDateTimeExtractorConfiguration(this)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs index 62cc97b128..5be4f7bf91 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Parsers/TurkishDurationParserConfiguration.cs @@ -35,7 +35,7 @@ public TurkishDurationParserConfiguration(ICommonDateTimeParserConfiguration con public IExtractor CardinalExtractor { get; } - public IExtractor DurationExtractor { get; } + public IDateTimeExtractor DurationExtractor { get; } public IParser NumberParser { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs index 9d99c3ef4d..0bdd3997cf 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DurationParsingUtil.cs @@ -116,6 +116,7 @@ private static DateObject GetShiftResult(IImmutableDictionary ti { var unitStr = pair.Key; var number = pair.Value; + switch (unitStr) { case "H": diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs index e36344c155..3bdbc276f4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -87,17 +88,17 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti var build = new StringBuilder("T"); if (timeResult.Hour >= 0) { - build.Append(timeResult.Hour.ToString("D2")); + build.Append(timeResult.Hour.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Minute >= 0) { - build.Append(":" + timeResult.Minute.ToString("D2")); + build.Append(":" + timeResult.Minute.ToString("D2", CultureInfo.InvariantCulture)); } if (timeResult.Second >= 0) { - build.Append(":" + timeResult.Second.ToString("D2")); + build.Append(":" + timeResult.Second.ToString("D2", CultureInfo.InvariantCulture)); } if (noDesc) @@ -112,8 +113,10 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti hour = 0; } - dateTimeResult.FutureValue = dateTimeResult.PastValue = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); + dateTimeResult.FutureValue = dateTimeResult.PastValue = + DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); dateTimeResult.Success = true; + return dateTimeResult; } @@ -126,7 +129,7 @@ public int MatchToValue(string text) if (Regex.IsMatch(text, @"\d+")) { - return int.Parse(text); + return int.Parse(text, CultureInfo.InvariantCulture); } if (text.Length == 1) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs index 861586e776..b588f9ffb9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/Token.cs @@ -2,9 +2,11 @@ using System.Linq; using System.Text.RegularExpressions; +using Microsoft.Recognizers.Text.InternalCache; + namespace Microsoft.Recognizers.Text.DateTime { - public class Token + public class Token : ICloneableType { public Token(int s, int e, Metadata metadata = null) @@ -95,8 +97,8 @@ public static List MergeAllTokens(List tokens, string text { var start = token.Start; var length = token.Length; - var substr = text.Substring(start, length); - + var substr = text.Substring(start, length); + var er = new ExtractResult { Start = start, @@ -112,5 +114,10 @@ public static List MergeAllTokens(List tokens, string text return ret; } + + public Token Clone() + { + return (Token)MemberwiseClone(); + } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs index 9567bc8781..e68a254a6c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/CardinalExtractor.cs @@ -5,17 +5,20 @@ namespace Microsoft.Recognizers.Text.Number.Chinese { + public class CardinalExtractor : BaseNumberExtractor { + // CardinalExtractor = Int + Double - public CardinalExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public CardinalExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { + var builder = ImmutableDictionary.CreateBuilder(); - var intExtractChs = new IntegerExtractor(mode); + var intExtractChs = new IntegerExtractor(config, mode); builder.AddRange(intExtractChs.Regexes); - var douExtractorChs = new DoubleExtractor(); + var douExtractorChs = new DoubleExtractor(config); builder.AddRange(douExtractorChs.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs index 80bdc35102..3255468cf2 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/DoubleExtractor.cs @@ -11,7 +11,7 @@ public class DoubleExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public DoubleExtractor() + public DoubleExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs index 003d62f206..ff8f845ff3 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/FractionExtractor.cs @@ -11,7 +11,7 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public FractionExtractor() + public FractionExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs index 748122bfc8..a05d1b1541 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/IntegerExtractor.cs @@ -11,7 +11,7 @@ public class IntegerExtractor : BaseNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public IntegerExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public IntegerExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var regexes = new Dictionary() { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs index b0daf200e2..a39ff6a82c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberExtractor.cs @@ -8,16 +8,16 @@ namespace Microsoft.Recognizers.Text.Number.Chinese public class NumberExtractor : BaseNumberExtractor { - public NumberExtractor(CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) + public NumberExtractor(BaseNumberOptionsConfiguration config, CJKNumberExtractorMode mode = CJKNumberExtractorMode.Default) { var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal - var cardExtractChs = new CardinalExtractor(mode); + var cardExtractChs = new CardinalExtractor(config, mode); builder.AddRange(cardExtractChs.Regexes); // Add Fraction - var fracExtractChs = new FractionExtractor(); + var fracExtractChs = new FractionExtractor(config); builder.AddRange(fracExtractChs.Regexes); Regexes = builder.ToImmutable(); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs index fd5993e77d..54210563b9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/NumberRangeExtractor.cs @@ -8,7 +8,10 @@ namespace Microsoft.Recognizers.Text.Number.Chinese public class NumberRangeExtractor : BaseNumberRangeExtractor { public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(new NumberExtractor(), new OrdinalExtractor(), new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)), config) + : base(new NumberExtractor(new BaseNumberOptionsConfiguration(config)), + new OrdinalExtractor(new BaseNumberOptionsConfiguration(config)), + new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)), + config) { var regexes = new Dictionary() { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs index 28eaa3f92a..44606c2a44 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/OrdinalExtractor.cs @@ -11,7 +11,7 @@ public class OrdinalExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public OrdinalExtractor() + public OrdinalExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs index 191d6f038d..2ac6ae3e51 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Extractors/PercentageExtractor.cs @@ -11,7 +11,7 @@ public class PercentageExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - public PercentageExtractor() + public PercentageExtractor(BaseNumberOptionsConfiguration config) { var regexes = new Dictionary { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs index fa55e8af90..fc2c0e3c98 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberParserConfiguration.cs @@ -16,7 +16,7 @@ public class ChineseNumberParserConfiguration : BaseNumberParserConfiguration, I public ChineseNumberParserConfiguration(INumberOptionsConfiguration config) { - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.Config = config; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs index e654f8ca45..b880f578ed 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Chinese/Parsers/ChineseNumberRangeParserConfiguration.cs @@ -13,8 +13,10 @@ public ChineseNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = new NumberExtractor(); - OrdinalExtractor = new OrdinalExtractor(); + var numConfig = new BaseNumberOptionsConfiguration(config); + + NumberExtractor = new NumberExtractor(numConfig); + OrdinalExtractor = new OrdinalExtractor(numConfig); NumberParser = new BaseCJKNumberParser(new ChineseNumberParserConfiguration(config)); MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs index 1e4a2aed9f..81e547881b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Config/BaseNumberOptionsConfiguration.cs @@ -1,12 +1,17 @@ -namespace Microsoft.Recognizers.Text.Number +using Microsoft.Recognizers.Definitions; + +namespace Microsoft.Recognizers.Text.Number { public class BaseNumberOptionsConfiguration : INumberOptionsConfiguration { - public BaseNumberOptionsConfiguration(string culture, NumberOptions options = NumberOptions.None, NumberMode mode = NumberMode.Default) + + public BaseNumberOptionsConfiguration(string culture, NumberOptions options = NumberOptions.None, + NumberMode mode = NumberMode.Default, string placeholder = BaseNumbers.PlaceHolderDefault) { Culture = culture; Options = options; Mode = mode; + Placeholder = placeholder; } public BaseNumberOptionsConfiguration(INumberOptionsConfiguration config) @@ -14,12 +19,15 @@ public BaseNumberOptionsConfiguration(INumberOptionsConfiguration config) Culture = config.Culture; Options = config.Options; Mode = config.Mode; + Placeholder = config.Placeholder; } public NumberOptions Options { get; } public NumberMode Mode { get; } + public string Placeholder { get; } + public string Culture { get; } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs index c4cde05c99..824b08b32a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Config/INumberOptionsConfiguration.cs @@ -7,5 +7,7 @@ public interface INumberOptionsConfiguration : IConfiguration NumberOptions Options { get; } NumberMode Mode { get; } + + string Placeholder { get; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs index 864177553c..12da1d6456 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs @@ -25,20 +25,20 @@ public static class Constants // NARROW NO-BREAK SPACE public const char NO_BREAK_SPACE = '\u202f'; - // Language Markers + // Language Markers - ISO 639-2 B public const string ENGLISH = "Eng"; - public const string CHINESE = "Chs"; - public const string FRENCH = "Fr"; + public const string CHINESE = "Chi"; + public const string FRENCH = "Fre"; public const string GERMAN = "Ger"; public const string JAPANESE = "Jpn"; public const string PORTUGUESE = "Por"; public const string SPANISH = "Spa"; - public const string DUTCH = "Nl"; + public const string DUTCH = "Dut"; public const string KOREAN = "Kor"; public const string ITALIAN = "Ita"; public const string SWEDISH = "Swe"; public const string BULGARIAN = "Bul"; - public const string TURKISH = "Tr"; + public const string TURKISH = "Tur"; public const string HINDI = "Hin"; // Regex Prefixes / Suffixes diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs index 9674966d12..f6271e2f20 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/CardinalExtractor.cs @@ -2,24 +2,29 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Dutch; - namespace Microsoft.Recognizers.Text.Number.Dutch { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { - private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); + private static readonly ConcurrentDictionary Instances = + new ConcurrentDictionary(); + + private readonly string keyPrefix; - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -29,15 +34,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs index ccf068caa8..d1defdfd4a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/DoubleExtractor.cs @@ -13,16 +13,17 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -46,15 +47,15 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -66,15 +67,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs index d867346088..34e81cd6d9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/FractionExtractor.cs @@ -11,12 +11,12 @@ public class FractionExtractor : BaseNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { @@ -40,7 +40,7 @@ private FractionExtractor(NumberMode mode, NumberOptions options) }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { if ((Options & NumberOptions.PercentageMode) != 0) { @@ -59,22 +59,23 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs index abb75af917..8cfd749ebe 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/IntegerExtractor.cs @@ -7,18 +7,24 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -42,19 +48,19 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.DUTCH) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -66,15 +72,24 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs index bbacc4f841..e53e1eaf20 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberExtractor.cs @@ -2,32 +2,37 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Dutch; namespace Microsoft.Recognizers.Text.Number.Dutch { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode, NumberOptions options) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); - Options = options; + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -40,13 +45,13 @@ private NumberExtractor(NumberMode mode, NumberOptions options) if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -54,7 +59,7 @@ private NumberExtractor(NumberMode mode, NumberOptions options) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { @@ -69,22 +74,26 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs index c0b9a3e75a..480ece8a8b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/NumberRangeExtractor.cs @@ -12,8 +12,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + : base(NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new DutchNumberParserConfiguration(config)), config) { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs index 2abe35fd9e..e190a7236f 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/OrdinalExtractor.cs @@ -7,15 +7,21 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { @@ -43,15 +49,23 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs index 8d3182b0e8..fa7652b856 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Extractors/PercentageExtractor.cs @@ -8,10 +8,10 @@ namespace Microsoft.Recognizers.Text.Number.Dutch { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs index c3bde4f6b9..9d56140888 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberParserConfiguration.cs @@ -13,7 +13,7 @@ public class DutchNumberParserConfiguration : BaseNumberParserConfiguration public DutchNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs index 4ed4e4174e..2d34bf0d47 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Dutch/Parsers/DutchNumberRangeParserConfiguration.cs @@ -13,8 +13,10 @@ public DutchNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Dutch.NumberExtractor.GetInstance(); - OrdinalExtractor = Dutch.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Dutch.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Dutch.OrdinalExtractor.GetInstance(numConfig); // @TODO Change init to follow design in other languages NumberParser = new BaseNumberParser(new DutchNumberParserConfiguration(config)); diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs index b3a65c2e4d..385501ae29 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/CardinalExtractor.cs @@ -2,25 +2,29 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.English; - namespace Microsoft.Recognizers.Text.Number.English { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -30,15 +34,24 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs index 84fe37ba1c..f8b60accee 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/DoubleExtractor.cs @@ -14,16 +14,18 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -47,15 +49,15 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumCommaDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumBlankDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -67,15 +69,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs index 4886ae0140..7aeed3a056 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/FractionExtractor.cs @@ -9,14 +9,15 @@ namespace Microsoft.Recognizers.Text.Number.English { public class FractionExtractor : BaseNumberExtractor { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { @@ -39,7 +40,7 @@ private FractionExtractor(NumberMode mode, NumberOptions options) }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { if ((Options & NumberOptions.PercentageMode) != 0) { @@ -60,20 +61,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs index ccf0498a94..79274c6e6b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/IntegerExtractor.cs @@ -7,19 +7,25 @@ namespace Microsoft.Recognizers.Text.Number.English { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -43,15 +49,15 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.ENGLISH) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -63,15 +69,24 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; // "Integer"; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs index 03e103cceb..4baa70060d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/MergedNumberExtractor.cs @@ -12,9 +12,9 @@ internal class MergedNumberExtractor : BaseMergedNumberExtractor private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), MergedNumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), MergedNumberExtractor>(); - public MergedNumberExtractor(NumberMode mode, NumberOptions options) + public MergedNumberExtractor(BaseNumberOptionsConfiguration config) { - NumberExtractor = English.NumberExtractor.GetInstance(mode, options); + NumberExtractor = English.NumberExtractor.GetInstance(config); RoundNumberIntegerRegexWithLocks = new Regex(NumbersDefinitions.RoundNumberIntegerRegexWithLocks, RegexFlags); ConnectorRegex = new Regex(NumbersDefinitions.ConnectorRegex, RegexFlags); } @@ -25,18 +25,18 @@ public MergedNumberExtractor(NumberMode mode, NumberOptions options) public sealed override Regex ConnectorRegex { get; set; } - public static MergedNumberExtractor GetInstance( - NumberMode mode = NumberMode.Default, - NumberOptions options = NumberOptions.None) + public static MergedNumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new MergedNumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new MergedNumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs index 428f745565..9d04f4d184 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberExtractor.cs @@ -2,41 +2,45 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.Number.English { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode, NumberOptions options) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); - Options = options; - var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: - builder.Add( - BaseNumberExtractor.CurrencyRegex, - RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); + builder.Add(BaseNumberExtractor.CurrencyRegex, + RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX)); break; case NumberMode.Unit: break; @@ -46,13 +50,13 @@ private NumberExtractor(NumberMode mode, NumberOptions options) if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -60,7 +64,7 @@ private NumberExtractor(NumberMode mode, NumberOptions options) var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like 'that one' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { @@ -75,8 +79,6 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } @@ -85,16 +87,23 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs index 9a65273485..9a23472362 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/NumberRangeExtractor.cs @@ -12,11 +12,12 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor public NumberRangeExtractor(INumberOptionsConfiguration config) : base( - NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), - new BaseNumberParser(new EnglishNumberParserConfiguration(config)), - config) + NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + new BaseNumberParser(new EnglishNumberParserConfiguration(config)), + config) { + var regexes = new Dictionary() { { diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs index abde8c3b84..12360b28ec 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/OrdinalExtractor.cs @@ -7,7 +7,7 @@ namespace Microsoft.Recognizers.Text.Number.English { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,9 +15,14 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor(NumberOptions options) - : base(options) + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); @@ -53,16 +58,23 @@ private OrdinalExtractor(NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static OrdinalExtractor GetInstance(NumberOptions options = NumberOptions.None) + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = options.ToString(); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(options); - Instances.TryAdd(cacheKey, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } + + protected override object GenKey(string input) + { + return (keyPrefix, input); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs index 779c73fe8a..73797ffe89 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Extractors/PercentageExtractor.cs @@ -8,10 +8,10 @@ namespace Microsoft.Recognizers.Text.Number.English { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } @@ -19,7 +19,7 @@ public PercentageExtractor(NumberOptions options = NumberOptions.None) protected override ImmutableHashSet InitRegexes() { - HashSet regexStrs = new HashSet + HashSet regexStrings = new HashSet { NumbersDefinitions.NumberWithSuffixPercentage, NumbersDefinitions.NumberWithPrefixPercentage, @@ -27,11 +27,11 @@ protected override ImmutableHashSet InitRegexes() if ((Options & NumberOptions.PercentageMode) != 0) { - regexStrs.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage); - regexStrs.Add(NumbersDefinitions.NumberWithPrepositionPercentage); + regexStrings.Add(NumbersDefinitions.FractionNumberWithSuffixPercentage); + regexStrings.Add(NumbersDefinitions.NumberWithPrepositionPercentage); } - return BuildRegexes(regexStrs); + return BuildRegexes(regexStrings); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs index 2c9494b0ec..6fa3e0cdb7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberParserConfiguration.cs @@ -15,7 +15,7 @@ public EnglishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; // @TODO Temporary workaround var culture = config.Culture; diff --git a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs index 469402ab5f..fdc87de642 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/English/Parsers/EnglishNumberRangeParserConfiguration.cs @@ -6,14 +6,17 @@ namespace Microsoft.Recognizers.Text.Number.English { public class EnglishNumberRangeParserConfiguration : INumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public EnglishNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = English.NumberExtractor.GetInstance(); - OrdinalExtractor = English.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = English.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = English.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseNumberParser(new EnglishNumberParserConfiguration(config)); MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs index 5249bf4a78..bff7b18fc1 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseMergedNumberExtractor.cs @@ -71,7 +71,9 @@ public List Extract(string source) { if (idx == 0 || groups[idx] != groups[idx - 1]) { - var tmpExtractResult = ers[idx]; + + var tmpExtractResult = ers[idx].Clone(); + tmpExtractResult.Data = new List { new ExtractResult diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs index e26aae4c02..372ed574e6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs @@ -5,6 +5,7 @@ using System.Text.RegularExpressions; using Microsoft.Recognizers.Definitions; +using Microsoft.Recognizers.Text.InternalCache; namespace Microsoft.Recognizers.Text.Number { @@ -13,19 +14,21 @@ public abstract class BaseNumberExtractor : IExtractor public static readonly Regex CurrencyRegex = new Regex(BaseNumbers.CurrencyRegex, RegexOptions.Singleline | RegexOptions.ExplicitCapture); + protected static readonly ResultsCache ResultsCache = new ResultsCache(4); + protected BaseNumberExtractor(NumberOptions options = NumberOptions.None) { Options = options; } + public virtual NumberOptions Options { get; } = NumberOptions.None; + internal abstract ImmutableDictionary Regexes { get; } protected virtual ImmutableDictionary AmbiguityFiltersDict { get; } = null; protected virtual string ExtractType { get; } = string.Empty; - protected virtual NumberOptions Options { get; } = NumberOptions.None; - protected virtual Regex NegativeNumberTermsRegex { get; } = null; protected virtual Regex AmbiguousFractionConnectorsRegex { get; } = null; @@ -158,7 +161,8 @@ private List FilterAmbiguity(List extractResults, if (regex.Key.IsMatch(extractResult.Text)) { var matches = regex.Value.Matches(text).Cast(); - extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && + m.Index + m.Length > er.Start)) .ToList(); } } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs index 2f8dae045d..af5211fd12 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberRangeExtractor.cs @@ -99,8 +99,9 @@ public virtual List Extract(string source) { foreach (var result in results) { - if (result.Data.ToString() == NumberRangeConstants.TWONUMBETWEEN || - result.Data.ToString() == NumberRangeConstants.TWONUMTILL) + var data = result.Data.ToString(); + if (data == NumberRangeConstants.TWONUMBETWEEN || + data == NumberRangeConstants.TWONUMTILL) { result.Data = NumberRangeConstants.TWONUMCLOSED; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs index 9354fa7b6c..fed195a850 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BasePercentageExtractor.cs @@ -16,14 +16,15 @@ public abstract class BasePercentageExtractor : IExtractor private readonly BaseNumberExtractor numberExtractor; - public BasePercentageExtractor(BaseNumberExtractor numberExtractor) + protected BasePercentageExtractor(BaseNumberExtractor numberExtractor) { + this.Options = numberExtractor.Options; this.numberExtractor = numberExtractor; } protected string ExtractType { get; set; } = Constants.SYS_NUM_PERCENTAGE; - protected virtual NumberOptions Options { get; } = NumberOptions.None; + protected virtual NumberOptions Options { get; } protected ImmutableHashSet Regexes { get; set; } @@ -77,6 +78,7 @@ public List Extract(string source) int start = last + 1; int length = i - last; string substr = source.Substring(start, length); + ExtractResult er = new ExtractResult { Start = start, @@ -84,6 +86,7 @@ public List Extract(string source) Text = substr, Type = ExtractType, }; + result.Add(er); } } @@ -102,23 +105,24 @@ public List Extract(string source) /// /// read the rules. /// - /// rule list. + /// rule list. /// . /// Immutable HashSet of regex. - protected static ImmutableHashSet BuildRegexes(HashSet regexStrs, bool ignoreCase = false) + protected static ImmutableHashSet BuildRegexes(HashSet regexStrings, bool ignoreCase = false) { var regexes = new HashSet(); - foreach (var regexStr in regexStrs) + foreach (var regexString in regexStrings) { // var sl = "(?=\\b)(" + regexStr + ")(?=(s?\\b))"; - var options = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + var regexOptions = RegexOptions.Singleline | RegexOptions.ExplicitCapture; + if (ignoreCase) { - options = options | RegexOptions.IgnoreCase; + regexOptions |= RegexOptions.IgnoreCase; } - Regex regex = new Regex(regexStr, options); + Regex regex = new Regex(regexString, regexOptions); regexes.Add(regex); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs new file mode 100644 index 0000000000..377698671d --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/CachedNumberExtractor.cs @@ -0,0 +1,35 @@ +using System.Collections.Generic; + +namespace Microsoft.Recognizers.Text.Number +{ + public abstract class CachedNumberExtractor : BaseNumberExtractor + { + + protected CachedNumberExtractor(NumberOptions options = NumberOptions.None) + : base(options) + { + } + + public override List Extract(string source) + { + + List results; + + if ((this.Options & NumberOptions.NoProtoCache) != 0) + { + results = base.Extract(source); + } + else + { + var key = GenKey(source); + + results = ResultsCache.GetOrCreate(key, () => base.Extract(source)); + } + + return results; + } + + protected abstract object GenKey(string input); + + } +} diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs index 756e4202df..bf637c1a30 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/FractionExtractor.cs @@ -50,9 +50,9 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs index 3601c684c6..b451dcb5a2 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Extractors/NumberExtractor.cs @@ -61,12 +61,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; public static NumberExtractor GetInstance( diff --git a/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs index c9385a57d4..4438ee56df 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/French/Parsers/FrenchNumberParserConfiguration.cs @@ -17,7 +17,7 @@ public FrenchNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs index d0cdc00350..eca56ba511 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/German/Parsers/GermanNumberParserConfiguration.cs @@ -17,7 +17,7 @@ public GermanNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs index 7ea34d2ffb..1efff0d172 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/FractionExtractor.cs @@ -60,9 +60,9 @@ private FractionExtractor(NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs index 1704a3632b..354f5ca4d7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/NumberExtractor.cs @@ -69,12 +69,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs index 006c52cab7..c25766aba6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Extractors/OrdinalExtractor.cs @@ -16,6 +16,7 @@ public class OrdinalExtractor : BaseNumberExtractor private OrdinalExtractor(NumberOptions options) : base(options) { + AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); RelativeReferenceRegex = new Regex(NumbersDefinitions.RelativeOrdinalRegex, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs index 5f295296b2..9cafb5353e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberParserConfiguration.cs @@ -15,7 +15,7 @@ public HindiNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs index 54a605d66b..302f9d9d27 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Hindi/Parsers/HindiNumberRangeParserConfiguration.cs @@ -6,14 +6,17 @@ namespace Microsoft.Recognizers.Text.Number.Hindi { public class HindiNumberRangeParserConfiguration : INumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public HindiNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = English.NumberExtractor.GetInstance(); - OrdinalExtractor = English.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = English.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = English.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseNumberParser(new HindiNumberParserConfiguration(config)); MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs index fb2a5173b3..3de3c433f9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/FractionExtractor.cs @@ -50,9 +50,9 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs index e3dc95a276..7a28dd284a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Extractors/NumberExtractor.cs @@ -64,12 +64,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs index 5c24aa5940..8bb9c5faee 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Italian/Parsers/ItalianNumberParserConfiguration.cs @@ -18,7 +18,7 @@ public ItalianNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs index 476b1de9e9..1c6f7334ce 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Japanese/Parsers/JapaneseNumberParserConfiguration.cs @@ -17,7 +17,7 @@ public JapaneseNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs index f83a852d27..d8ed25045b 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Korean/Parsers/KoreanNumberParserConfiguration.cs @@ -16,7 +16,7 @@ public KoreanNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj index 58e52730a4..2e0a6fb61d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.csproj @@ -1,7 +1,7 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false @@ -18,7 +18,7 @@ --> $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec index a8d58f77d3..af0a4d995e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec @@ -19,8 +19,6 @@ - - diff --git a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml index 1824f6b8fd..ad0765698d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml +++ b/.NET/Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.xml @@ -32,7 +32,7 @@ read the rules. - rule list. + rule list. . Immutable HashSet of regex. @@ -100,6 +100,11 @@ PercentageMode + + + NoProtoCache + + SuppressExtendedTypes, mode that skips extraction of extra types not in v1. May be removed later. diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs index 07ed99c218..32a8b095c8 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberOptions.cs @@ -15,6 +15,11 @@ public enum NumberOptions /// PercentageMode = 1, + /// + /// NoProtoCache + /// + NoProtoCache = 16, + /// /// SuppressExtendedTypes, mode that skips extraction of extra types not in v1. May be removed later. /// diff --git a/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs b/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs index 8fa2ff8508..84d4c6dc06 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs @@ -80,26 +80,27 @@ public NumberRangeModel GetNumberRangeModel(string culture = null, bool fallback protected override void InitializeConfiguration() { + RegisterModel( Culture.English, options => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - English.MergedNumberExtractor.GetInstance(NumberMode.PureNumber, options))); + English.MergedNumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.English, options, NumberMode.PureNumber)))); RegisterModel( Culture.English, options => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - English.OrdinalExtractor.GetInstance(options))); + English.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.English, options)))); RegisterModel( Culture.English, options => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new EnglishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.English, options))), - new English.PercentageExtractor(options))); + new English.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.English, options)))); RegisterModel( Culture.English, @@ -113,21 +114,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.NumberExtractor())); + new Chinese.NumberExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.OrdinalExtractor())); + new Chinese.OrdinalExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new ChineseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Chinese, options))), - new Chinese.PercentageExtractor())); + new Chinese.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Chinese, options)))); RegisterModel( Culture.Chinese, @@ -141,21 +142,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - Spanish.NumberExtractor.GetInstance(NumberMode.PureNumber, options))); + Spanish.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Spanish, options, NumberMode.PureNumber)))); RegisterModel( Culture.Spanish, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - Spanish.OrdinalExtractor.GetInstance())); + Spanish.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Spanish, options)))); RegisterModel( Culture.Spanish, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new SpanishNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Spanish, options))), - new Spanish.PercentageExtractor())); + new Spanish.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Spanish, options)))); RegisterModel( Culture.Spanish, @@ -169,21 +170,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - Portuguese.NumberExtractor.GetInstance(NumberMode.PureNumber, options))); + Portuguese.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Portuguese, options, NumberMode.PureNumber)))); RegisterModel( Culture.Portuguese, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - Portuguese.OrdinalExtractor.GetInstance())); + Portuguese.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Portuguese, options)))); RegisterModel( Culture.Portuguese, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new PortugueseNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Portuguese, options))), - new Portuguese.PercentageExtractor())); + new Portuguese.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Portuguese, options)))); RegisterModel( Culture.French, @@ -297,21 +298,21 @@ protected override void InitializeConfiguration() (options) => new NumberModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new DutchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - Dutch.NumberExtractor.GetInstance(NumberMode.PureNumber))); + Dutch.NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Dutch, options, NumberMode.PureNumber)))); RegisterModel( Culture.Dutch, (options) => new OrdinalModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Ordinal, new DutchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - Dutch.OrdinalExtractor.GetInstance())); + Dutch.OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(Culture.Dutch, options)))); RegisterModel( Culture.Dutch, (options) => new PercentModel( AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new DutchNumberParserConfiguration( new BaseNumberOptionsConfiguration(Culture.Dutch, options))), - new Dutch.PercentageExtractor(options))); + new Dutch.PercentageExtractor(new BaseNumberOptionsConfiguration(Culture.Dutch, options)))); // When registering NumberRangeModel, enable TestNumber_Dutch -> NumberRangeModel tests /* diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs index 205a7154fd..57153a6461 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseCJKNumberParser.cs @@ -1,4 +1,5 @@ using System; +using System.Globalization; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -135,7 +136,7 @@ protected ParseResult ParseFraction(ExtractResult extResult) } else { - intPart = Config.ZeroChar.ToString(); + intPart = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); demoPart = splitResult[0]; numPart = splitResult[1]; } @@ -298,7 +299,7 @@ protected ParseResult ParsePercentage(ExtractResult extResult) var splitResult = Config.PointRegex.Split(doubleText); if (splitResult[0] == string.Empty) { - splitResult[0] = Config.ZeroChar.ToString(); + splitResult[0] = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); } var doubleValue = GetIntValue(splitResult[0]); @@ -371,7 +372,7 @@ protected ParseResult ParseDouble(ExtractResult extResult) if (splitResult[0] == string.Empty) { - splitResult[0] = Config.ZeroChar.ToString(); + splitResult[0] = Config.ZeroChar.ToString(CultureInfo.InvariantCulture); } if (Config.NegativeNumberSignRegex.IsMatch(splitResult[0])) @@ -441,7 +442,7 @@ private double GetDigitValue(string intStr, double power) return intValue; } - // Replace full digtal numbers with half digtal numbers. "4" and "4" are both legal in Japanese, replace "4" with "4", then deal with "4" + // Replace full digit numbers with half digit numbers. "4" and "4" are both legal in Japanese, replace "4" with "4", then deal with "4" private string NormalizeCharWidth(string text) { if (string.IsNullOrWhiteSpace(text)) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs index 8cd0ebb253..fc11cb918e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseIndianNumberParserConfiguration.cs @@ -40,7 +40,7 @@ public class BaseIndianNumberParserConfiguration : IIndianNumberParserConfigurat public string HalfADozenText { get; set; } - public string LangMarker { get; set; } + public string LanguageMarker { get; set; } public char NonDecimalSeparatorChar { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs index 3906305cfa..3d93236543 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs @@ -53,7 +53,7 @@ public virtual ParseResult Parse(ExtractResult extResult) if (!(extResult.Data is string extra)) { - extra = LongFormatRegex.Match(extResult.Text).Success ? Constants.NUMBER_SUFFIX : Config.LangMarker; + extra = LongFormatRegex.Match(extResult.Text).Success ? Constants.NUMBER_SUFFIX : Config.LanguageMarker; } // Resolve symbol prefix @@ -108,12 +108,12 @@ public virtual ParseResult Parse(ExtractResult extResult) { ret = DigitNumberParse(extResult); } - else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LangMarker}")) + else if (extra.Contains($"{Constants.FRACTION_PREFIX}{Config.LanguageMarker}")) { // Such fractions are special cases, parse via another method ret = FracLikeNumberParse(extResult); } - else if (extra.Contains(Config.LangMarker)) + else if (extra.Contains(Config.LanguageMarker)) { ret = TextNumberParse(extResult); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs index e0ead79be8..2c52469d23 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BasePercentageParser.cs @@ -21,7 +21,7 @@ public override ParseResult Parse(ExtractResult extResult) { // for case like "2 out of 5". extResult.Text = $"{extendedData1[0].Item1} {Config.FractionMarkerToken} {extendedData1[1].Item1}"; - extResult.Data = $"Frac{Config.LangMarker}"; + extResult.Data = $"Frac{Config.LanguageMarker}"; ret = base.Parse(extResult); ret.Value = (double)ret.Value * 100; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs index 0d805b424f..8a4992209c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/INumberParserConfiguration.cs @@ -8,6 +8,8 @@ namespace Microsoft.Recognizers.Text.Number { public interface INumberParserConfiguration { + string LanguageMarker { get; } + ImmutableDictionary CardinalNumberMap { get; } ImmutableDictionary OrdinalNumberMap { get; } @@ -34,8 +36,6 @@ public interface INumberParserConfiguration string HalfADozenText { get; } - string LangMarker { get; } - char NonDecimalSeparatorChar { get; } char DecimalSeparatorChar { get; } @@ -100,7 +100,7 @@ public class BaseNumberParserConfiguration : INumberParserConfiguration public string HalfADozenText { get; set; } - public string LangMarker { get; set; } + public string LanguageMarker { get; set; } public char NonDecimalSeparatorChar { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs index 781328a560..efac648a22 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/CardinalExtractor.cs @@ -6,21 +6,27 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class CardinalExtractor : BaseNumberExtractor // Same as Spanish. + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); Regexes = builder.ToImmutable(); @@ -30,15 +36,23 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs index d244014f01..38bbf39c37 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/DoubleExtractor.cs @@ -15,16 +15,18 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -48,11 +50,11 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -64,15 +66,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs index 2ffb3ff55b..dc6d59b989 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/FractionExtractor.cs @@ -12,12 +12,12 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { @@ -40,7 +40,7 @@ private FractionExtractor(NumberMode mode, NumberOptions options) }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { regexes.Add( new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), @@ -52,20 +52,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs index 7b55926b7f..0adcd94adb 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/IntegerExtractor.cs @@ -7,7 +7,7 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,12 +15,18 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -28,15 +34,15 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -68,15 +74,23 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs index 3402564f81..679c3647b7 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/NumberExtractor.cs @@ -2,30 +2,35 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Portuguese; namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -38,13 +43,13 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); this.Regexes = builder.ToImmutable(); @@ -52,7 +57,7 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { @@ -63,24 +68,30 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs index 5fe31bd3c5..d13f333b6a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/OrdinalExtractor.cs @@ -7,7 +7,7 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,8 +15,14 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { @@ -36,15 +42,22 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs index 6bd2423a83..b59913093a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Extractors/PercentageExtractor.cs @@ -8,10 +8,10 @@ namespace Microsoft.Recognizers.Text.Number.Portuguese { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs index 1c4005a11a..53357b64f1 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Portuguese/Parsers/PortugueseNumberParserConfiguration.cs @@ -18,7 +18,7 @@ public PortugueseNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs index 2bfafb9715..d364839fb5 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/CardinalExtractor.cs @@ -2,25 +2,29 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions.Spanish; - namespace Microsoft.Recognizers.Text.Number.Spanish { - public class CardinalExtractor : BaseNumberExtractor + public class CardinalExtractor : CachedNumberExtractor { private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private CardinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var builder = ImmutableDictionary.CreateBuilder(); // Add Integer Regexes - var intExtract = IntegerExtractor.GetInstance(placeholder); + var intExtract = IntegerExtractor.GetInstance(config); builder.AddRange(intExtract.Regexes); // Add Double Regexes - var douExtract = DoubleExtractor.GetInstance(placeholder); + var douExtract = DoubleExtractor.GetInstance(config); builder.AddRange(douExtract.Regexes); this.Regexes = builder.ToImmutable(); @@ -30,15 +34,22 @@ private CardinalExtractor(string placeholder = NumbersDefinitions.PlaceHolderDef protected sealed override string ExtractType { get; } = Constants.SYS_NUM_CARDINAL; // "Cardinal"; - public static CardinalExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static CardinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new CardinalExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new CardinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs index 67d628243e..24dc10c07d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/DoubleExtractor.cs @@ -15,16 +15,18 @@ public class DoubleExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private DoubleExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.DoubleDecimalPointRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleDecimalPointRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(placeholder), RegexFlags), + new Regex(NumbersDefinitions.DoubleWithoutIntegralRegex(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -48,11 +50,11 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.POWER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.DoubleNumNoBreakSpaceComma, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.DOUBLE_PREFIX, Constants.NUMBER_SUFFIX) }, }; @@ -64,15 +66,18 @@ private DoubleExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefau protected sealed override string ExtractType { get; } = Constants.SYS_NUM_DOUBLE; // "Double"; - public static DoubleExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static DoubleExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new DoubleExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new DoubleExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs index 3caba540fb..c208aa398e 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/FractionExtractor.cs @@ -12,12 +12,12 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); - private FractionExtractor(NumberMode mode, NumberOptions options) + private FractionExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - Options = options; var regexes = new Dictionary { @@ -40,7 +40,7 @@ private FractionExtractor(NumberMode mode, NumberOptions options) }; // Not add FractionPrepositionRegex when the mode is Unit to avoid wrong recognize cases like "$1000 over 3" - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { regexes.Add( new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexFlags), @@ -52,20 +52,19 @@ private FractionExtractor(NumberMode mode, NumberOptions options) internal sealed override ImmutableDictionary Regexes { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options, placeholder); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new FractionExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new FractionExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs index 19ee186291..4730454ea5 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/IntegerExtractor.cs @@ -7,7 +7,7 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { - public class IntegerExtractor : BaseNumberExtractor + public class IntegerExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,12 +15,18 @@ public class IntegerExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefault) + private readonly string keyPrefix; + + private IntegerExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Placeholder + "_" + config.Culture); + var regexes = new Dictionary { { - new Regex(NumbersDefinitions.NumbersWithPlaceHolder(placeholder), RegexFlags), + new Regex(NumbersDefinitions.NumbersWithPlaceHolder(config.Placeholder), RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -28,15 +34,15 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { - GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder, RegexFlags), + GenerateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, config.Placeholder, RegexFlags), RegexTagGenerator.GenerateRegexTag(Constants.INTEGER_PREFIX, Constants.NUMBER_SUFFIX) }, { @@ -64,15 +70,22 @@ private IntegerExtractor(string placeholder = NumbersDefinitions.PlaceHolderDefa protected sealed override string ExtractType { get; } = Constants.SYS_NUM_INTEGER; - public static IntegerExtractor GetInstance(string placeholder = NumbersDefinitions.PlaceHolderDefault) + public static IntegerExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Placeholder; + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new IntegerExtractor(placeholder); - Instances.TryAdd(placeholder, instance); + var instance = new IntegerExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs index 775cac68ff..03a34389aa 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberExtractor.cs @@ -2,12 +2,11 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; namespace Microsoft.Recognizers.Text.Number.Spanish { - public class NumberExtractor : BaseNumberExtractor + public class NumberExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,20 +14,26 @@ public class NumberExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); - private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + private readonly string keyPrefix; + + private NumberExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { - NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); - Options = options; + keyPrefix = string.Intern(ExtractType + "_" + config.Options + "_" + config.Mode + "_" + config.Culture); + + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + '$', RegexFlags); var builder = ImmutableDictionary.CreateBuilder(); // Add Cardinal CardinalExtractor cardExtract = null; - switch (mode) + switch (config.Mode) { case NumberMode.PureNumber: - cardExtract = CardinalExtractor.GetInstance(NumbersDefinitions.PlaceHolderPureNumber); + var purNumConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options, config.Mode, + NumbersDefinitions.PlaceHolderPureNumber); + cardExtract = CardinalExtractor.GetInstance(purNumConfig); break; case NumberMode.Currency: builder.Add( @@ -41,13 +46,13 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti if (cardExtract == null) { - cardExtract = CardinalExtractor.GetInstance(); + cardExtract = CardinalExtractor.GetInstance(config); } builder.AddRange(cardExtract.Regexes); // Add Fraction - var fracExtract = FractionExtractor.GetInstance(mode, Options); + var fracExtract = FractionExtractor.GetInstance(config); builder.AddRange(fracExtract.Regexes); Regexes = builder.ToImmutable(); @@ -55,7 +60,7 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti var ambiguityBuilder = ImmutableDictionary.CreateBuilder(); // Do not filter the ambiguous number cases like '$2000' in NumberWithUnit, otherwise they can't be resolved. - if (mode != NumberMode.Unit) + if (config.Mode != NumberMode.Unit) { foreach (var item in NumbersDefinitions.AmbiguityFiltersDict) { @@ -70,23 +75,27 @@ private NumberExtractor(NumberMode mode = NumberMode.Default, NumberOptions opti protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - // "Number" protected sealed override string ExtractType { get; } = Constants.SYS_NUM; protected sealed override Regex NegativeNumberTermsRegex { get; } - public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(BaseNumberOptionsConfiguration config) { - var cacheKey = (mode, options); - if (!Instances.ContainsKey(cacheKey)) + var extractorKey = (config.Mode, config.Options); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new NumberExtractor(mode, options); - Instances.TryAdd(cacheKey, instance); + var instance = new NumberExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[cacheKey]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs index fe09b73d34..6e19009033 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/NumberRangeExtractor.cs @@ -12,8 +12,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), - OrdinalExtractor.GetInstance(), + : base(NumberExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), + OrdinalExtractor.GetInstance(new BaseNumberOptionsConfiguration(config.Culture, config.Options)), new BaseNumberParser(new SpanishNumberParserConfiguration(config)), config) { diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs index f38ff4ae5c..0f80ca8ee3 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/OrdinalExtractor.cs @@ -7,7 +7,7 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { - public class OrdinalExtractor : BaseNumberExtractor + public class OrdinalExtractor : CachedNumberExtractor { private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -15,8 +15,14 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private readonly string keyPrefix; + + private OrdinalExtractor(BaseNumberOptionsConfiguration config) + : base(config.Options) { + + keyPrefix = string.Intern(ExtractType + "_" + config.Options.ToString() + "_" + config.Culture); + var regexes = new Dictionary { { @@ -36,15 +42,22 @@ private OrdinalExtractor() protected sealed override string ExtractType { get; } = Constants.SYS_NUM_ORDINAL; // "Ordinal"; - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(BaseNumberOptionsConfiguration config) { - if (!Instances.ContainsKey(placeholder)) + var extractorKey = config.Options.ToString(); + + if (!Instances.ContainsKey(extractorKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(config); + Instances.TryAdd(extractorKey, instance); } - return Instances[placeholder]; + return Instances[extractorKey]; + } + + protected override object GenKey(string input) + { + return (keyPrefix, input); } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs index e26924cedc..3f1db16926 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Extractors/PercentageExtractor.cs @@ -8,10 +8,10 @@ namespace Microsoft.Recognizers.Text.Number.Spanish { public sealed class PercentageExtractor : BasePercentageExtractor { - public PercentageExtractor(NumberOptions options = NumberOptions.None) - : base(NumberExtractor.GetInstance(options: options)) + public PercentageExtractor(BaseNumberOptionsConfiguration config) + : base(NumberExtractor.GetInstance(config)) { - Options = options; + Options = config.Options; Regexes = InitRegexes(); } @@ -19,7 +19,7 @@ public PercentageExtractor(NumberOptions options = NumberOptions.None) protected override ImmutableHashSet InitRegexes() { - HashSet regexStrs = new HashSet + var regexStrs = new HashSet { NumbersDefinitions.NumberWithPrefixPercentage, }; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs index 24d1e6a482..e7ffcf33d9 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberParserConfiguration.cs @@ -17,7 +17,7 @@ public SpanishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs index 365f782674..8bd02b7528 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Spanish/Parsers/SpanishNumberRangeParserConfiguration.cs @@ -14,8 +14,10 @@ public SpanishNumberRangeParserConfiguration(INumberOptionsConfiguration config) CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Spanish.NumberExtractor.GetInstance(); - OrdinalExtractor = Spanish.OrdinalExtractor.GetInstance(); + var numConfig = new BaseNumberOptionsConfiguration(config.Culture, config.Options); + + NumberExtractor = Spanish.NumberExtractor.GetInstance(numConfig); + OrdinalExtractor = Spanish.OrdinalExtractor.GetInstance(numConfig); NumberParser = new BaseNumberParser(new SpanishNumberParserConfiguration(config)); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/FractionExtractor.cs index 32abf3b261..2347110e8d 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/FractionExtractor.cs @@ -12,11 +12,13 @@ public class FractionExtractor : BaseNumberExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; - private static readonly ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor> Instances = - new ConcurrentDictionary<(NumberMode, NumberOptions, string), FractionExtractor>(); + private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor> Instances = + new ConcurrentDictionary<(NumberMode, NumberOptions), FractionExtractor>(); private FractionExtractor(NumberMode mode, NumberOptions options) + : base(options) { + Options = options; var regexes = new Dictionary @@ -50,16 +52,17 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } // "Fraction"; protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; - public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None, string placeholder = "") + public static FractionExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) { - var cacheKey = (mode, options, placeholder); + var cacheKey = (mode, options); + if (!Instances.ContainsKey(cacheKey)) { var instance = new FractionExtractor(mode, options); diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/NumberExtractor.cs index d35fe4698f..35dc396880 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/NumberExtractor.cs @@ -2,7 +2,6 @@ using System.Collections.Immutable; using System.Text.RegularExpressions; -using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Swedish; namespace Microsoft.Recognizers.Text.Number.Swedish @@ -14,8 +13,14 @@ public class NumberExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor> Instances = new ConcurrentDictionary<(NumberMode, NumberOptions), NumberExtractor>(); + private readonly NumberMode mode; + private NumberExtractor(NumberMode mode, NumberOptions options) + : base(options) { + + this.mode = mode; + NegativeNumberTermsRegex = new Regex(NumbersDefinitions.NegativeNumberTermsRegex + "$", RegexFlags); AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); @@ -71,12 +76,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } @@ -85,9 +90,7 @@ private NumberExtractor(NumberMode mode, NumberOptions options) protected sealed override Regex RelativeReferenceRegex { get; } - public static NumberExtractor GetInstance( - NumberMode mode = NumberMode.Default, - NumberOptions options = NumberOptions.None) + public static NumberExtractor GetInstance(NumberMode mode = NumberMode.Default, NumberOptions options = NumberOptions.None) { var cacheKey = (mode, options); if (!Instances.ContainsKey(cacheKey)) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/OrdinalExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/OrdinalExtractor.cs index f8be68b459..5f59b4bc3c 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/OrdinalExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Extractors/OrdinalExtractor.cs @@ -15,7 +15,8 @@ public class OrdinalExtractor : BaseNumberExtractor private static readonly ConcurrentDictionary Instances = new ConcurrentDictionary(); - private OrdinalExtractor() + private OrdinalExtractor(NumberOptions options) + : base(options) { AmbiguousFractionConnectorsRegex = new Regex(NumbersDefinitions.AmbiguousFractionConnectorsRegex, RegexFlags); @@ -53,15 +54,16 @@ private OrdinalExtractor() protected sealed override Regex RelativeReferenceRegex { get; } - public static OrdinalExtractor GetInstance(string placeholder = "") + public static OrdinalExtractor GetInstance(NumberOptions options = NumberOptions.None) { - if (!Instances.ContainsKey(placeholder)) + var cacheKey = options.ToString(); + if (!Instances.ContainsKey(cacheKey)) { - var instance = new OrdinalExtractor(); - Instances.TryAdd(placeholder, instance); + var instance = new OrdinalExtractor(options); + Instances.TryAdd(cacheKey, instance); } - return Instances[placeholder]; + return Instances[cacheKey]; } } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs index 3c44ebacfe..a3984377dd 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Swedish/Parsers/SwedishNumberParserConfiguration.cs @@ -17,7 +17,7 @@ public SwedishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs index 092c6d81a6..7d093d8638 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/FractionExtractor.cs @@ -45,9 +45,9 @@ private FractionExtractor(NumberMode mode, NumberOptions options) Regexes = regexes.ToImmutableDictionary(); } - internal sealed override ImmutableDictionary Regexes { get; } + public sealed override NumberOptions Options { get; } - protected sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override string ExtractType { get; } = Constants.SYS_NUM_FRACTION; // "Fraction"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs index 776d74c627..1d95af5488 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberExtractor.cs @@ -69,12 +69,12 @@ private NumberExtractor(NumberMode mode, NumberOptions options) AmbiguityFiltersDict = ambiguityBuilder.ToImmutable(); } + public sealed override NumberOptions Options { get; } + internal sealed override ImmutableDictionary Regexes { get; } protected sealed override ImmutableDictionary AmbiguityFiltersDict { get; } - protected sealed override NumberOptions Options { get; } - protected sealed override string ExtractType { get; } = Constants.SYS_NUM; // "Number"; protected sealed override Regex NegativeNumberTermsRegex { get; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs index 20df3d8db8..662738d5fc 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Extractors/NumberRangeExtractor.cs @@ -11,7 +11,8 @@ public class NumberRangeExtractor : BaseNumberRangeExtractor private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public NumberRangeExtractor(INumberOptionsConfiguration config) - : base(NumberExtractor.GetInstance(), + : base( + NumberExtractor.GetInstance(), OrdinalExtractor.GetInstance(), new BaseNumberParser(new TurkishNumberParserConfiguration(config)), config) diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs index 9051ed9429..56c4374b7a 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberParserConfiguration.cs @@ -16,7 +16,7 @@ public TurkishNumberParserConfiguration(INumberOptionsConfiguration config) { this.Config = config; - this.LangMarker = NumbersDefinitions.LangMarker; + this.LanguageMarker = NumbersDefinitions.LangMarker; this.CultureInfo = new CultureInfo(config.Culture); this.IsCompoundNumberLanguage = NumbersDefinitions.CompoundNumberLanguage; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs index 348a74a509..bfcda99686 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Turkish/Parsers/TurkishNumberRangeParserConfiguration.cs @@ -6,15 +6,17 @@ namespace Microsoft.Recognizers.Text.Number.Turkish { public class TurkishNumberRangeParserConfiguration : INumberRangeParserConfiguration { + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public TurkishNumberRangeParserConfiguration(INumberOptionsConfiguration config) { CultureInfo = new CultureInfo(config.Culture); - NumberExtractor = Turkish.NumberExtractor.GetInstance(); - OrdinalExtractor = Turkish.OrdinalExtractor.GetInstance(); + NumberExtractor = Turkish.NumberExtractor.GetInstance(NumberMode.Default, config.Options); + OrdinalExtractor = Turkish.OrdinalExtractor.GetInstance(config.Options); NumberParser = new BaseNumberParser(new TurkishNumberParserConfiguration(config)); + MoreOrEqual = new Regex(NumbersDefinitions.MoreOrEqual, RegexFlags); LessOrEqual = new Regex(NumbersDefinitions.LessOrEqual, RegexFlags); MoreOrEqualSuffix = new Regex(NumbersDefinitions.MoreOrEqualSuffix, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs index 33fced4bb6..cbb5662f85 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Extractors/ChineseNumberWithUnitExtractorConfiguration.cs @@ -5,6 +5,7 @@ using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Chinese; +using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Chinese; using Microsoft.Recognizers.Text.Number.Config; @@ -24,7 +25,11 @@ public abstract class ChineseNumberWithUnitExtractorConfiguration : INumberWithU protected ChineseNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = new NumberExtractor(CJKNumberExtractorMode.ExtractAll); + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.UnitNumExtractor = new NumberExtractor(numConfig, CJKNumberExtractorMode.ExtractAll); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs index 63c5f800a5..b4f904e4b6 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Chinese/Parsers/ChineseNumberWithUnitParserConfiguration.cs @@ -9,9 +9,12 @@ public class ChineseNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public ChineseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = new NumberExtractor(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ChineseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Chinese))); + + var numConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None); + + this.InternalNumberExtractor = new NumberExtractor(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new ChineseNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs index f067ff37fc..7fbd43cd7e 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Extractors/DutchNumberWithUnitExtractorConfiguration.cs @@ -22,7 +22,10 @@ public abstract class DutchNumberWithUnitExtractorConfiguration : INumberWithUni protected DutchNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs index b94f468722..95647315ef 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Dutch/Parsers/DutchNumberWithUnitParserConfiguration.cs @@ -9,9 +9,12 @@ public class DutchNumberWithUnitParserConfiguration : BaseNumberWithUnitParserCo public DutchNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new DutchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(Culture.Dutch))); + + var config = new BaseNumberOptionsConfiguration(Culture.Dutch, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(config); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new DutchNumberParserConfiguration(config)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs index de4ccbd0c8..8c073876ce 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/EnglishNumberWithUnitExtractorConfiguration.cs @@ -25,7 +25,10 @@ public abstract class EnglishNumberWithUnitExtractorConfiguration : INumberWithU protected EnglishNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = string.Empty; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs index 2401b51b74..fe42a3333a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/EnglishNumberWithUnitParserConfiguration.cs @@ -9,9 +9,11 @@ public class EnglishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public EnglishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + var numConfig = new BaseNumberOptionsConfiguration(Culture.English, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new EnglishNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs index 2f5733d038..995f699f64 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/BaseMergedUnitExtractor.cs @@ -58,7 +58,8 @@ private List MergeCompoundUnits(string source) continue; } - if (ers[idx].Data is ExtractResult er && !er.Data.ToString().StartsWith("Integer", StringComparison.Ordinal)) + if (ers[idx].Data is ExtractResult er && + !er.Data.ToString().StartsWith(Number.Constants.INTEGER_PREFIX, StringComparison.Ordinal)) { groups[idx + 1] = groups[idx] + 1; continue; @@ -66,8 +67,14 @@ private List MergeCompoundUnits(string source) var middleBegin = ers[idx].Start + ers[idx].Length ?? 0; var middleEnd = ers[idx + 1].Start ?? 0; + var length = middleEnd - middleBegin; - var middleStr = source.Substring(middleBegin, middleEnd - middleBegin).Trim(); + if (length < 0) + { + continue; // @HERE + } + + var middleStr = source.Substring(middleBegin, length).Trim(); // Separated by whitespace if (string.IsNullOrEmpty(middleStr)) @@ -92,7 +99,7 @@ private List MergeCompoundUnits(string source) { if (idx == 0 || groups[idx] != groups[idx - 1]) { - var tmpExtractResult = ers[idx]; + var tmpExtractResult = ers[idx].Clone(); tmpExtractResult.Data = new List { @@ -141,6 +148,7 @@ private List MergeCompoundUnits(string source) private void MergePureNumber(string source, List ers) { var numErs = config.UnitNumExtractor.Extract(source); + var unitNumbers = new List(); for (int i = 0, j = 0; i < numErs.Count; i++) { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs index 4cb2d24a9e..63b190e4f5 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Extractors/NumberWithUnitExtractor.cs @@ -53,6 +53,7 @@ public static bool ValidateUnit(string source) public List Extract(string source) { + var result = new List(); if (!PreCheckStr(source)) @@ -66,10 +67,10 @@ public List Extract(string source) var unitIsPrefix = new List(); MatchCollection nonUnitMatches = null; - var prefixMatch = prefixMatcher.Find(source).OrderBy(o => o.Start).ToList(); - var suffixMatch = suffixMatcher.Find(source).OrderBy(o => o.Start).ToList(); + var prefixMatches = prefixMatcher.Find(source).OrderBy(o => o.Start).ToList(); + var suffixMatches = suffixMatcher.Find(source).OrderBy(o => o.Start).ToList(); - if (prefixMatch.Count > 0 || suffixMatch.Count > 0) + if (prefixMatches.Count > 0 || suffixMatches.Count > 0) { var numbers = this.config.UnitNumExtractor.Extract(source).OrderBy(o => o.Start); @@ -107,7 +108,7 @@ public List Extract(string source) var lastIndex = start; MatchResult bestMatch = null; - foreach (var m in prefixMatch) + foreach (var m in prefixMatches) { if (m.Length > 0 && m.End > start) { @@ -131,7 +132,7 @@ public List Extract(string source) { var offSet = lastIndex - bestMatch.Start; var unitStr = source.Substring(bestMatch.Start, offSet); - mappingPrefix.Add(number.Start.Value, new PrefixUnitResult { Offset = offSet, UnitStr = unitStr }); + mappingPrefix[number.Start.Value] = new PrefixUnitResult { Offset = offSet, UnitStr = unitStr }; } } @@ -151,16 +152,9 @@ public List Extract(string source) }; // Relative position will be used in Parser - number.Start = start - er.Start; - er.Data = new ExtractResult - { - Data = number.Data, - Length = number.Length, - Metadata = number.Metadata, - Start = number.Start, - Text = number.Text, - Type = number.Type, - }; + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; result.Add(er); unitIsPrefix.Add(true); @@ -178,7 +172,7 @@ public List Extract(string source) var maxlen = 0; var firstIndex = start + length; - foreach (var m in suffixMatch) + foreach (var m in suffixMatches) { if (m.Length > 0 && m.Start >= firstIndex) { @@ -186,7 +180,7 @@ public List Extract(string source) if (maxlen < endpos) { var midStr = source.Substring(firstIndex, m.Start - firstIndex); - if (string.IsNullOrWhiteSpace(midStr) || midStr.Trim().Equals(this.config.ConnectorToken)) + if (string.IsNullOrWhiteSpace(midStr) || midStr.Trim().Equals(this.config.ConnectorToken, StringComparison.Ordinal)) { maxlen = endpos; } @@ -197,6 +191,7 @@ public List Extract(string source) if (maxlen != 0) { var substr = source.Substring(start, length + maxlen); + var er = new ExtractResult { Start = start, @@ -214,8 +209,9 @@ public List Extract(string source) } // Relative position will be used in Parser - number.Start = start - er.Start; - er.Data = number; + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; // Special treatment, handle cases like '2:00 pm', '00 pm' is not dimension var isNotUnit = false; @@ -257,8 +253,10 @@ public List Extract(string source) }; // Relative position will be used in Parser - number.Start = start - er.Start; - er.Data = number; + var numberData = number.Clone(); + numberData.Start = start - er.Start; + er.Data = numberData; + result.Add(er); } } @@ -280,7 +278,7 @@ public List Extract(string source) if (CheckExtractorType(Constants.SYS_UNIT_CURRENCY)) { - result = SelectCandidate(source, result, unitIsPrefix); + result = SelectCandidates(source, result, unitIsPrefix); } return result; @@ -294,6 +292,7 @@ public void ExtractSeparateUnits(string source, List numDependRes { int start = numDependResult.Start.Value; int i = 0; + do { matchResult[start + i++] = true; @@ -483,7 +482,7 @@ private bool CheckExtractorType(string extractorType) return this.config.ExtractType.Equals(extractorType, StringComparison.Ordinal); } - private List SelectCandidate(string source, List extractResults, List unitIsPrefix) + private List SelectCandidates(string source, List extractResults, List unitIsPrefix) { int totalCandidate = unitIsPrefix.Count; bool haveConflict = false; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs index ff92af09b3..c54b88b49c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/French/Parsers/FrenchNumberWithUnitParserConfiguration.cs @@ -11,9 +11,12 @@ public class FrenchNumberWithUnitParserConfiguration : BaseNumberWithUnitParserC public FrenchNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.French, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new FrenchNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new FrenchNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs index 13c1cbb261..addf83293a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Parsers/GermanNumberWithUnitParserConfiguration.cs @@ -9,9 +9,12 @@ public class GermanNumberWithUnitParserConfiguration : BaseNumberWithUnitParserC public GermanNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.German, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new GermanNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new GermanNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs index 6faf00a222..4544ccb2b1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Hindi/Parsers/HindiNumberWithUnitParserConfiguration.cs @@ -9,9 +9,11 @@ public class HindiNumberWithUnitParserConfiguration : BaseNumberWithUnitParserCo public HindiNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + var numConfig = new BaseNumberOptionsConfiguration(Culture.Hindi, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new HindiNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new HindiNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs index f78ac78286..be8533f5b0 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Italian/Parsers/ItalianNumberWithUnitParserConfiguration.cs @@ -11,9 +11,12 @@ public class ItalianNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public ItalianNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Italian, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new ItalianNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new ItalianNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs index ad5d0a46b3..20267b968c 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Japanese/Parsers/JapaneseNumberWithUnitParserConfiguration.cs @@ -9,9 +9,12 @@ public class JapaneseNumberWithUnitParserConfiguration : BaseNumberWithUnitParse public JapaneseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Japanese, NumberOptions.None); + this.InternalNumberExtractor = new NumberExtractor(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new JapaneseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new JapaneseNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj index be682a0dbd..4ecf222c85 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.csproj @@ -1,13 +1,14 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false ../Recognizers-Text.ruleset - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec index 3c338b3191..2ba19fed7a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec @@ -20,8 +20,6 @@ - - diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml index 5f327ef61f..5111ae00a3 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.xml @@ -9,5 +9,15 @@ Represents None + + + NoProtoCache + + + + + EnablePreview + + diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs index cce72c53db..bb6ecd00b4 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/NumberWithUnitOptions.cs @@ -9,5 +9,15 @@ public enum NumberWithUnitOptions /// Represents None /// None = 0, + + /// + /// NoProtoCache + /// + NoProtoCache = 16, + + /// + /// EnablePreview + /// + EnablePreview = 8388608, // 2 ^23 } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs index 4ecfc68ef8..688c6d679a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/PortugueseNumberWithUnitExtractorConfiguration.cs @@ -23,7 +23,10 @@ public abstract class PortugueseNumberWithUnitExtractorConfiguration : INumberWi protected PortugueseNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs index fe3683fb4e..42eb05ca4d 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Parsers/PortugueseNumberWithUnitParserConfiguration.cs @@ -11,9 +11,12 @@ public class PortugueseNumberWithUnitParserConfiguration : BaseNumberWithUnitPar public PortugueseNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new PortugueseNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Portuguese, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new PortugueseNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs index 9f27801d34..631dcd6d05 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs @@ -5,7 +5,6 @@ using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; -using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Spanish; @@ -25,7 +24,10 @@ public abstract class SpanishNumberWithUnitExtractorConfiguration : INumberWithU protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; - this.UnitNumExtractor = NumberExtractor.GetInstance(NumberMode.Unit); + + var unitNumConfig = new BaseNumberOptionsConfiguration(ci.Name, NumberOptions.None, NumberMode.Unit); + this.UnitNumExtractor = NumberExtractor.GetInstance(unitNumConfig); + this.BuildPrefix = NumbersWithUnitDefinitions.BuildPrefix; this.BuildSuffix = NumbersWithUnitDefinitions.BuildSuffix; this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs index cbda06e203..88af1dfb18 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Parsers/SpanishNumberWithUnitParserConfiguration.cs @@ -11,9 +11,12 @@ public class SpanishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public SpanishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { - this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new SpanishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Spanish, NumberOptions.None); + + this.InternalNumberExtractor = NumberExtractor.GetInstance(numConfig); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new SpanishNumberParserConfiguration(numConfig)); this.ConnectorToken = NumbersWithUnitDefinitions.ConnectorToken; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs index 527568adf7..41eae1826b 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Turkish/Parsers/TurkishNumberWithUnitParserConfiguration.cs @@ -9,9 +9,12 @@ public class TurkishNumberWithUnitParserConfiguration : BaseNumberWithUnitParser public TurkishNumberWithUnitParserConfiguration(CultureInfo ci) : base(ci) { + + var numConfig = new BaseNumberOptionsConfiguration(Culture.Turkish, NumberOptions.None); + this.InternalNumberExtractor = NumberExtractor.GetInstance(); - this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new TurkishNumberParserConfiguration( - new BaseNumberOptionsConfiguration(ci.Name))); + this.InternalNumberParser = AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, + new TurkishNumberParserConfiguration(numConfig)); this.ConnectorToken = string.Empty; } diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj index 7ab110fe51..5756386ee7 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.csproj @@ -1,12 +1,13 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false ../Recognizers-Text.ruleset + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all @@ -42,4 +44,5 @@ + diff --git a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec index f7f49a63b3..70056bc6a5 100644 --- a/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec +++ b/.NET/Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec @@ -18,8 +18,6 @@ - - diff --git a/.NET/Microsoft.Recognizers.Text.sln b/.NET/Microsoft.Recognizers.Text.sln index 24ff5dfddf..1f491d825e 100644 --- a/.NET/Microsoft.Recognizers.Text.sln +++ b/.NET/Microsoft.Recognizers.Text.sln @@ -52,9 +52,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Patterns", "Patterns", "{23 ..\Patterns\German\German-DateTime.yaml = ..\Patterns\German\German-DateTime.yaml ..\Patterns\German\German-Numbers.yaml = ..\Patterns\German\German-Numbers.yaml ..\Patterns\German\German-NumbersWithUnit.yaml = ..\Patterns\German\German-NumbersWithUnit.yaml + ..\Patterns\Hindi\Hindi-Choice.yaml = ..\Patterns\Hindi\Hindi-Choice.yaml + ..\Patterns\Hindi\Hindi-DateTime.yaml = ..\Patterns\Hindi\Hindi-DateTime.yaml ..\Patterns\Hindi\Hindi-Numbers.yaml = ..\Patterns\Hindi\Hindi-Numbers.yaml ..\Patterns\Hindi\Hindi-NumbersWithUnit.yaml = ..\Patterns\Hindi\Hindi-NumbersWithUnit.yaml - ..\Patterns\Hindi\Hindi-Choice.yaml = ..\Patterns\Hindi\Hindi-Choice.yaml ..\Patterns\Italian\Italian-Choice.yaml = ..\Patterns\Italian\Italian-Choice.yaml ..\Patterns\Italian\Italian-DateTime.yaml = ..\Patterns\Italian\Italian-DateTime.yaml ..\Patterns\Italian\Italian-Numbers.yaml = ..\Patterns\Italian\Italian-Numbers.yaml diff --git a/.NET/Microsoft.Recognizers.Text.sln.DotSettings b/.NET/Microsoft.Recognizers.Text.sln.DotSettings index d919d7f6b4..fde2f5285d 100644 --- a/.NET/Microsoft.Recognizers.Text.sln.DotSettings +++ b/.NET/Microsoft.Recognizers.Text.sln.DotSettings @@ -2,8 +2,10 @@ True True True + True True True True True + True True \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs b/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs index 51b678f170..e1894ab8fd 100644 --- a/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text/Extractors/IExtractor.cs @@ -1,5 +1,7 @@ using System.Collections.Generic; +using Microsoft.Recognizers.Text.InternalCache; + namespace Microsoft.Recognizers.Text { public interface IExtractor @@ -7,7 +9,7 @@ public interface IExtractor List Extract(string input); } - public class ExtractResult + public class ExtractResult : ICloneableType { public int? Start { get; set; } = null; @@ -20,5 +22,11 @@ public class ExtractResult public object Data { get; set; } = null; public Metadata Metadata { get; set; } = null; + + public ExtractResult Clone() + { + return (ExtractResult)MemberwiseClone(); + } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs index 28dc82570e..8ee125ab57 100644 --- a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs +++ b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs @@ -23,5 +23,10 @@ public class Metadata public string RelativeTo { get; set; } = string.Empty; public bool IsMealtime { get; set; } = false; + + public Metadata Clone() + { + return (Metadata)MemberwiseClone(); + } } } diff --git a/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs b/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs new file mode 100644 index 0000000000..81e76b858a --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text/InternalCache/ICloneableType.cs @@ -0,0 +1,7 @@ +namespace Microsoft.Recognizers.Text.InternalCache +{ + public interface ICloneableType + { + T Clone(); + } +} diff --git a/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs b/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs new file mode 100644 index 0000000000..37f7e3893e --- /dev/null +++ b/.NET/Microsoft.Recognizers.Text/InternalCache/ResultsCache.cs @@ -0,0 +1,50 @@ +// ReSharper disable StaticMemberInGenericType + +using System; +using System.Collections.Generic; + +using Microsoft.Extensions.Caching.Memory; + +namespace Microsoft.Recognizers.Text.InternalCache +{ + public class ResultsCache + where TItem : ICloneableType + { + + private const long BaseCacheSize = 20000; + + private const double CompactionPercentage = 0.6; + + private static readonly MemoryCacheEntryOptions CacheEntryOptions = new MemoryCacheEntryOptions().SetSize(1); + + private readonly IMemoryCache resultsCache; + + // In recognizers usage, DateTime has 4 cache instances, while Number only has one. + public ResultsCache(int ratioFactor = 1) + { + + var cacheOptions = new MemoryCacheOptions + { + SizeLimit = BaseCacheSize * ratioFactor, + CompactionPercentage = CompactionPercentage, + ExpirationScanFrequency = TimeSpan.FromHours(24), + }; + + resultsCache = new MemoryCache(cacheOptions); + } + + public List GetOrCreate(object key, Func> createItem) + { + + if (!resultsCache.TryGetValue(key, out List results)) + { + results = createItem(); + + resultsCache.Set(key, results, CacheEntryOptions); + } + + return results.ConvertAll(e => e.Clone()); + } + + } +} diff --git a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj index c5357694d3..6b6939bac7 100644 --- a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj +++ b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.csproj @@ -1,12 +1,13 @@  - netstandard2.0;net462;net452;net45 + netstandard2.0;net462 false false ../Recognizers-Text.ruleset - + + $(OutputPath)$(AssemblyName).xml $(NoWarn),1573,1591,1712 - + + all @@ -27,8 +29,9 @@ runtime; build; native; contentfiles; analyzers - + + diff --git a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec index 0269d1c153..2ae5d8b5ed 100644 --- a/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec +++ b/.NET/Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec @@ -15,13 +15,10 @@ + - - - - diff --git a/.NET/Microsoft.Recognizers.Text/Recognizer.cs b/.NET/Microsoft.Recognizers.Text/Recognizer.cs index 1f6019bb6b..d8a9c228ee 100644 --- a/.NET/Microsoft.Recognizers.Text/Recognizer.cs +++ b/.NET/Microsoft.Recognizers.Text/Recognizer.cs @@ -14,6 +14,7 @@ protected Recognizer(string targetCulture, TRecognizerOptions options, bool lazy this.TargetCulture = targetCulture; this.factory = new ModelFactory(); + InitializeConfiguration(); if (!lazyInitialization) diff --git a/.NET/test-pack.sh b/.NET/test-pack.sh new file mode 100644 index 0000000000..4d1979e577 --- /dev/null +++ b/.NET/test-pack.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +nugetExe=$1 +version=$2 +echo "Version: $version" + +targetDir="./test-pack" + +config="release;basic=$version;number=$version;numberWithUnit=$version" + +$nugetExe pack ./Microsoft.Recognizers.Text/Microsoft.Recognizers.Text.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.Choice/Microsoft.Recognizers.Text.Choice.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.Sequence/Microsoft.Recognizers.Text.Sequence.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.Number/Microsoft.Recognizers.Text.Number.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.NumberWithUnit/Microsoft.Recognizers.Text.NumberWithUnit.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed +$nugetExe pack ./Microsoft.Recognizers.Text.DataTypes.TimexExpression/Microsoft.Recognizers.Text.DataTypes.TimexExpression.nuspec -NonInteractive -OutputDirectory $targetDir -Properties Configuration=$config -Symbols -version "$version" -Verbosity Detailed + diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/ChineseDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/ChineseDateTime.java index 9699f2a802..c65cd03042 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/ChineseDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/ChineseDateTime.java @@ -19,6 +19,8 @@ public class ChineseDateTime { + public static final String LangMarker = "Chi"; + public static final String MonthRegex = "(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)"; public static final String DayRegex = "(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index 6cdc48fc51..66c4612981 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -19,6 +19,8 @@ public class EnglishDateTime { + public static final String LangMarker = "Eng"; + public static final Boolean CheckBothBeforeAfter = false; public static final String TillRegex = "(?\\b(to|(un)?till?|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex})" diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java index edd6e8dcbe..aff14fb218 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java @@ -19,6 +19,8 @@ public class FrenchDateTime { + public static final String LangMarker = "Fre"; + public static final Boolean CheckBothBeforeAfter = false; public static final String TillRegex = "(?au|et|(jusqu')?[aà]|avant|--|-|—|——)"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java index ae7fd24365..7ac3c5be31 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java @@ -19,6 +19,8 @@ public class PortugueseDateTime { + public static final String LangMarker = "Por"; + public static final Boolean CheckBothBeforeAfter = false; public static final String TillRegex = "(?ate|as|às|até|ateh|a|ao|--|-|—|——)(\\s+(o|[aà](s)?))?"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java index 58331b29c5..6adfd7f1db 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java @@ -19,6 +19,8 @@ public class SpanishDateTime { + public static final String LangMarker = "Spa"; + public static final Boolean CheckBothBeforeAfter = false; public static final String TillRegex = "(?hasta|al|a|--|-|—|——)(\\s+(el|la(s)?))?"; diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/DoubleExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/DoubleExtractor.java index 2d5d6629e3..7f6f2c642e 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/DoubleExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/DoubleExtractor.java @@ -35,9 +35,9 @@ public DoubleExtractor() { // 1.0 K builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.DoubleWithMultiplierRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoubleNum"); //15.2万 - builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.DoubleWithThousandsRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoubleChs"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.DoubleWithThousandsRegex, Pattern.UNICODE_CHARACTER_CLASS), "Double" + ChineseNumeric.LangMarker); //四十五点三三 - builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.DoubleAllFloatRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoubleChs"); + builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.DoubleAllFloatRegex, Pattern.UNICODE_CHARACTER_CLASS), "Double" + ChineseNumeric.LangMarker); // 2e6, 21.2e0 builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.DoubleExponentialNotationRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoublePow"); //2^5 diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/FractionExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/FractionExtractor.java index 4617cd38f9..f6b1223c37 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/FractionExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/FractionExtractor.java @@ -32,7 +32,7 @@ public FractionExtractor() { // 8/3 builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.FractionNotationRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracNum"); //四分之六十五 - builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.AllFractionNumber, Pattern.UNICODE_CHARACTER_CLASS), "FracChs"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.AllFractionNumber, Pattern.UNICODE_CHARACTER_CLASS), "Frac" + ChineseNumeric.LangMarker); this.regexes = Collections.unmodifiableMap(builder); } diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/IntegerExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/IntegerExtractor.java index d6eef5c096..4217df0486 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/IntegerExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/IntegerExtractor.java @@ -40,21 +40,21 @@ public IntegerExtractor(ChineseNumberExtractorMode mode) { //1,234, 2,332,111 builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.DottedNumbersSpecialsChar, Pattern.UNICODE_CHARACTER_CLASS), "IntegerNum"); //半百 半打 - builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.NumbersWithHalfDozen, Pattern.UNICODE_CHARACTER_CLASS), "IntegerChs"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.NumbersWithHalfDozen, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + ChineseNumeric.LangMarker); //一打 五十打 - builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.NumbersWithDozen, Pattern.UNICODE_CHARACTER_CLASS), "IntegerChs"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.NumbersWithDozen, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + ChineseNumeric.LangMarker); switch (mode) { case Default: // 一百五十五, 负一亿三百二十二. // Uses an allow list to avoid extracting "四" from "四川" - builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersWithAllowListRegex, Pattern.UNICODE_CHARACTER_CLASS), "IntegerChs"); + builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersWithAllowListRegex, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + ChineseNumeric.LangMarker); break; case ExtractAll: // 一百五十五, 负一亿三百二十二, "四" from "四川". // Uses no allow lists and extracts all potential integers (useful in Units, for example). - builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersAggressiveRegex, Pattern.UNICODE_CHARACTER_CLASS), "IntegerChs"); + builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersAggressiveRegex, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + ChineseNumeric.LangMarker); break; default: diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/OrdinalExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/OrdinalExtractor.java index 9e7120e603..2bcd86e934 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/OrdinalExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/OrdinalExtractor.java @@ -28,10 +28,10 @@ public OrdinalExtractor() { HashMap builder = new HashMap<>(); //第一百五十四 - builder.put(Pattern.compile(ChineseNumeric.OrdinalRegex, Pattern.UNICODE_CHARACTER_CLASS), "OrdinalChs"); + builder.put(Pattern.compile(ChineseNumeric.OrdinalRegex, Pattern.UNICODE_CHARACTER_CLASS), "Ordinal" + ChineseNumeric.LangMarker); //第2565, 第1234 - builder.put(Pattern.compile(ChineseNumeric.OrdinalNumbersRegex, Pattern.UNICODE_CHARACTER_CLASS), "OrdinalChs"); + builder.put(Pattern.compile(ChineseNumeric.OrdinalNumbersRegex, Pattern.UNICODE_CHARACTER_CLASS), "Ordinal" + ChineseNumeric.LangMarker); this.regexes = Collections.unmodifiableMap(builder); } diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/PercentageExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/PercentageExtractor.java index 698f428d86..a3381f5747 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/PercentageExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/chinese/extractors/PercentageExtractor.java @@ -29,10 +29,10 @@ public PercentageExtractor() { HashMap builder = new HashMap<>(); //二十个百分点, 四点五个百分点 - builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.PercentagePointRegex, Pattern.UNICODE_CHARACTER_CLASS), "PerChs"); + builder.put(RegExpUtility.getSafeRegExp(ChineseNumeric.PercentagePointRegex, Pattern.UNICODE_CHARACTER_CLASS), "Per" + ChineseNumeric.LangMarker); //百分之五十 百分之一点五 - builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.SimplePercentageRegex, Pattern.UNICODE_CHARACTER_CLASS), "PerChs"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.SimplePercentageRegex, Pattern.UNICODE_CHARACTER_CLASS), "Per" + ChineseNumeric.LangMarker); //百分之56.2 百分之12 builder.put(RegExpUtility.getSafeLookbehindRegExp(ChineseNumeric.NumbersPercentagePointRegex, Pattern.UNICODE_CHARACTER_CLASS), "PerNum"); diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/DoubleExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/DoubleExtractor.java index 827bf10c1f..6d0ea2f652 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/DoubleExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/DoubleExtractor.java @@ -37,7 +37,7 @@ public DoubleExtractor(String placeholder) { builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleWithoutIntegralRegex(placeholder), Pattern.UNICODE_CHARACTER_CLASS), "DoubleNum"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleWithMultiplierRegex), "DoubleNum"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleWithRoundNumber, Pattern.UNICODE_CHARACTER_CLASS), "DoubleNum"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleAllFloatRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoubleFr"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleAllFloatRegex, Pattern.UNICODE_CHARACTER_CLASS), "Double" + FrenchNumeric.LangMarker); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleExponentialNotationRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoublePow"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.DoubleCaretExponentialNotationRegex, Pattern.UNICODE_CHARACTER_CLASS), "DoublePow"); builder.put(generateLongFormatNumberRegexes(LongFormatType.DoubleNumDotComma, placeholder), "DoubleNum"); diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/FractionExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/FractionExtractor.java index cfd04603ca..18f855c706 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/FractionExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/FractionExtractor.java @@ -30,10 +30,10 @@ public FractionExtractor(NumberMode mode) { builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNotationWithSpacesRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracNum"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNotationRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracNum"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNounRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracFr"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNounWithArticleRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracFr"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNounRegex, Pattern.UNICODE_CHARACTER_CLASS), "Frac" + FrenchNumeric.LangMarker); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionNounWithArticleRegex, Pattern.UNICODE_CHARACTER_CLASS), "Frac" + FrenchNumeric.LangMarker); if (mode != NumberMode.Unit) { - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionPrepositionRegex, Pattern.UNICODE_CHARACTER_CLASS), "FracFr"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.FractionPrepositionRegex, Pattern.UNICODE_CHARACTER_CLASS), "Frac" + FrenchNumeric.LangMarker); } this.regexes = Collections.unmodifiableMap(builder); diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/IntegerExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/IntegerExtractor.java index 0f0c41c359..f349f08826 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/IntegerExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/IntegerExtractor.java @@ -37,8 +37,8 @@ public IntegerExtractor(String placeholder) { builder.put(generateLongFormatNumberRegexes(LongFormatType.IntegerNumDot, placeholder), "IntegerNum"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.RoundNumberIntegerRegexWithLocks, Pattern.UNICODE_CHARACTER_CLASS), "IntegerNum"); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.NumbersWithDozenSuffix, Pattern.UNICODE_CHARACTER_CLASS), "IntegerNum"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.AllIntRegexWithLocks, Pattern.UNICODE_CHARACTER_CLASS), "IntegerFr"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.AllIntRegexWithDozenSuffixLocks, Pattern.UNICODE_CHARACTER_CLASS), "IntegerFr"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.AllIntRegexWithLocks, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + FrenchNumeric.LangMarker); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.AllIntRegexWithDozenSuffixLocks, Pattern.UNICODE_CHARACTER_CLASS), "Integer" + FrenchNumeric.LangMarker); builder.put(generateLongFormatNumberRegexes(LongFormatType.IntegerNumBlank, placeholder), "IntegerNum"); builder.put(generateLongFormatNumberRegexes(LongFormatType.IntegerNumNoBreakSpace, placeholder), "IntegerNum"); diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/OrdinalExtractor.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/OrdinalExtractor.java index ced55486dc..188357df18 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/OrdinalExtractor.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/french/extractors/OrdinalExtractor.java @@ -28,7 +28,7 @@ public OrdinalExtractor() { HashMap builder = new HashMap<>(); builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.OrdinalSuffixRegex, Pattern.UNICODE_CHARACTER_CLASS), "OrdinalNum"); - builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.OrdinalFrenchRegex, Pattern.UNICODE_CHARACTER_CLASS), "OrdFr"); + builder.put(RegExpUtility.getSafeLookbehindRegExp(FrenchNumeric.OrdinalFrenchRegex, Pattern.UNICODE_CHARACTER_CLASS), "Ord" + FrenchNumeric.LangMarker); this.regexes = Collections.unmodifiableMap(builder); } diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java index 62b8030ee6..3cf1218414 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/ChineseNumeric.java @@ -19,7 +19,7 @@ public class ChineseNumeric { - public static final String LangMarker = "Chs"; + public static final String LangMarker = "Chi"; public static final Boolean CompoundNumberLanguage = true; diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java index e9d1876a37..bd6f0961fe 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java @@ -19,7 +19,7 @@ public class FrenchNumeric { - public static final String LangMarker = "Fr"; + public static final String LangMarker = "Fre"; public static final Boolean CompoundNumberLanguage = false; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/chineseDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/chineseDateTime.ts index 768ceb28e3..d1cf6918b4 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/chineseDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/chineseDateTime.ts @@ -11,6 +11,7 @@ import { BaseDateTime } from "./baseDateTime"; export namespace ChineseDateTime { + export const LangMarker = 'Chi'; export const MonthRegex = `(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)`; export const DayRegex = `(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)`; export const DateDayRegexInChinese = `(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts index ed95fad248..69f8c9dc46 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts @@ -11,6 +11,7 @@ import { BaseDateTime } from "./baseDateTime"; export namespace EnglishDateTime { + export const LangMarker = 'Eng'; export const CheckBothBeforeAfter = false; export const TillRegex = `(?\\b(to|(un)?till?|thru|through)\\b|${BaseDateTime.RangeConnectorSymbolRegex})`; export const RangeConnectorRegex = `(?\\b(and|through|to)\\b|${BaseDateTime.RangeConnectorSymbolRegex})`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts index 95ba176696..87582e9bff 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts @@ -11,6 +11,7 @@ import { BaseDateTime } from "./baseDateTime"; export namespace FrenchDateTime { + export const LangMarker = 'Fre'; export const CheckBothBeforeAfter = false; export const TillRegex = `(?au|et|(jusqu')?[aà]|avant|--|-|—|——)`; export const RangeConnectorRegex = `(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts index b95395523d..b11f18d68b 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts @@ -11,6 +11,7 @@ import { BaseDateTime } from "./baseDateTime"; export namespace PortugueseDateTime { + export const LangMarker = 'Por'; export const CheckBothBeforeAfter = false; export const TillRegex = `(?ate|as|às|até|ateh|a|ao|--|-|—|——)(\\s+(o|[aà](s)?))?`; export const AndRegex = `(?e|e\\s*o|--|-|—|——)`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts index 56656adc40..96fd355ef9 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts @@ -11,6 +11,7 @@ import { BaseDateTime } from "./baseDateTime"; export namespace SpanishDateTime { + export const LangMarker = 'Spa'; export const CheckBothBeforeAfter = false; export const TillRegex = `(?hasta|al|a|--|-|—|——)(\\s+(el|la(s)?))?`; export const AndRegex = `(?y|y\\s*el|--|-|—|——)`; diff --git a/JavaScript/packages/recognizers-number/src/number/chinese/extractors.ts b/JavaScript/packages/recognizers-number/src/number/chinese/extractors.ts index 15e2ca7be6..4947e569f7 100644 --- a/JavaScript/packages/recognizers-number/src/number/chinese/extractors.ts +++ b/JavaScript/packages/recognizers-number/src/number/chinese/extractors.ts @@ -74,15 +74,15 @@ export class ChineseIntegerExtractor extends BaseNumberExtractor { }, { // 半百 半打 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersWithHalfDozen, "gis"), - value: "IntegerChs" + value: "Integer" + ChineseNumeric.LangMarker }, { // 半 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.HalfUnitRegex, "gis"), - value: "IntegerChs" + value: "Integer" + ChineseNumeric.LangMarker }, { // 一打 五十打 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersWithDozen, "gis"), - value: "IntegerChs" + value: "Integer" + ChineseNumeric.LangMarker } ); @@ -90,14 +90,14 @@ export class ChineseIntegerExtractor extends BaseNumberExtractor { case ChineseNumberExtractorMode.Default: regexes.push({ // 一百五十五, 负一亿三百二十二. Uses an allow list to avoid extracting "四" from "四川" regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersWithAllowListRegex, "gi"), - value: "IntegerChs" + value: "Integer" + ChineseNumeric.LangMarker }); break; case ChineseNumberExtractorMode.ExtractAll: regexes.push({ // 一百五十五, 负一亿三百二十二, "四" from "四川". Uses no allow lists and extracts all potential integers (useful in Units, for example). regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersAggressiveRegex, "gi"), - value: "IntegerChs" + value: "Integer" + ChineseNumeric.LangMarker }); break; } @@ -131,11 +131,11 @@ export class ChineseDoubleExtractor extends BaseNumberExtractor { }, { // 15.2万 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.DoubleWithThousandsRegex, "gi"), - value: "DoubleChs" + value: "Double" + ChineseNumeric.LangMarker }, { // 四十五点三三 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.DoubleAllFloatRegex, "gi"), - value: "DoubleChs" + value: "Double" + ChineseNumeric.LangMarker }, { // 2e6, 21.2e0 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.DoubleExponentialNotationRegex, "gis"), @@ -169,7 +169,7 @@ export class ChineseFractionExtractor extends BaseNumberExtractor { }, { // 四分之六十五 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.AllFractionNumber, "gi"), - value: "FracChs" + value: "Frac" + ChineseNumeric.LangMarker } ); @@ -185,11 +185,11 @@ export class ChineseOrdinalExtractor extends BaseNumberExtractor { let regexes = new Array( { // 第一百五十四 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.OrdinalRegex, "gi"), - value: "OrdinalChs" + value: "Ordinal" + ChineseNumeric.LangMarker }, { // 第2565, 第1234 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.OrdinalNumbersRegex, "gi"), - value: "OrdinalChs" + value: "Ordinal" + ChineseNumeric.LangMarker } ); @@ -205,11 +205,11 @@ export class ChinesePercentageExtractor extends BaseNumberExtractor { let regexes = new Array( { // 二十个百分点, 四点五个百分点 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.PercentagePointRegex, "gi"), - value: "PerChs" + value: "Per" + ChineseNumeric.LangMarker }, { // 百分之五十 百分之一点五 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.SimplePercentageRegex, "gi"), - value: "PerChs" + value: "Per" + ChineseNumeric.LangMarker }, { // 百分之56.2 百分之12 regExp: RegExpUtility.getSafeRegExp(ChineseNumeric.NumbersPercentagePointRegex, "gis"), diff --git a/JavaScript/packages/recognizers-number/src/number/french/extractors.ts b/JavaScript/packages/recognizers-number/src/number/french/extractors.ts index 9b7bfaf34d..b1968d52ca 100644 --- a/JavaScript/packages/recognizers-number/src/number/french/extractors.ts +++ b/JavaScript/packages/recognizers-number/src/number/french/extractors.ts @@ -107,11 +107,11 @@ export class FrenchIntegerExtractor extends BaseNumberExtractor { }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.AllIntRegexWithLocks), - value: "IntegerFr" + value: "Integer" + FrenchNumeric.LangMarker }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.AllIntRegexWithDozenSuffixLocks), - value: "IntegerFr" + value: "Integer" + FrenchNumeric.LangMarker } ); @@ -144,7 +144,7 @@ export class FrenchDoubleExtractor extends BaseNumberExtractor { }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.DoubleAllFloatRegex), - value: "DoubleFr" + value: "Double" + FrenchNumeric.LangMarker }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.DoubleExponentialNotationRegex), @@ -186,11 +186,11 @@ export class FrenchFractionExtractor extends BaseNumberExtractor { }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.FractionNounRegex), - value: "FracFr" + value: "Frac" + FrenchNumeric.LangMarker }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.FractionNounWithArticleRegex), - value: "FracFr" + value: "Frac" + FrenchNumeric.LangMarker } ); @@ -198,7 +198,7 @@ export class FrenchFractionExtractor extends BaseNumberExtractor { if (mode != NumberMode.Unit) { regexes.push({ regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.FractionPrepositionRegex), - value: "FracFr" + value: "Frac" + FrenchNumeric.LangMarker }); }; @@ -218,7 +218,7 @@ export class FrenchOrdinalExtractor extends BaseNumberExtractor { }, { regExp: RegExpUtility.getSafeRegExp(FrenchNumeric.OrdinalFrenchRegex), - value: "OrdFr" + value: "Ord" + FrenchNumeric.LangMarker } ); diff --git a/JavaScript/packages/recognizers-number/src/resources/chineseNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/chineseNumeric.ts index db168b1e8b..a059d46a97 100644 --- a/JavaScript/packages/recognizers-number/src/resources/chineseNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/chineseNumeric.ts @@ -11,7 +11,7 @@ import { BaseNumbers } from "./baseNumbers"; export namespace ChineseNumeric { - export const LangMarker = 'Chs'; + export const LangMarker = 'Chi'; export const CompoundNumberLanguage = true; export const MultiDecimalSeparatorCulture = false; export const DecimalSeparatorChar = '.'; diff --git a/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts index 25289ae13e..f49501b0d8 100644 --- a/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts @@ -11,7 +11,7 @@ import { BaseNumbers } from "./baseNumbers"; export namespace FrenchNumeric { - export const LangMarker = 'Fr'; + export const LangMarker = 'Fre'; export const CompoundNumberLanguage = false; export const MultiDecimalSeparatorCulture = true; export const RoundNumberIntegerRegex = `(cent|mille|millions|million|milliard|milliards|billion|billions)`; diff --git a/Patterns/Bulgarian/Bulgarian-Numbers.yaml b/Patterns/Bulgarian/Bulgarian-Numbers.yaml index 22c5c8b7a1..3278ae0ce3 100644 --- a/Patterns/Bulgarian/Bulgarian-Numbers.yaml +++ b/Patterns/Bulgarian/Bulgarian-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Bul #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Chinese/Chinese-DateTime.yaml b/Patterns/Chinese/Chinese-DateTime.yaml index fa1b4ea620..68f60fcdde 100644 --- a/Patterns/Chinese/Chinese-DateTime.yaml +++ b/Patterns/Chinese/Chinese-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Chi #DateExtractorChs MonthRegex: !simpleRegex def: (?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年) diff --git a/Patterns/Chinese/Chinese-Numbers.yaml b/Patterns/Chinese/Chinese-Numbers.yaml index a3eee885cc..8329c5e99c 100644 --- a/Patterns/Chinese/Chinese-Numbers.yaml +++ b/Patterns/Chinese/Chinese-Numbers.yaml @@ -1,6 +1,7 @@ --- +#ISO 639-2 Code +LangMarker: Chi #ChineseNumberParserConfiguration -LangMarker: Chs #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true #Does this culture uses period and comma intercheangeably as decimal separator? diff --git a/Patterns/Dutch/Dutch-DateTime.yaml b/Patterns/Dutch/Dutch-DateTime.yaml index 968630dcab..1011c15ff6 100644 --- a/Patterns/Dutch/Dutch-DateTime.yaml +++ b/Patterns/Dutch/Dutch-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Dut CheckBothBeforeAfter: !bool false TillRegex: !nestedRegex def: (?\b(tot|totdat|gedurende|tijdens|ten tijde van)\b|{BaseDateTime.RangeConnectorSymbolRegex}) diff --git a/Patterns/Dutch/Dutch-Numbers.yaml b/Patterns/Dutch/Dutch-Numbers.yaml index 5c4e7af382..bdecdc99bf 100644 --- a/Patterns/Dutch/Dutch-Numbers.yaml +++ b/Patterns/Dutch/Dutch-Numbers.yaml @@ -1,5 +1,6 @@ --- -LangMarker: Nl +#ISO 639-2 Code +LangMarker: Dut #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true #Does this culture uses period and comma intercheangeably as decimal separator? diff --git a/Patterns/English/English-DateTime.yaml b/Patterns/English/English-DateTime.yaml index a9679da3f8..37a6690e4b 100644 --- a/Patterns/English/English-DateTime.yaml +++ b/Patterns/English/English-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Eng CheckBothBeforeAfter: !bool false TillRegex: !nestedRegex def: (?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}) diff --git a/Patterns/English/English-Numbers.yaml b/Patterns/English/English-Numbers.yaml index 3acbb97832..b5678a438b 100644 --- a/Patterns/English/English-Numbers.yaml +++ b/Patterns/English/English-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Eng #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool false diff --git a/Patterns/French/French-DateTime.yaml b/Patterns/French/French-DateTime.yaml index 7e9e3ce269..a749188f81 100644 --- a/Patterns/French/French-DateTime.yaml +++ b/Patterns/French/French-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Fre CheckBothBeforeAfter: !bool false TillRegex: !simpleRegex def: (?au|et|(jusqu')?[aà]|avant|--|-|—|——) diff --git a/Patterns/French/French-Numbers.yaml b/Patterns/French/French-Numbers.yaml index 6f02a83b23..cdac2d571f 100644 --- a/Patterns/French/French-Numbers.yaml +++ b/Patterns/French/French-Numbers.yaml @@ -1,5 +1,6 @@ --- -LangMarker: Fr +#ISO 639-2 Code +LangMarker: Fre #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool false #Does this culture uses period and comma intercheangeably as decimal separator? diff --git a/Patterns/German/German-DateTime.yaml b/Patterns/German/German-DateTime.yaml index c9010361eb..1303ee9774 100644 --- a/Patterns/German/German-DateTime.yaml +++ b/Patterns/German/German-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Ger CheckBothBeforeAfter: !bool false TillRegex: !simpleRegex def: (?zu|bis\s*zum|zum|bis|bis\s*hin(\s*zum)?|--|-|—|——) diff --git a/Patterns/German/German-Numbers.yaml b/Patterns/German/German-Numbers.yaml index b66d3fcc7b..791ec23579 100644 --- a/Patterns/German/German-Numbers.yaml +++ b/Patterns/German/German-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Ger #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Hindi/Hindi-DateTime.yaml b/Patterns/Hindi/Hindi-DateTime.yaml index 646331e2d1..d551dca2f5 100644 --- a/Patterns/Hindi/Hindi-DateTime.yaml +++ b/Patterns/Hindi/Hindi-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Hin CheckBothBeforeAfter: !bool true TillRegex: !nestedRegex def: (?\b(और|तक|द्वारा|से|to)|{BaseDateTime.RangeConnectorSymbolRegex}) diff --git a/Patterns/Hindi/Hindi-Numbers.yaml b/Patterns/Hindi/Hindi-Numbers.yaml index fe9b1e5904..528af95824 100644 --- a/Patterns/Hindi/Hindi-Numbers.yaml +++ b/Patterns/Hindi/Hindi-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Hin #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Italian/Italian-DateTime.yaml b/Patterns/Italian/Italian-DateTime.yaml index 5c0abc4c38..e5d73104cd 100644 --- a/Patterns/Italian/Italian-DateTime.yaml +++ b/Patterns/Italian/Italian-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Ita CheckBothBeforeAfter: !bool false TillRegex: !simpleRegex def: (?\b(fino\s+a(l(l[aoe'])?|gli|i)?|a(l(l[aoe'])?|gli|i)?|e\s+(il?|l[aoe']|gli))\b|--|-|—|——|~) @@ -349,7 +351,7 @@ TimeRegex1: !nestedRegex def: (((((?<=\b(da|al)?(le|l'|ore)\s*)({EngTimeRegex}))|((?<=\b(da|al)?(le|l'|ore)\s*)({HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex}))))|(({TimePrefix}\s+)({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}))|(({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s+{TimePrefix}))((\s*{DescRegex})|\b)) references: [ TimePrefix, EngTimeRegex, HourNumRegex, BaseDateTime.HourRegex, DescRegex, PrepRegex ] TimeRegex2: !nestedRegex - def: ({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b) + def: (t)?({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b) references: [ BaseDateTime.HourRegex, BaseDateTime.MinuteRegex, BaseDateTime.SecondRegex, DescRegex ] TimeRegex3: !nestedRegex def: \b{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})(\s+{TimePrefix})? diff --git a/Patterns/Italian/Italian-Numbers.yaml b/Patterns/Italian/Italian-Numbers.yaml index 0c3af50c26..252bef7ba3 100644 --- a/Patterns/Italian/Italian-Numbers.yaml +++ b/Patterns/Italian/Italian-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Ita #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Japanese/Japanese-DateTime.yaml b/Patterns/Japanese/Japanese-DateTime.yaml index 62e3a1527e..c04ace53dd 100644 --- a/Patterns/Japanese/Japanese-DateTime.yaml +++ b/Patterns/Japanese/Japanese-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Jpn #DateExtractorJap MonthRegex: !simpleRegex def: (?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月) diff --git a/Patterns/Japanese/Japanese-Numbers.yaml b/Patterns/Japanese/Japanese-Numbers.yaml index 237f926eda..886b570fe3 100644 --- a/Patterns/Japanese/Japanese-Numbers.yaml +++ b/Patterns/Japanese/Japanese-Numbers.yaml @@ -1,6 +1,7 @@ --- -#JapaneseNumberParserConfiguration +#ISO 639-2 Code LangMarker: Jpn +#JapaneseNumberParserConfiguration #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true #Does this culture uses period and comma intercheangeably as decimal separator? diff --git a/Patterns/Korean/Korean-Numbers.yaml b/Patterns/Korean/Korean-Numbers.yaml index 1f4f43a17a..9b0baaa6a4 100644 --- a/Patterns/Korean/Korean-Numbers.yaml +++ b/Patterns/Korean/Korean-Numbers.yaml @@ -1,5 +1,5 @@ --- -#Korean +#ISO 639-2 Code LangMarker: Kor #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Portuguese/Portuguese-DateTime.yaml b/Patterns/Portuguese/Portuguese-DateTime.yaml index b9f50f992a..a6cadaa4dd 100644 --- a/Patterns/Portuguese/Portuguese-DateTime.yaml +++ b/Patterns/Portuguese/Portuguese-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Por CheckBothBeforeAfter: !bool false TillRegex: !simpleRegex def: (?ate|as|às|até|ateh|a|ao|--|-|—|——)(\s+(o|[aà](s)?))? diff --git a/Patterns/Portuguese/Portuguese-Numbers.yaml b/Patterns/Portuguese/Portuguese-Numbers.yaml index d98187dda5..c128c602f2 100644 --- a/Patterns/Portuguese/Portuguese-Numbers.yaml +++ b/Patterns/Portuguese/Portuguese-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Por #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool false diff --git a/Patterns/Spanish/Spanish-DateTime.yaml b/Patterns/Spanish/Spanish-DateTime.yaml index 1cc4cb0c80..9f418e6d74 100644 --- a/Patterns/Spanish/Spanish-DateTime.yaml +++ b/Patterns/Spanish/Spanish-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Spa CheckBothBeforeAfter: !bool false TillRegex: !simpleRegex def: (?hasta|al|a|--|-|—|——)(\s+(el|la(s)?))? diff --git a/Patterns/Spanish/Spanish-Numbers.yaml b/Patterns/Spanish/Spanish-Numbers.yaml index dde1856504..3800a86d2e 100644 --- a/Patterns/Spanish/Spanish-Numbers.yaml +++ b/Patterns/Spanish/Spanish-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Spa #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool false diff --git a/Patterns/Swedish/Swedish-Numbers.yaml b/Patterns/Swedish/Swedish-Numbers.yaml index b6a052eb1f..c3197e7235 100644 --- a/Patterns/Swedish/Swedish-Numbers.yaml +++ b/Patterns/Swedish/Swedish-Numbers.yaml @@ -1,4 +1,5 @@ --- +#ISO 639-2 Code LangMarker: Swe #Does this language use non-whitespace-separated numbers? CompoundNumberLanguage: !bool true diff --git a/Patterns/Turkish/Turkish-DateTime.yaml b/Patterns/Turkish/Turkish-DateTime.yaml index e788733c7a..9b62765bd4 100644 --- a/Patterns/Turkish/Turkish-DateTime.yaml +++ b/Patterns/Turkish/Turkish-DateTime.yaml @@ -1,4 +1,6 @@ --- +#ISO 639-2 Code +LangMarker: Tur CheckBothBeforeAfter: !bool true TillRegex: !nestedRegex def: (?\b(kadar|dek\b|değin)|{BaseDateTime.RangeConnectorSymbolRegex}) @@ -463,7 +465,7 @@ TimeRegex1: !nestedRegex def: \b(({TimePrefix}\s+)(saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex}))(?!(\s+saat|\d+)) references: [ TimePrefix, WrittenTimeRegex, HourNumRegex, HourRegex, AtHourNumRegex ] TimeRegex2: !nestedRegex - def: \b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?) + def: \b({TimePrefix}\s+)?(saat\s)?(t)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?) references: [ TimePrefix, HourRegex, BaseDateTime.MinuteRegex, BaseDateTime.SecondRegex ] TimeRegex3: !nestedRegex def: \b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex}) diff --git a/Patterns/Turkish/Turkish-Numbers.yaml b/Patterns/Turkish/Turkish-Numbers.yaml index e6da039b83..696eb617c0 100644 --- a/Patterns/Turkish/Turkish-Numbers.yaml +++ b/Patterns/Turkish/Turkish-Numbers.yaml @@ -1,5 +1,6 @@ --- -LangMarker: Tr +#ISO 639-2 Code +LangMarker: Tur CompoundNumberLanguage: !bool true MultiDecimalSeparatorCulture: !bool true # Integer Regex diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/chinese_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/chinese_date_time.py index 9ab3cc3f34..a091168fce 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/chinese_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/chinese_date_time.py @@ -14,6 +14,7 @@ class ChineseDateTime: + LangMarker = 'Chi' MonthRegex = f'(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)' DayRegex = f'(?01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)' DateDayRegexInChinese = f'(?初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py index ac1342a65a..d0120e1049 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py @@ -14,6 +14,7 @@ class EnglishDateTime: + LangMarker = 'Eng' CheckBothBeforeAfter = False TillRegex = f'(?\\b(to|(un)?till?|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex})' RangeConnectorRegex = f'(?\\b(and|through|to)\\b|{BaseDateTime.RangeConnectorSymbolRegex})' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py index 8e8dc1cc58..fb53211bd5 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py @@ -14,6 +14,7 @@ class FrenchDateTime: + LangMarker = 'Fre' CheckBothBeforeAfter = False TillRegex = f'(?au|et|(jusqu\')?[aà]|avant|--|-|—|——)' RangeConnectorRegex = f'(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py index a85945cbe0..5fc4e445f2 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py @@ -14,6 +14,7 @@ class PortugueseDateTime: + LangMarker = 'Por' CheckBothBeforeAfter = False TillRegex = f'(?ate|as|às|até|ateh|a|ao|--|-|—|——)(\\s+(o|[aà](s)?))?' AndRegex = f'(?e|e\\s*o|--|-|—|——)' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py index c6882c729d..81ea3fd280 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py @@ -14,6 +14,7 @@ class SpanishDateTime: + LangMarker = 'Spa' CheckBothBeforeAfter = False TillRegex = f'(?hasta|al|a|--|-|—|——)(\\s+(el|la(s)?))?' AndRegex = f'(?y|y\\s*el|--|-|—|——)' diff --git a/Python/libraries/recognizers-number/recognizers_number/number/chinese/extractors.py b/Python/libraries/recognizers-number/recognizers_number/number/chinese/extractors.py index 31655f5da1..f0573d2c9e 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/chinese/extractors.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/chinese/extractors.py @@ -76,22 +76,22 @@ def __init__(self, mode: ChineseNumberExtractorMode = ChineseNumberExtractorMode ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.NumbersWithHalfDozen), - val='IntegerChs'), + val=f'Integer{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.NumbersWithDozen), - val='IntegerChs'), + val=f'Integer{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.HalfUnitRegex), - val='IntegerChs') + val=f'Integer{ChineseNumeric.LangMarker}') ] if mode == ChineseNumberExtractorMode.DEFAULT: self.__regexes.append( ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.NumbersWithAllowListRegex), - val='IntegerChs' + val=f'Integer{ChineseNumeric.LangMarker}' ) ) elif mode == ChineseNumberExtractorMode.EXTRACT_ALL: @@ -99,7 +99,7 @@ def __init__(self, mode: ChineseNumberExtractorMode = ChineseNumberExtractorMode ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.NumbersAggressiveRegex), - val='IntegerChs' + val=f'Integer{ChineseNumeric.LangMarker}' ) ) @@ -134,11 +134,11 @@ def __init__(self): ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.DoubleWithThousandsRegex), - val='DoubleChs'), + val=f'Double{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.DoubleAllFloatRegex), - val='DoubleChs'), + val=f'Double{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.DoubleExponentialNotationRegex), @@ -172,7 +172,7 @@ def __init__(self): ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.AllFractionNumber), - val='FracChs') + val=f'Frac{ChineseNumeric.LangMarker}') ] @@ -189,11 +189,11 @@ def __init__(self): self.__regexes = [ ReVal( re=RegExpUtility.get_safe_reg_exp(ChineseNumeric.OrdinalRegex), - val='OrdinalChs'), + val=f'Ordinal{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.OrdinalNumbersRegex), - val='OrdinalChs') + val=f'Ordinal{ChineseNumeric.LangMarker}') ] @@ -211,11 +211,11 @@ def __init__(self): ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.PercentagePointRegex), - val='PerChs'), + val=f'Per{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.SimplePercentageRegex), - val='PerChs'), + val=f'Per{ChineseNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( ChineseNumeric.NumbersPercentagePointRegex), diff --git a/Python/libraries/recognizers-number/recognizers_number/number/french/extractors.py b/Python/libraries/recognizers-number/recognizers_number/number/french/extractors.py index 16d99bc336..b63bae14a8 100644 --- a/Python/libraries/recognizers-number/recognizers_number/number/french/extractors.py +++ b/Python/libraries/recognizers-number/recognizers_number/number/french/extractors.py @@ -119,11 +119,11 @@ def __init__(self, placeholder: str = FrenchNumeric.PlaceHolderDefault): ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.AllIntRegexWithLocks), - val='IntegerFr'), + val=f'Integer{FrenchNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.AllIntRegexWithDozenSuffixLocks), - val='IntegerFr') + val=f'Integer{FrenchNumeric.LangMarker}') ] @@ -165,7 +165,7 @@ def __init__(self, placeholder): ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.DoubleAllFloatRegex), - val='DoubleFr'), + val=f'Double{FrenchNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.DoubleExponentialNotationRegex), @@ -199,11 +199,11 @@ def __init__(self, mode): ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.FractionNounRegex), - val='FracFr'), + val=f'Frac{FrenchNumeric.LangMarker}'), ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.FractionNounWithArticleRegex), - val='FracFr') + val=f'Frac{FrenchNumeric.LangMarker}') ] if mode != NumberMode.Unit: @@ -211,7 +211,7 @@ def __init__(self, mode): ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.FractionPrepositionRegex), - val='FracFr')) + val=f'Frac{FrenchNumeric.LangMarker}')) class FrenchOrdinalExtractor(BaseNumberExtractor): @@ -232,7 +232,7 @@ def __init__(self): ReVal( re=RegExpUtility.get_safe_reg_exp( FrenchNumeric.OrdinalFrenchRegex), - val='OrdFr') + val=f'Ord{FrenchNumeric.LangMarker}') ] diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/chinese_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/chinese_numeric.py index 06e7dc30af..935fda179f 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/chinese_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/chinese_numeric.py @@ -14,7 +14,7 @@ class ChineseNumeric: - LangMarker = 'Chs' + LangMarker = 'Chi' CompoundNumberLanguage = True MultiDecimalSeparatorCulture = False DecimalSeparatorChar = '.' diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py index 105b6d5163..c0d60d2b9e 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py @@ -14,7 +14,7 @@ class FrenchNumeric: - LangMarker = 'Fr' + LangMarker = 'Fre' CompoundNumberLanguage = False MultiDecimalSeparatorCulture = True RoundNumberIntegerRegex = f'(cent|mille|millions|million|milliard|milliards|billion|billions)' diff --git a/Specs/DateTime/Dutch/DateTimeExtractor.json b/Specs/DateTime/Dutch/DateTimeExtractor.json index 0c158b4b85..2c8f313fbc 100644 --- a/Specs/DateTime/Dutch/DateTimeExtractor.json +++ b/Specs/DateTime/Dutch/DateTimeExtractor.json @@ -1448,7 +1448,7 @@ "NotSupportedByDesign": "javascript,python,java", "Results": [ { - "Text": "2016-12-16T12:23:59", + "Text": "2016-12-16 12:23:59", "Type": "datetime", "Start": 13, "Length": 19 diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index dce5cca49a..1ab8f75a35 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -14648,6 +14648,29 @@ } ] }, + { + "Input": "what about 30 min later?", + "Context": { + "ReferenceDateTime": "2019-11-01T15:16:00" + }, + "Results": [ + { + "Text": "30 min later", + "Start": 11, + "End": 22, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2019-11-01T15:46:00", + "type": "datetime", + "value": "2019-11-01 15:46:00" + } + ] + } + } + ] + }, { "Input": "Every other Friday", "Context": { diff --git a/Specs/DateTime/English/DateTimeModelComplexCalendar.json b/Specs/DateTime/English/DateTimeModelComplexCalendar.json index b82beed979..1033bf4625 100644 --- a/Specs/DateTime/English/DateTimeModelComplexCalendar.json +++ b/Specs/DateTime/English/DateTimeModelComplexCalendar.json @@ -12587,5 +12587,28 @@ } } ] + }, + { + "Input": "what about 30 min later?", + "Context": { + "ReferenceDateTime": "2019-11-01T15:16:00" + }, + "Results": [ + { + "Text": "30 min later", + "Start": 11, + "End": 22, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2019-11-01T15:46:00", + "type": "datetime", + "value": "2019-11-01 15:46:00" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/German/DateTimeModel.json b/Specs/DateTime/German/DateTimeModel.json index 93c7e5c7e8..177b33ddfd 100644 --- a/Specs/DateTime/German/DateTimeModel.json +++ b/Specs/DateTime/German/DateTimeModel.json @@ -1501,7 +1501,6 @@ "Context": { "ReferenceDateTime": "2019-09-03T00:00:00" }, - "Debug": true, "NotSupported": "python, javascript", "Results": [ { diff --git a/Specs/DateTime/Italian/TimeExtractor.json b/Specs/DateTime/Italian/TimeExtractor.json index 43d2daadc3..dfd7b42b60 100644 --- a/Specs/DateTime/Italian/TimeExtractor.json +++ b/Specs/DateTime/Italian/TimeExtractor.json @@ -112,10 +112,10 @@ "NotSupportedByDesign": "python,javascript,python", "Results": [ { - "Text": "12:34:20", + "Text": "T12:34:20", "Type": "time", - "Start": 14, - "Length": 8 + "Start": 13, + "Length": 9 } ] }, diff --git a/Specs/DateTime/Turkish/MergedParser.json b/Specs/DateTime/Turkish/MergedParser.json index 76d35ee0b0..3aaca1d92b 100644 --- a/Specs/DateTime/Turkish/MergedParser.json +++ b/Specs/DateTime/Turkish/MergedParser.json @@ -329,7 +329,7 @@ "NotSupportedByDesign": "javascript,python,java", "Results": [ { - "Text": "Çarşamba", + "Text": "çarşamba", "Type": "datetimeV2.date", "Value": { "values": [ @@ -358,7 +358,7 @@ "NotSupportedByDesign": "javascript,python,java", "Results": [ { - "Text": "Çarşamba günü 31'i", + "Text": "çarşamba günü 31'i", "Type": "datetimeV2.date", "Value": { "values": [ diff --git a/Specs/Number/English/NumberModel.json b/Specs/Number/English/NumberModel.json index 1aa7e5df7e..f1f4201ad4 100644 --- a/Specs/Number/English/NumberModel.json +++ b/Specs/Number/English/NumberModel.json @@ -2594,5 +2594,20 @@ } } ] + }, + { + "Input": "The answer is negative one", + "Results": [ + { + "Text": "negative one", + "TypeName": "number", + "Resolution": { + "subtype": "integer", + "value": "-1" + }, + "Start": 14, + "End": 25 + } + ] } ] \ No newline at end of file diff --git a/Specs/Number/English/NumberRangeModelExperimentalMode.json b/Specs/Number/English/NumberRangeModelExperimentalMode.json index 4c9636a3f0..78fe52ac94 100644 --- a/Specs/Number/English/NumberRangeModelExperimentalMode.json +++ b/Specs/Number/English/NumberRangeModelExperimentalMode.json @@ -871,7 +871,17 @@ { "Input": "How about 2 in 5 or more", "NotSupported": "javascript, python, java", - "Results": [] + "Results": [ + { + "Text": "5 or more", + "TypeName": "numberrange", + "Resolution": { + "value": "[5,)" + }, + "Start": 15, + "End": 23 + } + ] }, { "Input": "How about more than 2 in 5",