Skip to content

Commit

Permalink
.NET adding internal cache prototype (perf improvement) (#1998)
Browse files Browse the repository at this point in the history
* .NET adding internal cache prototype for speed (#1)

* Initial config normalization in English

* Add internal cache prototype

* Updated new Swedish recognizers and fixed Date cache experiment.

* Removing support for .NET Framework 4.5 and 4.5.2.

* Adding flag to disable internal cache.

* Fixing TurkishNumberRangeParserConfiguration to use Turkish number extractors.

* Making sure English number range passes Options internally.

* Moving cache keys to tuples. Making sure all datetime extractions receive reference time.

* Further improvements with cache reuse and key changes.

* Adding ISO 639-2 language codes to relevant pattern files

* Moving to standardize configs. Needs to be propagated to other languages.

* Reducing number of cache instances and propagating to Dutch.

* Centralizing number results cache code and propagating configs to Chinese.

* Propagated configs and cache to Spanish and Portuguese numbers.

* Patch for other platforms after the language marker changes to match ISO 639-2

* Minor cleanup
  • Loading branch information
tellarin authored Dec 17, 2019
1 parent c62686b commit 51c3ce7
Show file tree
Hide file tree
Showing 320 changed files with 2,616 additions and 1,207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Chinese

public static class DateTimeDefinitions
{
public const string LangMarker = @"Chi";
public const string MonthRegex = @"(?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月|大年)";
public const string DayRegex = @"(?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|1|2|3|4|5|6|7|8|9)";
public const string DateDayRegexInChinese = @"(?<day>初一|三十|一日|十一日|二十一日|三十一日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|一日|十一日|十日|二十一日|二十日|三十一日|三十日|二日|三日|四日|五日|六日|七日|八日|九日|十二日|十三日|十四日|十五日|十六日|十七日|十八日|十九日|二十二日|二十三日|二十四日|二十五日|二十六日|二十七日|二十八日|二十九日|十日|二十日|三十日|10日|11日|12日|13日|14日|15日|16日|17日|18日|19日|1日|20日|21日|22日|23日|24日|25日|26日|27日|28日|29日|2日|30日|31日|3日|4日|5日|6日|7日|8日|9日|一号|十一号|二十一号|三十一号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|一号|十一号|十号|二十一号|二十号|三十一号|三十号|二号|三号|四号|五号|六号|七号|八号|九号|十二号|十三号|十四号|十五号|十六号|十七号|十八号|十九号|二十二号|二十三号|二十四号|二十五号|二十六号|二十七号|二十八号|二十九号|十号|二十号|三十号|10号|11号|12号|13号|14号|15号|16号|17号|18号|19号|1号|20号|21号|22号|23号|24号|25号|26号|27号|28号|29号|2号|30号|31号|3号|4号|5号|6号|7号|8号|9号)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Chinese

public static class NumbersDefinitions
{
public const string LangMarker = @"Chs";
public const string LangMarker = @"Chi";
public const bool CompoundNumberLanguage = true;
public const bool MultiDecimalSeparatorCulture = false;
public const char DecimalSeparatorChar = '.';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Dutch

public static class DateTimeDefinitions
{
public const string LangMarker = @"Dut";
public const bool CheckBothBeforeAfter = false;
public static readonly string TillRegex = $@"(?<till>\b(tot|totdat|gedurende|tijdens|ten tijde van)\b|{BaseDateTime.RangeConnectorSymbolRegex})";
public static readonly string RangeConnectorRegex = $@"(?<and>\b(en|tot en met|t/m|tot|tot aan)\b|{BaseDateTime.RangeConnectorSymbolRegex})";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Dutch

public static class NumbersDefinitions
{
public const string LangMarker = @"Nl";
public const string LangMarker = @"Dut";
public const bool CompoundNumberLanguage = true;
public const bool MultiDecimalSeparatorCulture = false;
public const string RoundNumberIntegerRegex = @"(honderd|duizend|miljoen|miljard|biljoen)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.English

public static class DateTimeDefinitions
{
public const string LangMarker = @"Eng";
public const bool CheckBothBeforeAfter = false;
public static readonly string TillRegex = $@"(?<till>\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex})";
public static readonly string RangeConnectorRegex = $@"(?<and>\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex})";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.French

public static class DateTimeDefinitions
{
public const string LangMarker = @"Fre";
public const bool CheckBothBeforeAfter = false;
public const string TillRegex = @"(?<till>au|et|(jusqu')?[aà]|avant|--|-|—|——)";
public const string RangeConnectorRegex = @"(?<and>de la|au|[aà]|et(\s*la)?|--|-|—|——)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.French

public static class NumbersDefinitions
{
public const string LangMarker = @"Fr";
public const string LangMarker = @"Fre";
public const bool CompoundNumberLanguage = false;
public const bool MultiDecimalSeparatorCulture = true;
public const string RoundNumberIntegerRegex = @"(cent|mille|millions|million|milliard|milliards|billion|billions)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.German

public static class DateTimeDefinitions
{
public const string LangMarker = @"Ger";
public const bool CheckBothBeforeAfter = false;
public const string TillRegex = @"(?<till>zu|bis\s*zum|zum|bis|bis\s*hin(\s*zum)?|--|-|—|——)";
public const string RangeConnectorRegex = @"(?<and>und|--|-|—|——)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Hindi

public static class DateTimeDefinitions
{
public const string LangMarker = @"Hin";
public const bool CheckBothBeforeAfter = true;
public static readonly string TillRegex = $@"(?<till>\b(और|तक|द्वारा|से|to)|{BaseDateTime.RangeConnectorSymbolRegex})";
public static readonly string RangeConnectorRegex = $@"(?<and>\b(और|तक|द्वारा|से|to)|{BaseDateTime.RangeConnectorSymbolRegex})";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Italian

public static class DateTimeDefinitions
{
public const string LangMarker = @"Ita";
public const bool CheckBothBeforeAfter = false;
public const string TillRegex = @"(?<till>\b(fino\s+a(l(l[aoe'])?|gli|i)?|a(l(l[aoe'])?|gli|i)?|e\s+(il?|l[aoe']|gli))\b|--|-|—|——|~)";
public const string RestrictedTillRegex = @"(?<till>\b(fino\s+a(l(l[aoe'])?|gli|i)?)\b|--|-|—|——|~)";
Expand Down Expand Up @@ -157,7 +158,7 @@ public static class DateTimeDefinitions
public const string AmbiguousRangeModifierPrefix = @"^[.]";
public static readonly string NumberEndingPattern = $@"^(\s+(?<meeting>riunione|appuntamento|conferenza|chiamata|chiamata skype)\s+all['e]\s*(?<newTime>{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))";
public static readonly string TimeRegex1 = $@"(((((?<=\b(da|al)?(le|l'|ore)\s*)({EngTimeRegex}))|((?<=\b(da|al)?(le|l'|ore)\s*)({HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex}))))|(({TimePrefix}\s+)({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}))|(({EngTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\s+{TimePrefix}))((\s*{DescRegex})|\b))";
public static readonly string TimeRegex2 = $@"({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)";
public static readonly string TimeRegex2 = $@"(t)?({BaseDateTime.HourRegex})(\s*)?:(\s*)?{BaseDateTime.MinuteRegex}((\s*)?:(\s*)?{BaseDateTime.SecondRegex})?((\s*{DescRegex})|\b)";
public static readonly string TimeRegex3 = $@"\b{BaseDateTime.HourRegex}\.{BaseDateTime.MinuteRegex}(\s*{DescRegex})(\s+{TimePrefix})?";
public static readonly string TimeRegex4 = $@"\b({BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?\s+{TimeSuffix}|{OclockPrefix}\s+{BasicTime}(\s*{DescRegex})?(\s+{TimePrefix})?(\s*{DescRegex})?)\b";
public static readonly string TimeRegex5 = $@"\b(({BasicTime}\s*{DescRegex}(\s+{TimePrefix})?)|({BasicTime}(\s+{TimePrefix})((\s*{DescRegex})|\b))|((?<=\b(da|al)?(le|l'|ore)\s*)(\b(?<basictime>{EngTimeRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)|\b(?<basictime>{HourNumRegex}|{BaseDateTime.HourRegex})(?![\.,]\d+)(?=\s*({PrepRegex})\b))))";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Japanese

public static class DateTimeDefinitions
{
public const string LangMarker = @"Jpn";
public const string MonthRegex = @"(?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)";
public const string MonthRegexForPeriod = @"(?<month>正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|01月|02月|03月|04月|05月|06月|07月|08月|09月|10月|11月|12月|1月|2月|3月|4月|5月|6月|7月|8月|9月)(?=\b|t|まで|から)?";
public const string MonthNumRegexForPeriod = @"(?<month>01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)(?=\b|t|まで|から)?";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net462</TargetFrameworks>
<TargetFrameworks>net462;netstandard2.0</TargetFrameworks>
<!-- Disable GenerateAssemblyInfo to use the existing AssemblyInfo.cs -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
<CodeAnalysisRuleSet>../Recognizers-Text.ruleset</CodeAnalysisRuleSet>
<RunPostBuildEvent>OnOutputUpdated</RunPostBuildEvent>
</PropertyGroup>
<PropertyGroup>

<PropertyGroup>
<!--
Make sure any documentation comments which are included in code get checked for syntax during the build, but do
not report warnings for missing comments.
Expand All @@ -18,7 +19,8 @@
-->
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>
<NoWarn>$(NoWarn),1573,1591,1712</NoWarn>
</PropertyGroup>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="2.6.3">
<PrivateAssets>all</PrivateAssets>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Portuguese

public static class DateTimeDefinitions
{
public const string LangMarker = @"Por";
public const bool CheckBothBeforeAfter = false;
public const string TillRegex = @"(?<till>ate|as|às|até|ateh|a|ao|--|-|—|——)(\s+(o|[aà](s)?))?";
public const string AndRegex = @"(?<and>e|e\s*o|--|-|—|——)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Spanish

public static class DateTimeDefinitions
{
public const string LangMarker = @"Spa";
public const bool CheckBothBeforeAfter = false;
public const string TillRegex = @"(?<till>hasta|al|a|--|-|—|——)(\s+(el|la(s)?))?";
public const string AndRegex = @"(?<and>y|y\s*el|--|-|—|——)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Turkish

public static class DateTimeDefinitions
{
public const string LangMarker = @"Tur";
public const bool CheckBothBeforeAfter = true;
public static readonly string TillRegex = $@"(?<till>\b(kadar|dek\b|değin)|{BaseDateTime.RangeConnectorSymbolRegex})";
public static readonly string TillConnectorRegex = $@"(?<till>('?tan|'?ten|'?den|'?dan|ile)\b|{BaseDateTime.RangeConnectorSymbolRegex})";
Expand Down Expand Up @@ -207,7 +208,7 @@ public static class DateTimeDefinitions
public static readonly string ConnectNumRegex = $@"\b({DescRegex}\s+){HourRegex}(?<min>00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59)(?!\d)";
public static readonly string TimeRegexWithDotConnector = $@"({HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})";
public static readonly string TimeRegex1 = $@"\b(({TimePrefix}\s+)(saat\s)?({WrittenTimeRegex}|{HourNumRegex}|{AtHourNumRegex}|{HourRegex}))(?!(\s+saat|\d+))";
public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)";
public static readonly string TimeRegex2 = $@"\b({TimePrefix}\s+)?(saat\s)?(t)?({HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?)";
public static readonly string TimeRegex3 = $@"\b({TimePrefix}\s+)?(saat\s)?({HourRegex}:{BaseDateTime.MinuteRegex})";
public static readonly string TimeRegex4 = $@"\b({TimePrefix}\s+)?(saat\s)?{TimeSuffix}(?=(\b|dan))";
public static readonly string TimeRegex5 = $@"\b({TimePrefix}\s+)?(saat\s)?(?<!:)({HourRegex}(:{BaseDateTime.MinuteRegex})?|{HourNumRegex})({AroundRegex})\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace Microsoft.Recognizers.Definitions.Turkish

public static class NumbersDefinitions
{
public const string LangMarker = @"Tr";
public const string LangMarker = @"Tur";
public const bool CompoundNumberLanguage = true;
public const bool MultiDecimalSeparatorCulture = true;
public const string DigitsNumberRegex = @"\d+|\d{1,3}(\.\d{3})";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net452;net45</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462</TargetFrameworks>
<!-- Disable GenerateAssemblyInfo to use the existing AssemblyInfo.cs -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net452;net45</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net462</TargetFrameworks>
<!-- Disable GenerateAssemblyInfo to use the existing AssemblyInfo.cs -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<GeneratePackageOnBuild>false</GeneratePackageOnBuild>
<CodeAnalysisRuleSet>../Recognizers-Text.ruleset</CodeAnalysisRuleSet>
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>
<NoWarn>$(NoWarn),1573,1591,1712</NoWarn>
</PropertyGroup>
<PropertyGroup>

<PropertyGroup>
<!--
Make sure any documentation comments which are included in code get checked for syntax during the build, but do
not report warnings for missing comments.
Expand All @@ -19,7 +20,8 @@
-->
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>
<NoWarn>$(NoWarn),1573,1591,1712</NoWarn>
</PropertyGroup>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="2.6.3">
<PrivateAssets>all</PrivateAssets>
Expand All @@ -44,4 +46,5 @@
<Target Name="PostBuildUnix" AfterTargets="PostBuildEvent" Condition="'$(OS)' != 'Windows_NT'">
<Exec Command="cp -r $(TargetDir)/../* $(TargetDir)../../../../build/package\" />
</Target>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
</dependencies>
</metadata>
<files>
<file src="..\build\package\net45\Microsoft.Recognizers.Text.Choice.dll" target="lib\net45"/>
<file src="..\build\package\net452\Microsoft.Recognizers.Text.Choice.dll" target="lib\net452"/>
<file src="..\build\package\net462\Microsoft.Recognizers.Text.Choice.dll" target="lib\net462"/>
<file src="..\build\package\netstandard2.0\Microsoft.Recognizers.Text.Choice.dll" target="lib\netstandard2.0"/>
<file src="**\*.cs" exclude="**\obj\**\*.cs" target="src" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<IsPackable>false</IsPackable>
<Copyright>© Microsoft Corporation. All rights reserved.</Copyright>
</PropertyGroup>

<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
Expand All @@ -15,6 +16,7 @@
<WarningLevel>4</WarningLevel>
<CodeAnalysisRuleSet>../Recognizers-Text.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>

<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
Expand All @@ -24,6 +26,7 @@
<WarningLevel>4</WarningLevel>
<CodeAnalysisRuleSet>../Recognizers-Text.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>

<PropertyGroup>
<!--
Make sure any documentation comments which are included in code get checked for syntax during the build, but do
Expand All @@ -34,7 +37,8 @@
-->
<DocumentationFile>$(OutputPath)$(AssemblyName).xml</DocumentationFile>
<NoWarn>$(NoWarn),1573,1591,1712</NoWarn>
</PropertyGroup>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
<PackageReference Include="MSTest.TestAdapter" Version="1.3.2" />
Expand All @@ -54,4 +58,5 @@
<ProjectReference Include="..\Microsoft.Recognizers.Text.Sequence\Microsoft.Recognizers.Text.Sequence.csproj" />
<ProjectReference Include="..\Microsoft.Recognizers.Text\Microsoft.Recognizers.Text.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public LongFormTestConfiguration(char decimalSep, char nonDecimalSep)

public string HalfADozenText { get; }

public string LangMarker { get; } = "SelfDefined";
public string LanguageMarker { get; } = "SelfDefined";

public char NonDecimalSeparatorChar { get; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ public class TestNumberRecognizerInitialization

public TestNumberRecognizerInitialization()
{
var numConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None);
var pureNumConfig = new BaseNumberOptionsConfiguration(EnglishCulture, NumberOptions.None, NumberMode.PureNumber);

controlModel = new NumberModel(
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number,
new EnglishNumberParserConfiguration(new BaseNumberOptionsConfiguration(EnglishCulture))),
NumberExtractor.GetInstance(NumberMode.PureNumber));
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Number, new EnglishNumberParserConfiguration(numConfig)),
NumberExtractor.GetInstance(pureNumConfig));
}

[TestMethod]
Expand Down
Loading

0 comments on commit 51c3ce7

Please sign in to comment.