-
Notifications
You must be signed in to change notification settings - Fork 0
/
FrequencyAnalysisTask.cs
102 lines (97 loc) · 4.09 KB
/
FrequencyAnalysisTask.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
using System.Collections.Generic;
using System.Linq;
namespace TextAnalysis
{
/// <summary>
/// Builds a "most frequent next word" lookup from tokenized sentences, using
/// one-word prefixes (bigrams) and two-word prefixes (trigrams).
/// </summary>
static class FrequencyAnalysisTask
{
    /// <summary>
    /// For every one-word prefix and every two-word prefix (words joined by a single
    /// space) found in <paramref name="text"/>, selects the word that most often
    /// follows it. Ties are resolved in favour of the ordinally smallest word.
    /// </summary>
    /// <param name="text">Sentences, each one a list of words in order.</param>
    /// <returns>A dictionary mapping each prefix to its most frequent continuation.</returns>
    /// <exception cref="System.ArgumentException">
    /// A single word is textually equal to a two-word prefix (only possible when a
    /// word itself contains a space), so both would claim the same key.
    /// </exception>
    public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
    {
        // GetBigrams returns a freshly built dictionary, so it can be extended in place.
        var result = GetBigrams(text);
        foreach (var trigram in GetTrigrams(text))
        {
            // Add (rather than the indexer) keeps a key collision loud instead of
            // silently overwriting the bigram entry.
            result.Add(trigram.Key, trigram.Value);
        }

        return result;
    }

    /// <summary>
    /// Maps every word that has a successor inside a sentence to its most frequent successor.
    /// </summary>
    private static Dictionary<string, string> GetBigrams(List<List<string>> sentences)
    {
        var counts = new Dictionary<string, Dictionary<string, int>>();
        foreach (var sentence in sentences)
        {
            for (var i = 0; i < sentence.Count - 1; i++)
            {
                CountContinuation(counts, sentence[i], sentence[i + 1]);
            }
        }

        return SortNgram(counts);
    }

    /// <summary>
    /// Maps every pair of adjacent words ("first second") that has a successor inside
    /// a sentence to its most frequent successor.
    /// </summary>
    private static Dictionary<string, string> GetTrigrams(List<List<string>> sentences)
    {
        var counts = new Dictionary<string, Dictionary<string, int>>();
        foreach (var sentence in sentences)
        {
            for (var i = 0; i < sentence.Count - 2; i++)
            {
                var prefix = $"{sentence[i]} {sentence[i + 1]}";
                CountContinuation(counts, prefix, sentence[i + 2]);
            }
        }

        return SortNgram(counts);
    }

    /// <summary>
    /// Records one more occurrence of <paramref name="nextWord"/> following <paramref name="prefix"/>.
    /// </summary>
    private static void CountContinuation(
        Dictionary<string, Dictionary<string, int>> counts, string prefix, string nextWord)
    {
        Dictionary<string, int> continuations;
        if (!counts.TryGetValue(prefix, out continuations))
        {
            continuations = new Dictionary<string, int>();
            counts.Add(prefix, continuations);
        }

        // TryGetValue leaves 'seen' at 0 when the word has not been counted yet.
        int seen;
        continuations.TryGetValue(nextWord, out seen);
        continuations[nextWord] = seen + 1;
    }

    /// <summary>
    /// Reduces per-prefix continuation counts to the single best continuation per prefix.
    /// </summary>
    /// <param name="ngram">For each prefix, the candidate next words with their occurrence counts.</param>
    /// <returns>
    /// A dictionary mapping each prefix to the word with the highest count; among words
    /// sharing the highest count the ordinally smallest one wins. Prefixes whose
    /// candidate set is empty are omitted.
    /// </returns>
    public static Dictionary<string, string> SortNgram(Dictionary<string, Dictionary<string, int>> ngram)
    {
        var result = new Dictionary<string, string>();
        foreach (var entry in ngram)
        {
            // Dictionary keys are never null, so null means "no candidate seen yet".
            string bestWord = null;
            var bestCount = 0;
            foreach (var candidate in entry.Value)
            {
                var isBetter = bestWord == null
                    || candidate.Value > bestCount
                    || (candidate.Value == bestCount
                        && string.CompareOrdinal(candidate.Key, bestWord) < 0);
                if (!isBetter)
                {
                    continue;
                }

                bestCount = candidate.Value;
                bestWord = candidate.Key;
            }

            // A prefix without any continuation has no "most frequent next word".
            if (bestWord != null)
            {
                result.Add(entry.Key, bestWord);
            }
        }

        return result;
    }
}
}