-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_test.go
119 lines (100 loc) · 4.05 KB
/
extract_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package words_test
import (
"github.com/imbue11235/words"
"reflect"
"testing"
)
// testSet is a single table-driven test case: an input string paired with
// the slice of tokens the extraction is expected to yield for it.
type testSet struct {
	// input is the raw string handed to the extractor.
	input string

	// expected is the ordered list of tokens the extraction should equal.
	expected []string
}
// runExtractTest feeds every test input to words.Extract, forwarding the
// supplied options, and reports an error on t for each result that does not
// match the expected tokens.
//
// It is marked as a test helper so failures are attributed to the calling
// test function rather than to this shared loop, and each failure message
// names the offending input so the failing table row can be found directly.
func runExtractTest(t *testing.T, tests []testSet, options ...words.Option) {
	t.Helper()

	for _, test := range tests {
		extraction := words.Extract(test.input, options...)

		// reflect.DeepEqual treats a nil slice and an empty non-nil slice as
		// different, so two empty results are accepted as a match up front.
		if len(extraction) == 0 && len(test.expected) == 0 {
			continue
		}

		if !reflect.DeepEqual(extraction, test.expected) {
			// %q keeps whitespace-only and empty tokens visible in the output.
			t.Errorf("Extract(%q) = %q, want %q", test.input, extraction, test.expected)
		}
	}
}
// TestExtract runs the extraction without any options over a table of
// inputs: the empty string, letter/digit boundaries, camel case and acronym
// boundaries, separators such as underscores, hyphens, commas and spaces,
// non-Latin text, and strings containing unusual or invalid UTF-8.
func TestExtract(t *testing.T) {
	cases := []testSet{
		{input: "", expected: []string{}},
		{input: "100cm", expected: []string{"100", "cm"}},
		{input: "μου αρέσουν τα μπιφτέκια", expected: []string{"μου", "αρέσουν", "τα", "μπιφτέκια"}},
		{input: "aeiouAreVowels", expected: []string{"aeiou", "Are", "Vowels"}},
		{input: "XmlHTTP", expected: []string{"Xml", "HTTP"}},
		{input: "isISO8601", expected: []string{"is", "ISO", "8601"}},
		{input: "Win2000", expected: []string{"Win", "2000"}},
		{input: "Bose QC35", expected: []string{"Bose", "QC", "35"}},
		{input: "YAMLParser", expected: []string{"YAML", "Parser"}},
		{input: "SOME_CONSTANT_STRING_REPRESENTATION", expected: []string{"SOME", "CONSTANT", "STRING", "REPRESENTATION"}},
		{input: "joe, johnathan & john", expected: []string{"joe", "johnathan", "john"}},
		{input: "a small-town family-owned business", expected: []string{"a", "small", "town", "family", "owned", "business"}},
		{input: "-any-day-now-", expected: []string{"any", "day", "now"}},
		{input: "a lot of spaces ", expected: []string{"a", "lot", "of", "spaces"}},
		{input: "AnUnknownChar𖡄", expected: []string{"An", "Unknown", "Char", "𖡄"}},
		{input: "�����������������������������", expected: []string{}},
		{input: "invalidUTF8\xc5z", expected: []string{"invalidUTF8\xc5z"}},
	}

	runExtractTest(t, cases)
}
// TestExtractWithOptionHyphenatedWords runs the extraction with the
// AllowHyphenatedWords option. The table covers hyphens between lowercase
// words, leading and repeated hyphens, hyphen-only inputs, and a hyphen
// followed by an uppercase word.
func TestExtractWithOptionHyphenatedWords(t *testing.T) {
	cases := []testSet{
		{input: "-hyphenated-words", expected: []string{"hyphenated-words"}},
		{input: "a later -hyphenated-word", expected: []string{"a", "later", "hyphenated-word"}},
		{input: "a small-sized, dog-friendly, vacation home", expected: []string{"a", "small-sized", "dog-friendly", "vacation", "home"}},
		{input: "other.chars_should-still*be>processed", expected: []string{"other", "chars", "should-still", "be", "processed"}},
		{input: "-.-", expected: []string{}},
		{input: "----------------", expected: []string{}},
		{input: "----------a-b------------", expected: []string{"a-b"}},
		{input: "-z-----------b", expected: []string{"z", "b"}},
		{input: "a family-SIZED meal", expected: []string{"a", "family", "SIZED", "meal"}},
	}

	runExtractTest(t, cases, words.AllowHyphenatedWords())
}
// TestExtractWithOptionIncludeSpace runs the extraction with the
// IncludeSpaces option and expects whitespace to appear as tokens of its own
// between the extracted words.
func TestExtractWithOptionIncludeSpace(t *testing.T) {
	cases := []testSet{
		{input: "a string with spaces", expected: []string{"a", " ", "string", " ", "with", " ", "spaces"}},
		{input: "So many spaces", expected: []string{"So", " ", "many", " ", "spaces"}},
		{input: "Spaces & Symbols", expected: []string{"Spaces", " ", " ", "Symbols"}},
	}

	runExtractTest(t, cases, words.IncludeSpaces())
}
// TestExtractWithOptionIncludeSymbols runs the extraction with the
// IncludeSymbols option. The table expects characters such as '<', '>' and
// '|' to be returned as tokens, while the '!' in "no punctuation!" is not.
func TestExtractWithOptionIncludeSymbols(t *testing.T) {
	cases := []testSet{
		{input: "should>yield|any<symbol", expected: []string{"should", ">", "yield", "|", "any", "<", "symbol"}},
		{input: "no punctuation!", expected: []string{"no", "punctuation"}},
		{input: "<<<<<hi>>>>>", expected: []string{"<<<<<", "hi", ">>>>>"}},
	}

	runExtractTest(t, cases, words.IncludeSymbols())
}
// TestExtractWithOptionIncludePunctuation runs the extraction with the
// IncludePunctuation option. The table expects '.', ',' and '!' to be
// returned as tokens, while the leading '>' in ">!..oops" is not.
func TestExtractWithOptionIncludePunctuation(t *testing.T) {
	cases := []testSet{
		{input: "keep. all, punctuation!", expected: []string{"keep", ".", "all", ",", "punctuation", "!"}},
		{input: ">!..oops", expected: []string{"!..", "oops"}},
	}

	runExtractTest(t, cases, words.IncludePunctuation())
}
// TestExtractWithIgnoredRunes runs the extraction with '.' passed to
// WithIgnoredRunes. The table expects periods to stay attached to the
// neighbouring word, while other separators ('!', '_', spaces) still split.
func TestExtractWithIgnoredRunes(t *testing.T) {
	cases := []testSet{
		{input: "ignored.runes", expected: []string{"ignored.runes"}},
		{input: "etc. and so on", expected: []string{"etc.", "and", "so", "on"}},
		{input: "etc! and so on", expected: []string{"etc", "and", "so", "on"}},
		{input: ".start_and_end_with_periods.", expected: []string{".start", "and", "end", "with", "periods."}},
	}

	runExtractTest(t, cases, words.WithIgnoredRunes('.'))
}
// TestExtractWithIgnoreNumbers runs the extraction with words.Digit passed
// to WithIgnoredRuneKinds. The table expects digits to stay joined to the
// adjacent letters instead of being split off as separate tokens.
func TestExtractWithIgnoreNumbers(t *testing.T) {
	cases := []testSet{
		{input: "100cm", expected: []string{"100cm"}},
		{input: "QC35", expected: []string{"QC35"}},
		{input: "Win2000", expected: []string{"Win2000"}},
		{input: "100cm QC35 Win2000", expected: []string{"100cm", "QC35", "Win2000"}},
		{input: "100cmQC35Win2000", expected: []string{"100cm", "QC35", "Win2000"}},
	}

	runExtractTest(t, cases, words.WithIgnoredRuneKinds(words.Digit))
}