-
Notifications
You must be signed in to change notification settings - Fork 1
/
wordlist_generator_singlestroke.py
executable file
·126 lines (112 loc) · 2.45 KB
/
wordlist_generator_singlestroke.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python
import nltk
from nltk.corpus import brown
import json, os
# Pronunciation table from NLTK's cmudict corpus reader: word -> list of
# pronunciations, each pronunciation being a list of phoneme strings.
prondict = nltk.corpus.cmudict.dict()

# The Plover dictionary is read from the user's config directory; it is a
# JSON object and is used below as a mapping from stroke string to word.
_plover_dict_path = os.path.expanduser("~/.config/plover/dict.json")
with open(_plover_dict_path) as data_file:
    stenodict = json.load(data_file)
# Phoneme -> steno-key tables.  Keys are CMUdict phoneme symbols with the
# trailing stress digit removed; values are the key strings emitted for that
# phoneme.  A '*' inside a value marks that the stroke needs the asterisk key;
# word_to_steno() moves it to its proper place when the stroke is assembled.

# Consonants that come before the primary-stressed vowel.
onset = {
    'B': 'PW',
    'CH': 'KH',
    'D': 'TK',
    'DH': 'TH',
    'F': 'TP',
    'G': 'TKPW',
    'HH': 'H',
    'JH': 'SKWR',
    'K': 'K',
    'L': 'HR',
    'M': 'PH',
    'N': 'TPH',
    'P': 'P',
    'R': 'R',
    'S': 'S',
    'SH': 'SH',
    'T': 'T',
    'TH': 'TH',
    'V': 'SR',
    'W': 'W',
    'Y': 'KWR',
    'Z': 'S*'
}

# The primary-stressed vowel itself.
nucleus = {
    'AA': 'O',
    'AE': 'A',
    'AH': 'U',
    'AO': 'AU',
    'AW': 'OU',
    'AY': 'AOEU',
    'EH': 'E',
    'ER': 'ER',
    'EY': 'AEU',
    'IH': 'EU',
    'IY': 'AOE',
    'OW': 'OE',
    'OY': 'OEU',
    'UH': 'U',
    'UW': 'AOU'
}

# Phonemes that come after the primary-stressed vowel.  'ER' is the only
# vowel listed: an ER following the stressed vowel is written as a plain R.
coda = {
    'ER': 'R',
    'B': 'B',
    'CH': 'FP',
    'D': 'D',
    'DH': '*T',
    'F': 'F',
    'G': 'G',
    'JH': 'PBLJ',
    'K': 'BG',
    'L': 'L',
    'M': 'PL',
    'N': 'PB',
    'NG': 'PBG',
    'P': 'P',
    'R': 'R',
    'S': 'S',
    'SH': 'RB',
    'T': 'T',
    'TH': '*T',
    'V': 'F',
    'Z': 'Z'
}


def _assemble_stroke(left, vowels, right):
    """Join onset, vowel and coda keys, putting any '*' in steno order."""
    if '*' not in left and '*' not in vowels and '*' not in right:
        return left + vowels + right
    left = left.replace('*', '')
    vowels = vowels.replace('*', '')
    right = right.replace('*', '')
    # The asterisk key sits between the A/O vowels and the E/U vowels, so it
    # goes right after the leading run of 'A'/'O' in the vowel keys.  Several
    # starred phonemes still yield a single '*'.
    split = 0
    while split < len(vowels) and vowels[split] in 'AO':
        split += 1
    return left + vowels[:split] + '*' + vowels[split:] + right


def word_to_steno(word, pronunciations=None):
    """Return candidate single-stroke outlines for *word*.

    Each pronunciation is cut at its primary-stressed phoneme (the one whose
    symbol ends in '1') into three parts: the phonemes before it, the stressed
    phoneme, and the phonemes after it.  The parts are translated through the
    ``onset``, ``nucleus`` and ``coda`` tables respectively; phonemes missing
    from the relevant table contribute nothing.  Pronunciations that do not
    contain exactly one primary-stressed phoneme are skipped.

    Args:
        word: The word to look up in ``prondict``.  Only used when
            *pronunciations* is not given.
        pronunciations: Optional iterable of pronunciations (each a sequence
            of phoneme strings such as ``['K', 'AE1', 'T']``) to convert
            instead of ``prondict[word]``.

    Returns:
        A list with one stroke string per accepted pronunciation, in order.

    Raises:
        KeyError: If *pronunciations* is omitted and *word* is not a key of
            ``prondict``.
    """
    if pronunciations is None:
        pronunciations = prondict[word]

    result = []
    for pron in pronunciations:
        # parts alternates [phonemes, [stressed], phonemes, [stressed], ...];
        # exactly three parts means exactly one primary stress.
        parts = []
        pending = []
        for phone in pron:
            if phone.endswith('1'):
                parts.append(pending)
                parts.append([phone])
                pending = []
            else:
                pending.append(phone)
        parts.append(pending)

        if len(parts) != 3:
            continue

        before, stressed, after = parts
        # phone[:2] strips the stress digit from vowels and leaves one- and
        # two-letter consonant symbols unchanged.
        left = ''.join(onset.get(phone[:2], '') for phone in before)
        vowels = ''.join(nucleus.get(phone[:2], '') for phone in stressed)
        right = ''.join(coda.get(phone[:2], '') for phone in after)
        result.append(_assemble_stroke(left, vowels, right))
    return result
# Collect every cmudict word for which one of the strokes produced by
# word_to_steno() is mapped to exactly that word in the Plover dictionary.
easy_words = []
# cmudict.words() repeats a word once per pronunciation, while
# word_to_steno() already covers all pronunciations of a word, so each word
# is only examined the first time it is seen.
_seen_words = set()
for word in nltk.corpus.cmudict.words():
    if word in _seen_words:
        continue
    _seen_words.add(word)
    for steno in word_to_steno(word):
        if stenodict.get(steno) == word:  # and 3 <= len(steno) <= 6:
            print(word)
            easy_words.append(word)
            # One matching stroke is enough; do not report the word again
            # for its other pronunciations.
            break
#for bigram in list(nltk.bigrams(brown.words())):
# if bigram[0] in easy_words and bigram[1] in easy_words:
# print(bigram[0] + ' ' + bigram[1])
#for trigram in list(nltk.trigrams(brown.words())):
# if trigram[0] in easy_words and trigram[1] in easy_words and trigram[2] in easy_words:
# print(trigram[0] + ' ' + trigram[1] + ' ' + trigram[2])