-
Notifications
You must be signed in to change notification settings - Fork 0
/
syllable.py
101 lines (89 loc) · 2.33 KB
/
syllable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import getopt
import random
import sys

from nltk import word_tokenize
from nltk.corpus import cmudict
from nltk.tokenize import sent_tokenize
# CMU Pronouncing Dictionary lookup, loaded once at import time.
# nsyl_sent indexes it by lower-cased token and reads each entry as a list of
# pronunciations, each pronunciation being a sequence of phoneme strings.
d = cmudict.dict()
def nsyl_sent(sentence):
    """Count the syllables in ``sentence`` using the module-level ``d`` lookup.

    The sentence is split with ``word_tokenize``; each token is lower-cased
    and looked up in ``d``. Tokens that are not present (punctuation, unknown
    words) contribute nothing to the total.

    Args:
        sentence: Text to tokenize and measure.

    Returns:
        The summed syllable count, taking the first listed pronunciation of
        every token found in ``d``.
    """
    nsyl = 0
    for token in word_tokenize(sentence):
        key = token.lower()
        if key not in d:
            continue
        # Only the first pronunciation is ever used, so only that one is
        # counted. A phoneme whose last character is a digit is counted as
        # one syllable (in CMUdict the digit is the vowel's stress marker).
        nsyl += sum(1 for phoneme in d[key][0] if phoneme[-1].isdigit())
    return nsyl
def syl_frame(text):
    """Return the per-sentence syllable counts of ``text``.

    Args:
        text: Text that is split into sentences with ``sent_tokenize``.

    Returns:
        A list with one ``nsyl_sent`` count per sentence, in document order.
    """
    return [nsyl_sent(sentence) for sentence in sent_tokenize(text)]
def build_syl_dict(text):
    """Group the sentences of ``text`` by their syllable count.

    Args:
        text: Text that is split into sentences with ``sent_tokenize``.

    Returns:
        A dict mapping each ``nsyl_sent`` count to the list of sentences
        having that count, in the order they appear in ``text``.
    """
    by_count = {}
    for sentence in sent_tokenize(text):
        # setdefault creates the bucket the first time a count is seen.
        by_count.setdefault(nsyl_sent(sentence), []).append(sentence)
    return by_count
def merge_syl_dicts(target, addition):
    """Merge ``addition`` into ``target`` in place and return ``target``.

    Both arguments map a key (a syllable count) to a list of sentences. For
    keys present in both, the sentences from ``addition`` are appended to the
    existing list in ``target``; other keys are added to ``target``.

    Args:
        target: Dict that is mutated to receive the merged entries.
        addition: Dict whose entries are merged in; it is left unmodified.

    Returns:
        The same ``target`` object that was passed in.
    """
    for count, sentences in addition.items():
        if count in target:
            target[count].extend(sentences)
        else:
            # Copy the list so target never shares a list object with
            # addition; otherwise a later extend on target would silently
            # grow addition's list as well.
            target[count] = list(sentences)
    return target
def construct_corpora_syl_dict(corpora):
    """Build one syllable-count dict from several corpus files.

    Each file is read in full, converted with ``build_syl_dict``, and merged
    into a running result with ``merge_syl_dicts``.

    Args:
        corpora: Iterable of file paths to read as text.

    Returns:
        The merged dict mapping syllable count to a list of sentences, or
        ``None`` when ``corpora`` yields no paths.
    """
    first_dict = None
    for path in corpora:
        # The context manager closes the handle even if the read fails.
        with open(path, 'r') as f:
            contents = f.read()
        syl_dict = build_syl_dict(contents)
        if first_dict is None:
            first_dict = syl_dict
        else:
            first_dict = merge_syl_dicts(first_dict, syl_dict)
    return first_dict
def build_to_frame(frame, corpus):
    """Pick one random corpus sentence for each syllable count in ``frame``.

    Args:
        frame: Iterable of syllable counts, one per output slot.
        corpus: Mapping from syllable count to a list of candidate sentences.

    Returns:
        A list of the chosen sentences in frame order. Counts with no
        candidates are skipped (and reported on stdout), so the result may be
        shorter than ``frame``.
    """
    build = []
    for nsyl in frame:
        candidates = corpus[nsyl] if nsyl in corpus else None
        if candidates:
            build.append(random.choice(candidates))
        else:
            # An empty bucket is handled like a missing one: random.choice
            # would raise IndexError on an empty sequence.
            print('MISSING SYL COUNT ' + str(nsyl))
    return build
def main(argv):
    """Parse the command-line arguments.

    No options are defined yet (the short-option string is empty), so the
    parsed values are currently unused; ``getopt`` raises ``GetoptError`` if
    any option flag is supplied.

    Args:
        argv: Argument list without the program name.
    """
    opts, args = getopt.getopt(argv, "")


if __name__ == "__main__":
    main(sys.argv[1:])

    # NOTE(review): the driver below is placed under the __main__ guard so
    # that importing this module performs no file I/O; confirm this matches
    # the intended layout.

    # Set up frame
    with open('frame.txt', 'r') as f:
        text = f.read()
    frame = syl_frame(text)

    # Load corpora
    corpora = [
        'corpora/Art.txt',
        'corpora/AskReddit.txt',
        'corpora/bestof.txt',
        'corpora/books.txt',
        'corpora/dataisbeautiful.txt',
        'corpora/explainlikeimfive.txt',
        'corpora/HistoryPorn.txt',
        'corpora/interestingasfuck.txt',
        'corpora/mildlyinfuriating.txt',
        'corpora/nottheonion.txt',
        'corpora/relationships.txt',
        'corpora/science.txt',
        'corpora/Showerthoughts.txt',
        'corpora/space.txt',
        'corpora/tifu.txt',
    ]
    united_corpus = construct_corpora_syl_dict(corpora)

    # Build corpora against frame
    framed = build_to_frame(frame, united_corpus)
    with open('result.txt', 'w') as f:
        f.write(' '.join(framed))