-
Notifications
You must be signed in to change notification settings - Fork 1
/
syll.go
266 lines (238 loc) · 7.58 KB
/
syll.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
package rbg2p
import (
"fmt"
"os"
"strings"
)
// SyllDef describes a pluggable syllabification strategy. A Syllabifier
// consults it to decide where syllable boundaries may be placed and which
// delimiters to use when rendering the result.
type SyllDef interface {
	// ValidSplit reports whether a syllable boundary may be placed between
	// the phonemes in left and the phonemes in right.
	ValidSplit(left []string, right []string) bool
	// ContainsSyllabic reports whether phonemes holds at least one syllabic
	// phoneme.
	ContainsSyllabic(phonemes []string) bool
	// IsDefined reports whether the definition has been configured.
	IsDefined() bool
	// IsStress reports whether symbol is a stress symbol.
	IsStress(symbol string) bool
	// IsSyllabic reports whether symbol is a syllabic phoneme.
	IsSyllabic(symbol string) bool
	// PhonemeDelimiter returns the string that separates phonemes.
	PhonemeDelimiter() string
	// StressPlacement returns the stress placement setting of the definition.
	StressPlacement() StressPlacement
	// IncludePhonemeDelimiter reports whether syllable delimiters are
	// surrounded by the phoneme delimiter in rendered output.
	IncludePhonemeDelimiter() bool
	// SyllableDelimiter returns the string that separates syllables.
	SyllableDelimiter() string
}
// MOPSyllDef implements SyllDef using the Maximum Onset Principle: a boundary
// is accepted only where the following onset is valid and cannot be extended
// leftwards into another valid onset.
type MOPSyllDef struct {
	// Onsets lists the valid onsets; each entry is compared against the
	// onset phonemes joined with PhnDelim.
	Onsets []string
	// Syllabic lists the phonemes that count as syllabic.
	Syllabic []string
	// PhnDelim is the string that separates phonemes.
	PhnDelim string
	// SyllDelim is the string that separates syllables.
	SyllDelim string
	// Stress lists the stress symbols.
	Stress []string
	// StressPlcmnt is the value returned by StressPlacement.
	StressPlcmnt StressPlacement
	// IncludePhnDelim is the value returned by IncludePhonemeDelimiter.
	IncludePhnDelim bool
}
// PhonemeDelimiter returns the configured phoneme separator (PhnDelim).
// It is part of the SyllDef interface.
func (def MOPSyllDef) PhonemeDelimiter() string {
	return def.PhnDelim
}
// IncludePhonemeDelimiter reports whether syllable boundaries are to be
// surrounded by the phoneme delimiter (the IncludePhnDelim setting).
// It is part of the SyllDef interface.
func (def MOPSyllDef) IncludePhonemeDelimiter() bool {
	return def.IncludePhnDelim
}
// SyllableDelimiter returns the configured syllable separator (SyllDelim).
// It is part of the SyllDef interface.
func (def MOPSyllDef) SyllableDelimiter() string {
	return def.SyllDelim
}
// StressPlacement returns the configured stress placement (StressPlcmnt).
// It is part of the SyllDef interface.
func (def MOPSyllDef) StressPlacement() StressPlacement {
	return def.StressPlcmnt
}
// IsDefined reports whether this definition is usable, i.e. whether at least
// one onset has been configured. It is part of the SyllDef interface.
func (def MOPSyllDef) IsDefined() bool {
	return len(def.Onsets) != 0
}
// IsStress reports whether symbol exactly matches one of the configured
// stress symbols.
func (def MOPSyllDef) IsStress(symbol string) bool {
	for i := range def.Stress {
		if def.Stress[i] == symbol {
			return true
		}
	}
	return false
}
// IsSyllabic reports whether phoneme exactly matches one of the configured
// syllabic phonemes.
func (def MOPSyllDef) IsSyllabic(phoneme string) bool {
	for i := range def.Syllabic {
		if def.Syllabic[i] == phoneme {
			return true
		}
	}
	return false
}
// ContainsSyllabic reports whether at least one element of phonemes is a
// syllabic phoneme according to IsSyllabic. It is part of the SyllDef
// interface.
func (def MOPSyllDef) ContainsSyllabic(phonemes []string) bool {
	found := false
	// Stop scanning as soon as the first syllabic phoneme is seen.
	for i := 0; i < len(phonemes) && !found; i++ {
		found = def.IsSyllabic(phonemes[i])
	}
	return found
}
// validOnset reports whether onset (phonemes already joined into a single
// string) is an acceptable syllable onset. The empty onset is always valid;
// any other value must exactly match an entry in Onsets.
func (def MOPSyllDef) validOnset(onset string) bool {
	if onset == "" {
		return true
	}
	for i := range def.Onsets {
		if def.Onsets[i] == onset {
			return true
		}
	}
	return false
}
// ValidSplit reports whether a syllable boundary may be placed between left0
// and right0 according to the maximum onset principle. It is called by the
// Syllabifier for every candidate boundary position.
//
// The leading run of non-syllabic symbols in right0 is the candidate onset
// and must be a valid onset (the empty onset always is). Stress symbols in
// that run are only counted as part of the onset when stress is placed after
// the syllabic; otherwise they are skipped. The split is rejected if moving
// one or more trailing non-syllabic symbols of left0 across the boundary
// yields a valid onset, since a larger onset would then be possible.
func (def MOPSyllDef) ValidSplit(left0 []string, right0 []string) bool {
	stressInOnset := def.StressPlacement() == AfterSyllabic
	delim := def.PhonemeDelimiter()

	// Gather the onset: everything in right0 up to the first syllabic symbol.
	candidate := []string{}
	for _, sym := range right0 {
		if def.IsSyllabic(sym) {
			break
		}
		if def.IsStress(sym) && !stressInOnset {
			continue
		}
		candidate = append(candidate, sym)
	}
	if !def.validOnset(strings.Join(candidate, delim)) {
		return false
	}

	// Grow the onset leftwards one symbol at a time until a syllabic symbol
	// is reached; any valid extension means this boundary is too far right.
	for i := len(left0) - 1; i >= 0; i-- {
		if def.IsSyllabic(left0[i]) {
			break
		}
		candidate = append([]string{left0[i]}, candidate...)
		if def.validOnset(strings.Join(candidate, delim)) {
			return false
		}
	}
	return true
}
// SyllTest is a single syllabification test case: the transcription to
// syllabify and the output that is expected for it.
type SyllTest struct {
	// Input is the transcription string handed to the syllabifier.
	Input string
	// Output is the expected syllabified string.
	Output string
}
// Syllabifier divides a transcription into syllables using a SyllDef.
type Syllabifier struct {
	// SyllDef is the strategy that decides where boundaries may be placed.
	SyllDef SyllDef
	// Tests are the test cases executed by Test.
	Tests []SyllTest
	// StressPlacement controls where the stress symbol is written in the
	// output; when it is Undefined the transcription is rendered as is.
	StressPlacement StressPlacement
	// PhonemeSet is used to split transcription strings into phonemes.
	PhonemeSet PhonemeSet
	// Debug enables diagnostic output on standard error.
	Debug bool
}
// IsDefined reports whether a syllabifier is available: a SyllDef must be
// set and must itself report that it is defined.
func (s Syllabifier) IsDefined() bool {
	if s.SyllDef == nil {
		return false
	}
	return s.SyllDef.IsDefined()
}
// SyllabifyFromPhonemes syllabifies a sequence of phonemes and returns the
// rendered output string. Each phoneme is wrapped in its own unit with an
// empty grapheme part before being syllabified.
func (s Syllabifier) SyllabifyFromPhonemes(phns []string) string {
	var units []g2p
	for i := range phns {
		units = append(units, g2p{g: "", p: []string{phns[i]}})
	}
	return s.syllabifyToString(trans{phonemes: units})
}
// SyllabifyFromString splits the transcription string into phonemes using
// the PhonemeSet, syllabifies them and returns the rendered output string.
// If the transcription cannot be split, the empty string and the split error
// are returned.
func (s Syllabifier) SyllabifyFromString(trans string) (string, error) {
	phonemes, splitErr := s.PhonemeSet.SplitTranscription(trans)
	if splitErr != nil {
		return "", splitErr
	}
	sylled := s.SyllabifyFromPhonemes(phonemes)
	return sylled, nil
}
// syllabifyToString syllabifies t and renders the result as a string with
// stress placed according to the syllabifier settings. When Debug is set,
// the input, the syllabified form and the rendered string are also written
// to standard error as one tab-separated line.
func (s Syllabifier) syllabifyToString(t trans) string {
	segmented := s.syllabify(t)
	rendered := s.stringWithStressPlacement(segmented)
	if !s.Debug {
		return rendered
	}
	fmt.Fprintf(os.Stderr, "%s\t%s\t%v\t%s\n", "SYLLABIFY", t, segmented, rendered)
	return rendered
}
// syllabify walks the phonemes of t from left to right and records a
// boundary in front of every phoneme where the SyllDef accepts a split and
// both sides contain a syllabic phoneme. A boundary is stored as the index
// of the unit in t.phonemes together with the index of the phoneme inside
// that unit.
func (s Syllabifier) syllabify(t trans) sylledTrans {
	out := sylledTrans{trans: t}
	// remaining holds the phonemes from the current position onwards;
	// seen holds the phonemes collected since the last boundary.
	remaining := t.listPhonemes()
	seen := []string{}
	for gi := range t.phonemes {
		for pi, phn := range t.phonemes[gi].p {
			canSplit := s.SyllDef.ValidSplit(seen, remaining)
			if canSplit && len(seen) > 0 && s.SyllDef.ContainsSyllabic(seen) && s.SyllDef.ContainsSyllabic(remaining) {
				out.boundaries = append(out.boundaries, boundary{g: gi, p: pi})
				// A new syllable starts at the current phoneme.
				seen = []string{}
			}
			seen = append(seen, phn)
			remaining = remaining[1:]
		}
	}
	return out
}
// Test runs every test case in s.Tests through SyllabifyFromString and
// collects the failures in the returned TestResult.
//
// A test case whose input cannot be split into phonemes is reported once,
// with the split error. All other cases are reported when the syllabified
// output differs from the expected output.
func (s Syllabifier) Test() TestResult {
	var result TestResult
	for _, test := range s.Tests {
		res, err := s.SyllabifyFromString(test.Input)
		if err != nil {
			result.Errors = append(result.Errors, fmt.Sprintf("found error in test input (couldn't split) /%s/ : %s", test.Input, err))
			// res carries no information when splitting failed; comparing it
			// with the expected output would only add a second, misleading
			// failure for the same test case.
			continue
		}
		if res != test.Output {
			result.Errors = append(result.Errors, fmt.Sprintf("from /%s/ expected /%s/, found /%s/", test.Input, test.Output, res))
		}
	}
	return result
}
// stringWithStressPlacement renders a syllabified transcription as a string.
//
// When the syllabifier's StressPlacement is Undefined, the transcription is
// rendered as is with the SyllDef's phoneme and syllable delimiters.
// Otherwise the transcription is parsed into syllables and the stress of
// each stressed syllable is written first in the syllable, or directly
// before or after each syllabic phoneme, depending on the setting. Syllables
// are joined with the syllable delimiter, surrounded by phoneme delimiters
// if the SyllDef asks for that.
func (s Syllabifier) stringWithStressPlacement(t sylledTrans) string {
	def := s.SyllDef
	if s.StressPlacement == Undefined {
		return t.string(def.PhonemeDelimiter(), def.SyllableDelimiter())
	}

	parsed := s.parse(t)
	if s.Debug {
		fmt.Fprintf(os.Stderr, "PARSED SYLLABLES\t%v\n", parsed)
	}

	rendered := []string{}
	for _, syll := range parsed {
		stressed := syll.stress != ""
		symbols := []string{}
		if stressed && s.StressPlacement == FirstInSyllable {
			symbols = append(symbols, syll.stress)
		}
		for _, phn := range syll.phonemes {
			// Stress is attached to syllabic phonemes of stressed syllables only.
			attach := stressed && def.IsSyllabic(phn)
			if attach && s.StressPlacement == BeforeSyllabic {
				symbols = append(symbols, syll.stress)
			}
			symbols = append(symbols, phn)
			if attach && s.StressPlacement == AfterSyllabic {
				symbols = append(symbols, syll.stress)
			}
		}
		rendered = append(rendered, strings.Join(symbols, def.PhonemeDelimiter()))
	}

	sep := def.SyllableDelimiter()
	if def.IncludePhonemeDelimiter() {
		sep = def.PhonemeDelimiter() + sep + def.PhonemeDelimiter()
	}
	return strings.Join(rendered, sep)
}