-
Notifications
You must be signed in to change notification settings - Fork 0
/
replacer.py
49 lines (43 loc) · 1.83 KB
/
replacer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
####################################################################
# Licence: Creative Commons (see COPYRIGHT) #
# Authors: Nikolaos Pappas, Georgios Katsimpras #
# {nik0spapp, gkatsimpras}@gmail.com #
# Supervisor: Efstathios Stamatatos #
# stamatatos@aegean.gr #
# University of the Aegean #
# Department of Information and Communication Systems Engineering #
# Information Management Track (MSc) #
# Karlovasi, Samos #
# Greece #
####################################################################
import re
from nltk.corpus import wordnet
class RepeatReplacer(object):
    """
    RepeatReplacer: Replaces letters that appear in irregular
    repetition inside words.

    Adjacent duplicate characters are dropped one at a time until the
    word (with punctuation stripped) has at least one WordNet synset,
    the word is flagged as an emoticon in the lexicon, or no adjacent
    duplicates remain.
    """

    # Punctuation removed from a word before it is looked up in WordNet.
    _PUNCTUATION_REGEXP = re.compile(r'\!|;|\||\.|\?|,|:|"|\)|\(')

    def __init__(self, lexicon):
        """
        Store the lexicon's word table and prepare the repeat pattern.

        lexicon -- object whose ``words`` attribute maps a word to a
                   mapping of properties; a word counts as an emoticon
                   when that mapping contains the key 'emoticon'.
        """
        self.lexicon = lexicon.words
        # Matches any string containing the same character twice in a
        # row; the substitution keeps a single copy of that character.
        self.repeat_regexp = re.compile(r'(.*)(.)\2(.*)')
        self.repl = r'\1\2\3'

    def replace(self, word):
        """
        Return ``word`` with irregular character repetitions removed.

        Emoticons listed in the lexicon are returned untouched. When the
        punctuation-free form of the word is known to WordNet, that form
        is returned followed by the distinct characters that came after
        it in ``word``. Otherwise one repeated character is removed and
        the checks are run again on the shortened word.
        """
        # Iterative rather than recursive so that very long runs of
        # repeated characters cannot exhaust the recursion limit.
        while True:
            stripped = self._PUNCTUATION_REGEXP.sub('', word)

            # ``in`` instead of ``has_key`` so plain dicts work on both
            # Python 2 and Python 3.
            if word in self.lexicon and 'emoticon' in self.lexicon[word]:
                return word

            if wordnet.synsets(stripped):
                if word == stripped:
                    return word
                # Collapse the remainder to its distinct characters,
                # keeping first-occurrence order so the result is
                # deterministic.
                # NOTE(review): this slice assumes the stripped
                # punctuation sat at the end of the word -- confirm for
                # inputs with leading or embedded punctuation.
                suffix = []
                for char in word[len(stripped):]:
                    if char not in suffix:
                        suffix.append(char)
                return stripped + "".join(suffix)

            shortened = self.repeat_regexp.sub(self.repl, word)
            if shortened == word:
                # No adjacent duplicates left to remove.
                return shortened
            word = shortened
if __name__ == '__main__':
    # Small demo: collapse the repeated letters of a sample word.

    class _EmptyLexicon(object):
        # Minimal stand-in for the project lexicon; RepeatReplacer only
        # reads the ``words`` attribute.
        words = {}

    # The constructor requires a lexicon argument.
    rr = RepeatReplacer(_EmptyLexicon())
    example = "sorrryyyyyyyyyyy"
    # Single-argument print(...) is valid on both Python 2 and 3.
    print("Before: " + example)
    # Strings are immutable, so the cleaned word is the return value.
    print("After: " + rr.replace(example))