Skip to content

Commit

Permalink
refactor method and variable names to follow Python convention: lower-case words separated by underscores
Browse files Browse the repository at this point in the history
  • Loading branch information
har07 committed Jan 14, 2016
1 parent 74cd51c commit c0a2296
Show file tree
Hide file tree
Showing 28 changed files with 240 additions and 264 deletions.
4 changes: 2 additions & 2 deletions src/Sastrawi/Dictionary/ArrayDictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ class ArrayDictionary(object):
def __init__(self, words=None):
self.words = []
if words:
self.addWords(words)
self.add_words(words)

def contains(self, word):
return word in self.words

def count(self):
return len(self.words)

def addWords(self, words):
def add_words(self, words):
"""Add multiple words to the dictionary"""
for word in words:
self.add(word)
Expand Down
6 changes: 3 additions & 3 deletions src/Sastrawi/Morphology/InvalidAffixPairSpecification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ class InvalidAffixPairSpecification(object):
@link http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
"""
def isSatisfiedBy(self, word):
def is_satisfied_by(self, word):
if re.match(r'^me(.*)kan$', word):
return False

if word == 'ketahui':
return False

invalidAffixes = [r'^ber(.*)i$',
invalid_affixes = [r'^ber(.*)i$',
r'^di(.*)an$',
r'^ke(.*)i$',
r'^ke(.*)an$',
Expand All @@ -22,7 +22,7 @@ def isSatisfiedBy(self, word):
r'^per(.*)an$']

contains = False
for invalidAffix in invalidAffixes:
for invalidAffix in invalid_affixes:
contains = contains or re.match(invalidAffix, word)

return contains
Expand Down
4 changes: 2 additions & 2 deletions src/Sastrawi/Stemmer/CachedStemmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def __init__(self, cache, delegatedStemmer):
self.delegatedStemmer = delegatedStemmer

def stem(self, text):
normalizedText = TextNormalizer.normalizeText(text)
normalizedText = TextNormalizer.normalize_text(text)

words = normalizedText.split(' ')
stems = []
Expand All @@ -23,5 +23,5 @@ def stem(self, text):

return ' '.join(stems)

def getCache(self):
def get_cache(self):
return self.cache
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ class PrecedenceAdjustmentSpecification(object):
@link http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
"""

def isSatisfiedBy(self, value):
regexRules = [
def is_satisfied_by(self, value):
regex_rules = [
r'^be(.*)lah$',
r'^be(.*)an$',
r'^me(.*)i$',
Expand All @@ -17,7 +17,7 @@ def isSatisfiedBy(self, value):
r'^ter(.*)i$',
]

for rule in regexRules:
for rule in regex_rules:
if re.match(rule, value):
return True

Expand Down
166 changes: 71 additions & 95 deletions src/Sastrawi/Stemmer/Context/Context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,185 +4,161 @@
class Context(object):
"""Stemming Context using Nazief and Adriani, CS, ECS, Improved ECS"""

def __init__(self, originalWord, dictionary, visitorProvider):
self.originalWord = originalWord
self.currentWord = originalWord
def __init__(self, original_word, dictionary, visitor_provider):
self.original_word = original_word
self.current_word = original_word
self.dictionary = dictionary
self.visitorProvider = visitorProvider
self.visitor_provider = visitor_provider

self.processIsStopped = False
self.process_is_stopped = False
self.removals = []
self.visitors = []
self.suffixVisitors = []
self.prefixVisitors = []
self.suffix_visitors = []
self.prefix_pisitors = []
self.result = ''

self.initVisitors()
self.init_visitors()

def initVisitors(self):
self.visitors = self.visitorProvider.getVisitors()
self.suffixVisitors = self.visitorProvider.getSuffixVisitors()
self.prefixVisitors = self.visitorProvider.getPrefixVisitors()

def setDictionary(self, dictionary):
self.dictionary = dictionary

def getDictionary(self):
return self.dictionary

def getOriginalWord(self):
return self.originalWord

def setCurrentWord(self, word):
self.currentWord = word

def getCurrentWord(self):
return self.currentWord
def init_visitors(self):
self.visitors = self.visitor_provider.get_visitors()
self.suffix_visitors = self.visitor_provider.get_suffix_visitors()
self.prefix_pisitors = self.visitor_provider.get_prefix_visitors()

def stopProcess(self):
self.processIsStopped = True

#def processIsStopped(self):
# return self.processIsStopped
self.process_is_stopped = True

def addRemoval(self, removal):
def add_removal(self, removal):
self.removals.append(removal)

def getRemovals(self):
return self.removals

def getResult(self):
return self.result

def execute(self):
"""Execute stemming process; the result can be retrieved with getResult()"""
"""Execute stemming process; the result can be retrieved from the result attribute"""

#step 1 - 5
self.startStemmingProcess()
self.start_stemming_process()

#step 6
if self.dictionary.contains(self.currentWord):
self.result = self.getCurrentWord()
if self.dictionary.contains(self.current_word):
self.result = self.current_word
else:
self.result = self.originalWord
self.result = self.original_word

def startStemmingProcess(self):
def start_stemming_process(self):

#step 1
if self.dictionary.contains(self.currentWord):
if self.dictionary.contains(self.current_word):
return
self.acceptVisitors(self.visitors)
if self.dictionary.contains(self.currentWord):
self.accept_visitors(self.visitors)
if self.dictionary.contains(self.current_word):
return

csPrecedenceAdjustmentSpecification = PrecedenceAdjustmentSpecification()

#Confix Stripping
#Try to remove prefix before suffix if the specification is met
if csPrecedenceAdjustmentSpecification.isSatisfiedBy(self.getOriginalWord()):
if csPrecedenceAdjustmentSpecification.is_satisfied_by(self.original_word):
#step 4, 5
self.removePrefixes()
if self.dictionary.contains(self.currentWord):
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return

#step 2, 3
self.removeSuffixes()
if self.dictionary.contains(self.currentWord):
self.remove_suffixes()
if self.dictionary.contains(self.current_word):
return
else:
#if the trial is failed, restore the original word
#and continue to normal rule precedence (suffix first, prefix afterwards)
self.setCurrentWord(self.originalWord)
self.current_word = self.original_word
self.removals = []

#step 2, 3
self.removeSuffixes()
if self.dictionary.contains(self.currentWord):
self.remove_suffixes()
if self.dictionary.contains(self.current_word):
return

#step 4, 5
self.removePrefixes()
if self.dictionary.contains(self.currentWord):
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return

#ECS loop pengembalian akhiran
self.loopPengembalianAkhiran()
self.loop_pengembalian_akhiran()

def removePrefixes(self):
def remove_prefixes(self):
for i in range(3):
self.acceptPrefixVisitors(self.prefixVisitors)
if self.dictionary.contains(self.currentWord):
self.accept_prefix_visitors(self.prefix_pisitors)
if self.dictionary.contains(self.current_word):
return

def removeSuffixes(self):
self.acceptVisitors(self.suffixVisitors)
def remove_suffixes(self):
self.accept_visitors(self.suffix_visitors)

def accept(self, visitor):
visitor.visit(self)

def acceptVisitors(self, visitors):
def accept_visitors(self, visitors):
for visitor in visitors:
self.accept(visitor)
if self.dictionary.contains(self.currentWord):
return self.getCurrentWord()
if self.processIsStopped:
return self.getCurrentWord()
if self.dictionary.contains(self.current_word):
return self.current_word
if self.process_is_stopped:
return self.current_word

def acceptPrefixVisitors(self, visitors):
def accept_prefix_visitors(self, visitors):
removalCount = len(self.removals)
for visitor in visitors:
self.accept(visitor)
if self.dictionary.contains(self.currentWord):
return self.getCurrentWord()
if self.processIsStopped:
return self.getCurrentWord()
if self.dictionary.contains(self.current_word):
return self.current_word
if self.process_is_stopped:
return self.current_word
if len(self.removals) > removalCount:
return

def loopPengembalianAkhiran(self):
def loop_pengembalian_akhiran(self):
"""ECS Loop Pengembalian Akhiran"""
self.restorePrefix()
self.restore_prefix()

removals = self.removals
reversedRemovals = reversed(removals)
currentWord = self.getCurrentWord()
reversed_removals = reversed(removals)
current_word = self.current_word

for removal in reversedRemovals:
if not self.isSuffixRemoval(removal):
for removal in reversed_removals:
if not self.is_suffix_removal(removal):
continue
if removal.getRemovedPart() == 'kan':
self.setCurrentWord(removal.getResult() + 'k')
if removal.get_removed_part() == 'kan':
self.current_word = removal.result + 'k'

#step 4,5
self.removePrefixes()
if self.dictionary.contains(self.currentWord):
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return
self.setCurrentWord(removal.getResult() + 'kan')
self.current_word = removal.result + 'kan'
else:
self.setCurrentWord(removal.getSubject())
self.current_word = removal.get_subject()

#step 4,5
self.removePrefixes()
if self.dictionary.contains(self.currentWord):
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return

self.removals = removals
self.setCurrentWord(currentWord)
self.current_word = current_word

def isSuffixRemoval(self, removal):
def is_suffix_removal(self, removal):
"""Check whether the removed part is a suffix"""
return removal.getAffixType() == 'DS' \
or removal.getAffixType() == 'PP' \
or removal.getAffixType() == 'P'
return removal.get_affix_type() == 'DS' \
or removal.get_affix_type() == 'PP' \
or removal.get_affix_type() == 'P'

def restorePrefix(self):
def restore_prefix(self):
"""Restore prefix to proceed with ECS loop pengembalian akhiran"""
for removal in self.removals:
#return the word before precoding (the subject of first prefix removal)
self.setCurrentWord(removal.getSubject())
self.current_word = removal.get_subject()
break

for removal in self.removals:
if removal.getAffixType() == 'DP':
if removal.get_affix_type() == 'DP':
self.removals.remove(removal)

12 changes: 6 additions & 6 deletions src/Sastrawi/Stemmer/Context/Removal.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from Sastrawi.Stemmer.Context.RemovalInterface import RemovalInterface

class Removal(RemovalInterface):
class Removal(object):
"""description of class"""

def __init__(self, visitor, subject, result, removedPart, affixType):
Expand All @@ -10,19 +10,19 @@ def __init__(self, visitor, subject, result, removedPart, affixType):
self.removedPart = removedPart
self.affixType = affixType

def getVisitor(self):
def get_visitor(self):
return self.visitor

def getSubject(self):
def get_subject(self):
return self.subject

def getResult(self):
def get_result(self):
return self.result

def getRemovedPart(self):
def get_removed_part(self):
return self.removedPart

def getAffixType(self):
def get_affix_type(self):
return self.affixType


Expand Down
10 changes: 5 additions & 5 deletions src/Sastrawi/Stemmer/Context/RemovalInterface.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
class RemovalInterface(object):
"""description of class"""

def getVisitor(self):
def get_visitor(self):
pass

def getSubject(self):
def get_subject(self):
pass

def getResult(self):
def get_result(self):
pass

def getRemovedPart(self):
def get_removed_part(self):
pass

def getAffixType(self):
def get_affix_type(self):
pass


Expand Down
Loading

0 comments on commit c0a2296

Please sign in to comment.