Skip to content

Commit

Permalink
on progress porting disambiguator tests
Browse files Browse the repository at this point in the history
  • Loading branch information
har07 committed Jan 10, 2016
1 parent 3a7ca0e commit 74cd51c
Show file tree
Hide file tree
Showing 12 changed files with 272 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/Sastrawi/Stemmer/CachedStemmer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from Sastrawi.Stemmer.StemmerInterface import StemmerInterface
#from Sastrawi.Stemmer.StemmerInterface import StemmerInterface
from Sastrawi.Stemmer.Filter import TextNormalizer

class CachedStemmer(StemmerInterface):
class CachedStemmer(object):
"""description of class"""
def __init__(self, cache, delegatedStemmer):
self.cache = cache
Expand Down
4 changes: 2 additions & 2 deletions src/Sastrawi/StopWordRemover/StopWordRemover.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def remove(self, text):
"""Remove stop words."""
words = text.split(' ')
for word in words:
if word in self.dictionary:
self.dictionary.remove(word)
if self.dictionary.contains(word):
words.remove(word)

return ' '.join(words)

Expand Down
1 change: 1 addition & 0 deletions src/Sastrawi/StopWordRemover/StopWordRemoverFactory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
from Sastrawi.StopWordRemover.StopWordRemover import StopWordRemover

class StopWordRemoverFactory(object):
"""description of class"""
Expand Down
64 changes: 64 additions & 0 deletions tests/IntegrationTests/Stemmer/StemmerTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import unittest
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
from Sastrawi.Stemmer.Stemmer import Stemmer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

class Test_StemmerTest(unittest.TestCase):
    """Integration test: stem real words with the factory-built stemmer."""

    def setUp(self):
        # Build the stemmer through the factory so the test exercises the
        # same object graph a library user would get.
        stemmerFactory = StemmerFactory()
        self.stemmer = stemmerFactory.createStemmer()
        super(Test_StemmerTest, self).setUp()

    def getTestData(self):
        """Return the test cases as a list of ``[word, expected_stem]`` pairs."""
        # Disabled cases (kept for reference, not yet passing/ported):
        #   ['karyawan', 'karya']
        #   ['karyawati', 'karya']
        return [
            ['kebijakan', 'bijak'],
            ['kinerja', 'kerja'],
            ['mengandung', 'kandung'],
            ['memakan', 'makan'],
            ['asean', 'asean'],
            ['pemandu', 'pandu'],
            ['mengurangi', 'kurang'],
            ['pemerintah', 'perintah'],
            ['mengabulkan', 'kabul'],
            ['mengupas', 'kupas'],
            ['keterpurukan', 'puruk'],
            ['ditemukan', 'temu'],
            ['mengerti', 'erti'],
            ['kebon', 'kebon'],
            ['terdepan', 'depan'],
            ['mengikis', 'kikis'],
            ['kedudukan', 'duduk'],
            ['menekan', 'tekan'],
            ['perusakan', 'rusa'],  # overstemming, it's better than perusa
            ['ditemui', 'temu'],
            ['di', 'di'],
            ['mengalahkan', 'kalah'],
            ['melewati', 'lewat'],
            ['bernafas', 'nafas'],
            ['meniru-niru', 'tiru'],
            ['memanggil-manggil', 'panggil'],
            ['menyebut-nyebut', 'sebut'],
            ['menganga', 'nganga'],
            ['besaran', 'besar'],
            ['terhenyak', 'henyak'],
            ['mengokohkan', 'kokoh'],
            ['melainkan', 'lain'],
            ['kuasa-Mu', 'kuasa'],
            ['malaikat-malaikat-Nya', 'malaikat'],
            ['nikmat-Ku', 'nikmat'],
        ]

    def tryStem(self, word, stem):
        """Assert that stemming ``word`` yields ``stem``."""
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(stem, self.stemmer.stem(word))

    def test_stem(self):
        """Check every pair returned by getTestData()."""
        for word, stem in self.getTestData():
            self.tryStem(word, stem)

if __name__ == '__main__':
unittest.main()
33 changes: 30 additions & 3 deletions tests/SastrawiTest.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,43 @@
<ItemGroup>
<Folder Include="FunctionalTests\" />
<Folder Include="FunctionalTests\Stemmer\" />
<Folder Include="Stemmer\" />
<Folder Include="IntegrationTests\" />
<Folder Include="IntegrationTests\Stemmer\" />
<Folder Include="UnitTests\" />
<Folder Include="UnitTests\Dictionary\" />
<Folder Include="UnitTests\Morphology\" />
<Folder Include="UnitTests\Morphology\Disambiguator\" />
<Folder Include="UnitTests\StopWordRemover\" />
<Folder Include="UnitTests\Stemmer\" />
</ItemGroup>
<ItemGroup>
<Compile Include="FunctionalTests\Stemmer\StemmerTest.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="Stemmer\StemmerFactoryTest.py">
<Compile Include="IntegrationTests\Stemmer\StemmerTest.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="Stemmer\StemmerTest.py" />
<Compile Include="UnitTests\Dictionary\ArrayDictionaryTest.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="UnitTests\Morphology\Disambiguator\DisambiguatorPrefixRule1Test.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="UnitTests\Morphology\InvalidAffixPairSpecificationTest.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="UnitTests\Stemmer\StemmerFactoryTest.py" />
<Compile Include="UnitTests\Stemmer\StemmerTest.py" />
<Compile Include="UnitTests\StopWordRemover\StopWordRemoverFactoryTest.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="UnitTests\StopWordRemover\StopWordRemoverTest.py">
<SubType>Code</SubType>
</Compile>
</ItemGroup>
<ItemGroup>
<Content Include="UnitTests\Stemmer\StemmerFactoryTest.pyc" />
<Content Include="UnitTests\Stemmer\StemmerTest.pyc" />
</ItemGroup>
<Import Project="$(PtvsTargetsFile)" Condition="Exists($(PtvsTargetsFile))" />
<Import Project="$(MSBuildToolsPath)\Microsoft.Common.targets" Condition="!Exists($(PtvsTargetsFile))" />
Expand Down
40 changes: 40 additions & 0 deletions tests/UnitTests/Dictionary/ArrayDictionaryTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import unittest
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
from Sastrawi.Dictionary.DictionaryInterface import DictionaryInterface

class Test_ArrayDictionaryTest(unittest.TestCase):
    """Unit tests for ArrayDictionary: add, addWords, contains and count."""

    def setUp(self):
        # Each test starts from a dictionary constructed with no words.
        self.dictionary = ArrayDictionary()
        super(Test_ArrayDictionaryTest, self).setUp()

    def test_addAndContain(self):
        """A word is reported by contains() only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_addCountWord(self):
        """Adding one word raises count() from 0 to 1."""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_addWordIgnoreEmptyString(self):
        """Adding an empty string leaves count() unchanged."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_addWords(self):
        """addWords() stores every word of the given list."""
        words = ['word1', 'word2']
        self.dictionary.addWords(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructorPreserveWords(self):
        """Words passed to the constructor are present in the dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))

if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest
from Sastrawi.Morphology.Disambiguator.DisambiguatorPrefixRule1 import DisambiguatorPrefixRule1a, DisambiguatorPrefixRule1b

class Test_DisambiguatorPrefixRule1Test(unittest.TestCase):
    """Unit tests for DisambiguatorPrefixRule1a and DisambiguatorPrefixRule1b."""

    def setUp(self):
        self.subject1a = DisambiguatorPrefixRule1a()
        self.subject1b = DisambiguatorPrefixRule1b()
        super(Test_DisambiguatorPrefixRule1Test, self).setUp()

    def test_disambiguate1a(self):
        """Rule 1a maps 'beria-ia' to 'ia-ia' and returns None for 'berlari'."""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('ia-ia', self.subject1a.disambiguate('beria-ia'))
        self.assertIsNone(self.subject1a.disambiguate('berlari'))

    def test_disambiguate1b(self):
        """Rule 1b maps 'berakit' to 'rakit' and returns None for 'bertabur'."""
        self.assertEqual('rakit', self.subject1b.disambiguate('berakit'))
        self.assertIsNone(self.subject1b.disambiguate('bertabur'))

if __name__ == '__main__':
unittest.main()
22 changes: 22 additions & 0 deletions tests/UnitTests/Morphology/InvalidAffixPairSpecificationTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest
from Sastrawi.Morphology.InvalidAffixPairSpecification import InvalidAffixPairSpecification

class Test_InvalidAffixPairSpecificationTest(unittest.TestCase):
    """Unit tests for InvalidAffixPairSpecification.isSatisfiedBy()."""

    def setUp(self):
        self.specification = InvalidAffixPairSpecification()
        return super(Test_InvalidAffixPairSpecificationTest, self).setUp()

    def test_containsInvalidAffixPair(self):
        """Check which words the specification accepts and rejects."""
        # Words the specification must NOT be satisfied by.
        not_satisfied = ('memberikan', 'ketahui')
        # Words the specification must be satisfied by.
        satisfied = (
            'berjatuhi',
            'dipukulan',
            'ketiduri',
            'ketidurkan',
            'menduaan',
            'terduaan',
            'perkataan',
        )

        for candidate in not_satisfied:
            self.assertFalse(self.specification.isSatisfiedBy(candidate))

        for candidate in satisfied:
            self.assertTrue(self.specification.isSatisfiedBy(candidate))

if __name__ == '__main__':
unittest.main()
31 changes: 31 additions & 0 deletions tests/UnitTests/Stemmer/StemmerFactoryTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.Stemmer.Stemmer import Stemmer

class Test_StemmerFactoryTest(unittest.TestCase):
    """Unit tests for StemmerFactory."""

    def setUp(self):
        self.factory = StemmerFactory()
        return super(Test_StemmerFactoryTest, self).setUp()

    def test_createStemmerReturnStemmer(self):
        """createStemmer() returns something other than None."""
        # NOTE: a stricter isinstance check against Stemmer is currently
        # disabled in this test.
        self.assertIsNotNone(self.factory.createStemmer())

    def test_fungsional(self):
        """A freshly built stemmer reduces 'malaikat-malaikat-Nya' to 'malaikat'."""
        stemmer = StemmerFactory().createStemmer()

        expected = 'malaikat'
        output = stemmer.stem('malaikat-malaikat-Nya')

        if output == expected:
            return
        raise AssertionError('output is {} instead of {}'.format(output, expected))

    def test_getWordsFromFile(self):
        """getWordsFromFile() can be called on a new factory without raising."""
        StemmerFactory().getWordsFromFile()

if __name__ == '__main__':
unittest.main()
28 changes: 28 additions & 0 deletions tests/UnitTests/Stemmer/StemmerTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import unittest
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
from Sastrawi.Stemmer.Stemmer import Stemmer

class Test_StemmerTest(unittest.TestCase):
    """Unit tests for Stemmer backed by a small in-memory dictionary."""

    def setUp(self):
        # The dictionary starts with the single word 'beri'; individual
        # tests add further words as needed.
        self.dictionary = ArrayDictionary(['beri'])
        self.stemmer = Stemmer(self.dictionary)

    def test_StemmerImplementsStemmerInterface(self):
        """The object under test is a Stemmer instance."""
        self.assertIsInstance(self.stemmer, Stemmer)

    def test_StemReturnImmediatelyOnShortWord(self):
        """Don't stem such a short word (three or fewer characters)"""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('mei', self.stemmer.stem('mei'))
        self.assertEqual('bui', self.stemmer.stem('bui'))

    def test_StemReturnImmediatelyIfFoundOnDictionary(self):
        """To prevent overstemming : nilai could have been overstemmed to nila
        if we don't lookup against the dictionary
        """
        # With only 'nila' known, 'nilai' is stemmed down to 'nila'.
        self.stemmer.getDictionary().add('nila')
        self.assertEqual('nila', self.stemmer.stem('nilai'))
        # Once 'nilai' itself is in the dictionary it must come back unchanged.
        self.stemmer.getDictionary().add('nilai')
        self.assertEqual('nilai', self.stemmer.stem('nilai'))

if __name__ == '__main__':
unittest.main()
14 changes: 14 additions & 0 deletions tests/UnitTests/StopWordRemover/StopWordRemoverFactoryTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import unittest
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.StopWordRemover.StopWordRemover import StopWordRemover

class Test_StopWordRemoverFactoryTest(unittest.TestCase):
    """Unit tests for StopWordRemoverFactory."""

    def setUp(self):
        self.factory = StopWordRemoverFactory()
        return super(Test_StopWordRemoverFactoryTest, self).setUp()

    def test_createStopWordRemover(self):
        """createStopWordRemover() returns a StopWordRemover instance."""
        remover = self.factory.createStopWordRemover()
        self.assertIsInstance(remover, StopWordRemover)

if __name__ == '__main__':
unittest.main()
19 changes: 19 additions & 0 deletions tests/UnitTests/StopWordRemover/StopWordRemoverTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary
from Sastrawi.StopWordRemover.StopWordRemover import StopWordRemover

class Test_StopWordRemoverTest(unittest.TestCase):
    """Unit tests for StopWordRemover using the stop words 'di' and 'ke'."""

    def setUp(self):
        self.dictionary = ArrayDictionary(['di', 'ke'])
        self.stopWordRemover = StopWordRemover(self.dictionary)
        super(Test_StopWordRemoverTest, self).setUp()

    def test_getDictionaryPreserveInstance(self):
        """getDictionary() returns the dictionary passed to the constructor."""
        self.assertEqual(self.dictionary, self.stopWordRemover.getDictionary())

    def test_removeStopWord(self):
        """Stop words are dropped from a space-separated sentence."""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('pergi sekolah', self.stopWordRemover.remove('pergi ke sekolah'))
        self.assertEqual('makan rumah', self.stopWordRemover.remove('makan di rumah'))

if __name__ == '__main__':
unittest.main()

0 comments on commit 74cd51c

Please sign in to comment.