-
Notifications
You must be signed in to change notification settings - Fork 111
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
In progress: porting disambiguator tests
- Loading branch information
Showing
12 changed files
with
272 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import unittest | ||
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary | ||
from Sastrawi.Stemmer.Stemmer import Stemmer | ||
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory | ||
|
||
class Test_StemmerTest(unittest.TestCase):
    """Check the factory-built stemmer against a table of word/stem pairs."""

    def setUp(self):
        """Build a fresh stemmer through ``StemmerFactory`` for each test."""
        super(Test_StemmerTest, self).setUp()
        stemmerFactory = StemmerFactory()
        self.stemmer = stemmerFactory.createStemmer()

    def getTestData(self):
        """Return the ``[word, expected_stem]`` pairs used by ``test_stem``."""
        return [
            ['kebijakan', 'bijak'],
            # Disabled in the original data set (reason not recorded here):
            # ['karyawan', 'karya'],
            # ['karyawati', 'karya'],
            ['kinerja', 'kerja'],
            ['mengandung', 'kandung'],
            ['memakan', 'makan'],
            ['asean', 'asean'],
            ['pemandu', 'pandu'],
            ['mengurangi', 'kurang'],
            ['pemerintah', 'perintah'],
            ['mengabulkan', 'kabul'],
            ['mengupas', 'kupas'],
            ['keterpurukan', 'puruk'],
            ['ditemukan', 'temu'],
            ['mengerti', 'erti'],
            ['kebon', 'kebon'],
            ['terdepan', 'depan'],
            ['mengikis', 'kikis'],
            ['kedudukan', 'duduk'],
            ['menekan', 'tekan'],
            ['perusakan', 'rusa'],  # overstemming, it's better than perusa
            ['ditemui', 'temu'],
            ['di', 'di'],
            ['mengalahkan', 'kalah'],
            ['melewati', 'lewat'],
            ['bernafas', 'nafas'],
            ['meniru-niru', 'tiru'],
            ['memanggil-manggil', 'panggil'],
            ['menyebut-nyebut', 'sebut'],
            ['menganga', 'nganga'],
            ['besaran', 'besar'],
            ['terhenyak', 'henyak'],
            ['mengokohkan', 'kokoh'],
            ['melainkan', 'lain'],
            ['kuasa-Mu', 'kuasa'],
            ['malaikat-malaikat-Nya', 'malaikat'],
            ['nikmat-Ku', 'nikmat'],
        ]

    def tryStem(self, word, stem):
        """Assert that stemming ``word`` yields ``stem``.

        The input word is included in the failure message so that a failing
        entry of the data table can be identified directly.
        """
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(
            stem, self.stemmer.stem(word), 'while stemming %r' % (word,))

    def test_stem(self):
        """Run every pair from ``getTestData`` through ``tryStem``."""
        for word, stem in self.getTestData():
            self.tryStem(word, stem)
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import unittest | ||
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary | ||
from Sastrawi.Dictionary.DictionaryInterface import DictionaryInterface | ||
|
||
class Test_ArrayDictionaryTest(unittest.TestCase):
    """Exercise add/contains/count behaviour of ``ArrayDictionary``."""

    def setUp(self):
        """Start every test with an ``ArrayDictionary`` built without words."""
        super(Test_ArrayDictionaryTest, self).setUp()
        self.dictionary = ArrayDictionary()

    # NOTE: assertEqual is used throughout; the assertEquals alias is
    # deprecated and was removed in Python 3.12.

    def test_addAndContain(self):
        """A word is reported by ``contains`` only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_addCountWord(self):
        """Adding one word moves ``count`` from 0 to 1."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_addWordIgnoreEmptyString(self):
        """Adding the empty string leaves ``count`` at 0."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_addWords(self):
        """``addWords`` stores every word of the given list."""
        words = ['word1', 'word2']
        self.dictionary.addWords(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructorPreserveWords(self):
        """Words passed to the constructor are present in the dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))
|
||
if __name__ == '__main__': | ||
unittest.main() |
19 changes: 19 additions & 0 deletions
19
tests/UnitTests/Morphology/Disambiguator/DisambiguatorPrefixRule1Test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
from Sastrawi.Morphology.Disambiguator.DisambiguatorPrefixRule1 import DisambiguatorPrefixRule1a, DisambiguatorPrefixRule1b | ||
|
||
class Test_DisambiguatorPrefixRule1Test(unittest.TestCase):
    """Check ``disambiguate`` for prefix rules 1a and 1b."""

    def setUp(self):
        """Instantiate one subject per rule variant."""
        super(Test_DisambiguatorPrefixRule1Test, self).setUp()
        self.subject1a = DisambiguatorPrefixRule1a()
        self.subject1b = DisambiguatorPrefixRule1b()

    def test_disambiguate1a(self):
        """Rule 1a maps 'beria-ia' to 'ia-ia' and returns None for 'berlari'."""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('ia-ia', self.subject1a.disambiguate('beria-ia'))
        self.assertIsNone(self.subject1a.disambiguate('berlari'))

    def test_disambiguate1b(self):
        """Rule 1b maps 'berakit' to 'rakit' and returns None for 'bertabur'."""
        self.assertEqual('rakit', self.subject1b.disambiguate('berakit'))
        self.assertIsNone(self.subject1b.disambiguate('bertabur'))
|
||
if __name__ == '__main__': | ||
unittest.main() |
22 changes: 22 additions & 0 deletions
22
tests/UnitTests/Morphology/InvalidAffixPairSpecificationTest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import unittest | ||
from Sastrawi.Morphology.InvalidAffixPairSpecification import InvalidAffixPairSpecification | ||
|
||
class Test_InvalidAffixPairSpecificationTest(unittest.TestCase):
    """Check ``InvalidAffixPairSpecification.isSatisfiedBy`` on sample words."""

    def setUp(self):
        """Create the specification under test."""
        self.specification = InvalidAffixPairSpecification()
        return super(Test_InvalidAffixPairSpecificationTest, self).setUp()

    def test_containsInvalidAffixPair(self):
        """The specification must reject the first group and accept the second."""
        not_satisfied = ('memberikan', 'ketahui')
        satisfied = (
            'berjatuhi',
            'dipukulan',
            'ketiduri',
            'ketidurkan',
            'menduaan',
            'terduaan',
            'perkataan',
        )

        # Same calls, in the same order, as the original one-assert-per-line form.
        for candidate in not_satisfied:
            self.assertFalse(self.specification.isSatisfiedBy(candidate))

        for candidate in satisfied:
            self.assertTrue(self.specification.isSatisfiedBy(candidate))
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import unittest | ||
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory | ||
from Sastrawi.Stemmer.Stemmer import Stemmer | ||
|
||
class Test_StemmerFactoryTest(unittest.TestCase):
    """Check that ``StemmerFactory`` can build a working stemmer."""

    def setUp(self):
        """Create the factory under test."""
        super(Test_StemmerFactoryTest, self).setUp()
        self.factory = StemmerFactory()

    def test_createStemmerReturnStemmer(self):
        """``createStemmer`` must return something other than None."""
        stemmer = self.factory.createStemmer()
        self.assertIsNotNone(stemmer)
        # The stricter type check was disabled in the original test:
        #self.assertIsInstance(stemmer, Stemmer)

    def test_fungsional(self):
        """Functional check: a factory-built stemmer stems a sample word."""
        stemmer = self.factory.createStemmer()

        sentence = 'malaikat-malaikat-Nya'
        expected = 'malaikat'
        output = stemmer.stem(sentence)

        # Use the framework assertion instead of a hand-rolled
        # ``if ...: raise AssertionError`` so failures are reported uniformly.
        self.assertEqual(
            expected, output,
            'output is {} instead of {}'.format(output, expected))

    def test_getWordsFromFile(self):
        """Smoke test: ``getWordsFromFile`` must run without raising."""
        self.factory.getWordsFromFile()
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import unittest | ||
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary | ||
from Sastrawi.Stemmer.Stemmer import Stemmer | ||
|
||
class Test_StemmerTest(unittest.TestCase):
    """Unit tests for ``Stemmer`` built on a small explicit dictionary."""

    def setUp(self):
        """Build a stemmer whose dictionary initially holds only 'beri'."""
        self.dictionary = ArrayDictionary(['beri'])
        self.stemmer = Stemmer(self.dictionary)

    def test_StemmerImplementsStemmerInterface(self):
        """The constructed object is an instance of ``Stemmer``."""
        self.assertIsInstance(self.stemmer, Stemmer)

    def test_StemReturnImmediatelyOnShortWord(self):
        """Don't stem such a short word (three or fewer characters)"""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('mei', self.stemmer.stem('mei'))
        self.assertEqual('bui', self.stemmer.stem('bui'))

    def test_StemReturnImmediatelyIfFoundOnDictionary(self):
        """To prevent overstemming : nilai could have been overstemmed to nila
        if we don't lookup against the dictionary
        """
        self.stemmer.getDictionary().add('nila')
        self.assertEqual('nila', self.stemmer.stem('nilai'))
        # Once the full word is itself in the dictionary it must be kept as is.
        self.stemmer.getDictionary().add('nilai')
        self.assertEqual('nilai', self.stemmer.stem('nilai'))
|
||
if __name__ == '__main__': | ||
unittest.main() |
14 changes: 14 additions & 0 deletions
14
tests/UnitTests/StopWordRemover/StopWordRemoverFactoryTest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import unittest | ||
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory | ||
from Sastrawi.StopWordRemover.StopWordRemover import StopWordRemover | ||
|
||
class Test_StopWordRemoverFactoryTest(unittest.TestCase):
    """Check the object produced by ``StopWordRemoverFactory``."""

    def setUp(self):
        """Create the factory under test."""
        self.factory = StopWordRemoverFactory()
        return super(Test_StopWordRemoverFactoryTest, self).setUp()

    def test_createStopWordRemover(self):
        """``createStopWordRemover`` returns a ``StopWordRemover`` instance."""
        remover = self.factory.createStopWordRemover()
        self.assertIsInstance(remover, StopWordRemover)
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
from Sastrawi.Dictionary.ArrayDictionary import ArrayDictionary | ||
from Sastrawi.StopWordRemover.StopWordRemover import StopWordRemover | ||
|
||
class Test_StopWordRemoverTest(unittest.TestCase):
    """Unit tests for ``StopWordRemover`` with the stop words 'di' and 'ke'."""

    def setUp(self):
        """Build a remover on top of a two-word stop-word dictionary."""
        super(Test_StopWordRemoverTest, self).setUp()
        self.dictionary = ArrayDictionary(['di', 'ke'])
        self.stopWordRemover = StopWordRemover(self.dictionary)

    def test_getDictionaryPreserveInstance(self):
        """``getDictionary`` gives back the dictionary passed to the constructor."""
        self.assertEqual(self.dictionary, self.stopWordRemover.getDictionary())

    def test_removeStopWord(self):
        """Stop words are dropped from the sentence; other words are kept."""
        # assertEqual replaces the deprecated assertEquals alias
        # (removed in Python 3.12).
        self.assertEqual('pergi sekolah', self.stopWordRemover.remove('pergi ke sekolah'))
        self.assertEqual('makan rumah', self.stopWordRemover.remove('makan di rumah'))
|
||
if __name__ == '__main__': | ||
unittest.main() |