Skip to content

Commit

Permalink
Merge pull request #7 from Seen-Arabic:@Feature/Remove-Arabic-Affixes
Browse files Browse the repository at this point in the history
Remove Arabic Affixes Fixes #3
  • Loading branch information
MohamedAmgd authored Nov 30, 2023
2 parents 7fde65c + 959685c commit 4e7d8d1
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ public static String tashfeer(String text) {
return newSentence.toString().trim();
}

/**
* Word to letters.
*
* For example
* text: "هذه جملة"
* to
* resulting string: "هاء ذال هاء جيم ميم لام تاء_مربوطة"
*
* @param word The input string {@link String}
* @return The resulting string {@link String}
*/
public static String wordToLetters(String word) {
StringBuilder newWord = new StringBuilder();

Expand All @@ -113,6 +124,37 @@ public static String wordToLetters(String word) {
return newWord.toString().trim();
}

/**
 * Removes at most one known prefix and at most one known suffix from an
 * Arabic word.
 *
 * The prefix is looked up in {@code Data.ARABIC_PREFIXES} and the suffix in
 * {@code Data.ARABIC_SUFFIXES}. In both cases the two-character candidate is
 * tried before the one-character candidate, so that e.g. "ال" is removed as a
 * whole instead of only its leading "ا". The suffix is matched against the
 * word that remains after the prefix has been removed.
 *
 * Words that are too short to contain a candidate affix (including the empty
 * string) are handled without throwing; the corresponding check is simply
 * skipped. Note that the input is only trimmed on return, so leading or
 * trailing whitespace prevents the affix on that side from matching.
 *
 * @param word The Arabic word from which the affixes are to be removed
 *             {@link String}
 * @return The trimmed word after removing any matching affixes; the trimmed
 *         original word if no affix matches {@link String}
 * @throws NullPointerException if {@code word} is {@code null}
 */
public static String removeArabicAffixes(String word) {
    if (word == null) {
        // Same exception type the unchecked code would raise, but with a message.
        throw new NullPointerException(Data.TEXT_NULL_MESSAGE);
    }

    // Prefix: prefer the two-letter form (ALEF & LAM) over a single letter.
    if (word.length() >= 2 && Data.ARABIC_PREFIXES.contains(word.substring(0, 2))) {
        word = word.substring(2);
    } else if (!word.isEmpty() && Data.ARABIC_PREFIXES.contains(word.substring(0, 1))) {
        word = word.substring(1);
    }

    // Suffix: re-measure the word, because the prefix strip may have shortened
    // it below the length of a candidate suffix (e.g. "ال" is now "").
    int length = word.length();
    if (length >= 2 && Data.ARABIC_SUFFIXES.contains(word.substring(length - 2))) {
        word = word.substring(0, length - 2);
    } else if (length >= 1 && Data.ARABIC_SUFFIXES.contains(word.substring(length - 1))) {
        word = word.substring(0, length - 1);
    }

    return word.trim();
}

private static String handleNoonIssue(String text) {
String arabicLetters = String.join("", Data.LETTERS_DICT.keySet()) + "ـ";
String regex = Data.NOON + "(" + "?=[^" + arabicLetters + "]" + ")|" + Data.NOON + "\\z";
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/io/github/seen_arabic/arabic_services/Data.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@ class Data {
static final List<Character> YAA = Arrays.asList('ى', 'ئ');
static final List<Character> WAW = Arrays.asList('ؤ');

/**
 * List of common Arabic prefixes. ArabicServices.removeArabicAffixes looks up
 * the first two characters, then the first character, of a word in this list
 * and strips the match from the beginning of the word.
 * Only one- and two-character entries can ever match that lookup.
 */
public static final List<String> ARABIC_PREFIXES = Arrays.asList("أ", "ا", "إ", "ال", "ي", "ت", "ن", "ب");

/**
 * List of common Arabic suffixes. ArabicServices.removeArabicAffixes looks up
 * the last two characters, then the last character, of a word in this list
 * and strips the match from the end of the word.
 * Only one- and two-character entries can ever match that lookup.
 */
public static final List<String> ARABIC_SUFFIXES = Arrays.asList("ة", "ه", "ي", "ى", "ية", "ين", "ون", "هم");

static final String TEXT_NULL_MESSAGE = "text must be not null";

static final Map<String, String> LETTERS_DICT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,121 @@ private void itShouldHandleInputWithSpaces() {
String result = ArabicServices.wordToLetters(input);
assertEquals("هاء ذال هاء جيم ميم لام تاء_مربوطة ألف خاء راء ألف_لينة", result);
}

@Test
public void testRemoveArabicAffixes() {
    // Scenarios run in declaration order: prefix cases first, then suffix cases.
    // The first failing scenario aborts the remaining ones.
    Runnable[] scenarios = {
        this::itShouldRemoveAlfPrefixFromAWord,
        this::itShouldRemoveAlefPrefixAndTaaSuffixFromAWord,
        this::itShouldRemoveAlefHamzaBelowPrefixFromAWord,
        this::itShouldRemoveAlPrefixFromAWord,
        this::itShouldRemoveYaPrefixFromAWord,
        this::itShouldRemoveTaPrefixFromAWord,
        this::itShouldRemoveNunPrefixFromAWord,
        this::itShouldRemoveBaPrefixFromAWord,
        this::itShouldRemoveTaSuffixFromAWord,
        this::itShouldRemoveHaSuffixFromAWord,
        this::itShouldRemoveYaSuffixFromAWord,
        this::itShouldRemoveAlefMaksuraSuffixFromAWord,
        this::itShouldRemoveYaAlefSuffixFromAWord,
        this::itShouldRemoveYaNunSuffixFromAWord,
        this::itShouldRemoveWawNunSuffixFromAWord,
        this::itShouldRemoveHumSuffixFromAWord,
    };
    for (Runnable scenario : scenarios) {
        scenario.run();
    }
}

/** The leading "أ" is stripped; nothing matches at the end of the word. */
private void itShouldRemoveAlfPrefixFromAWord() {
    assertEquals("مل", ArabicServices.removeArabicAffixes("أمل"));
}

/** Both ends match here: the leading "ا" and the trailing "ة" are stripped. */
private void itShouldRemoveAlefPrefixAndTaaSuffixFromAWord() {
    final String stripped = ArabicServices.removeArabicAffixes("امرأة");
    assertEquals("مرأ", stripped);
}

/** The leading "إ" is stripped from the word. */
private void itShouldRemoveAlefHamzaBelowPrefixFromAWord() {
    assertEquals("نسان", ArabicServices.removeArabicAffixes("إنسان"));
}

/** The two-letter prefix "ال" is stripped as a unit, not just its first letter. */
private void itShouldRemoveAlPrefixFromAWord() {
    final String expected = "كتاب";
    final String actual = ArabicServices.removeArabicAffixes("الكتاب");
    assertEquals(expected, actual);
}

/** The leading "ي" is stripped from the word. */
private void itShouldRemoveYaPrefixFromAWord() {
    assertEquals("وم", ArabicServices.removeArabicAffixes("يوم"));
}

/** The leading "ت" is stripped from the word. */
private void itShouldRemoveTaPrefixFromAWord() {
    final String stripped = ArabicServices.removeArabicAffixes("تفاح");
    assertEquals("فاح", stripped);
}

/** The leading "ن" is stripped from the word. */
private void itShouldRemoveNunPrefixFromAWord() {
    assertEquals("جم", ArabicServices.removeArabicAffixes("نجم"));
}

/** The leading "ب" is stripped from the word. */
private void itShouldRemoveBaPrefixFromAWord() {
    final String expected = "يت";
    final String actual = ArabicServices.removeArabicAffixes("بيت");
    assertEquals(expected, actual);
}

/** The trailing "ة" is stripped; nothing matches at the start of the word. */
private void itShouldRemoveTaSuffixFromAWord() {
    assertEquals("كتاب", ArabicServices.removeArabicAffixes("كتابة"));
}

/** The trailing "ه" is stripped from the word. */
private void itShouldRemoveHaSuffixFromAWord() {
    final String stripped = ArabicServices.removeArabicAffixes("جديه");
    assertEquals("جدي", stripped);
}

/** The trailing "ي" is stripped from the word. */
private void itShouldRemoveYaSuffixFromAWord() {
    assertEquals("ذهب", ArabicServices.removeArabicAffixes("ذهبي"));
}

/**
 * The trailing alef maksura "ى" is stripped from the word.
 *
 * The input must end with "ى" (dotless), not "ي"; with a dotted yaa this case
 * would only repeat the yaa-suffix scenario and leave "ى" untested.
 */
private void itShouldRemoveAlefMaksuraSuffixFromAWord() {
    String word = "منزلى";
    String result = ArabicServices.removeArabicAffixes(word);
    assertEquals("منزل", result);
}

/**
 * The two-letter suffix "ية" is stripped as a unit, not just its last letter.
 * NOTE(review): despite the method name, the suffix exercised is "ية", not a
 * yaa followed by an alef.
 */
private void itShouldRemoveYaAlefSuffixFromAWord() {
    assertEquals("علم", ArabicServices.removeArabicAffixes("علمية"));
}

/** The two-letter suffix "ين" is stripped from the word. */
private void itShouldRemoveYaNunSuffixFromAWord() {
    final String stripped = ArabicServices.removeArabicAffixes("موظفين");
    assertEquals("موظف", stripped);
}

/** The two-letter suffix "ون" is stripped from the word. */
private void itShouldRemoveWawNunSuffixFromAWord() {
    assertEquals("موظف", ArabicServices.removeArabicAffixes("موظفون"));
}

/** The two-letter suffix "هم" is stripped from the word. */
private void itShouldRemoveHumSuffixFromAWord() {
    final String expected = "طلاب";
    final String actual = ArabicServices.removeArabicAffixes("طلابهم");
    assertEquals(expected, actual);
}

}

0 comments on commit 4e7d8d1

Please sign in to comment.