Skip to content

Commit

Permalink
Fix singularizer issues and add test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
hasathcharu committed Mar 1, 2024
1 parent 3e8e999 commit 65c5f05
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 34 deletions.
56 changes: 30 additions & 26 deletions persist-cli/src/main/java/io/ballerina/persist/inflector/Rules.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,13 @@ private Rules() {}
{"pickaxe", "pickaxes"},
{"passerby", "passersby"},
{"canvas", "canvases"},
{"status", "statuses"}
{"mango", "mangoes"}
});

/**
 * Irregular pairs that the singularizer checks ahead of IRREGULAR_RULES.
 * Each entry is laid out as [singular, plural]; "have" is paired with itself, so the
 * singularizer returns it unchanged.
 */
static final String[][] SINGULAR_IRREGULAR_RULES = {
{"have", "have"}
};

// Pluralization rules.
static final String[][] PLURALIZATION_RULES = (new String[][]{
{"[^\u0000-\u007F]$", "$0"},
Expand Down Expand Up @@ -120,33 +124,32 @@ private Rules() {}
});

/**
 * Singularization rules. Each entry is a pair: element 0 is a regular expression matched
 * against the word, element 1 is the replacement text (using $n group references).
 * NOTE(review): the order in which the consumer applies these rules is not visible here;
 * confirm before reordering entries.
 */
static final String[][] SINGULARIZATION_RULES = (new String[][]{
{"(s|si|ni)([aeiouy])s$", "$1$2"},
{"([^aeiou])ese$", "$1"},
{"(ax|test)es$", "$1is"},
{"(alias|[^aou]us|t[lm]as|gas|ris)es$", "$1"},
{"(e[mn]u)s$", "$1"},
{"([^l]ias|[aeiou]las|[ejzr]as|[iu]am)s$", "$1"},
{"(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)i$", "$1us"},
{"(alumn|alg|vertebr)ae$", "$1a"},
{"(seraph|cherub)im$", "$1"},
{"(her|at|gr)oes$", "$1o"},
{"(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|" +
"symposi|curricul|automat|quor)a$", "$1um"},
{"(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|" +
"hedr|automat)a$", "$1on"},
{"ses$", "sis"},
{"(?:(kni|wi|li)fe|(ar|l|ea|eo|oa|hoo)f)ves$", "$1$2"},
{"([^aeiouy]|qu)ies$", "$1y"},
{"([^ch][ieo][ln])eys$", "$1ey"},
{"(x|ch|ss|sh|zz)es$", "$1"},
{"(matr|cod|mur|sil|vert|ind|append)ices$", "$1ix"},
{"(ss)$", "$1"},
{"(wi|kni|(?:after|half|high|low|mid|non|night|[^\\w]|^)li)ves$", "$1fe"},
{"(ar|(?:wo|[ae])l|[eo][ao])ves$", "$1f"},
{"ies$", "y"},
{"(dg|ss|ois|lk|ok|wn|mb|th|ch|ec|oal|is|ck|ix|sser|ts|wb)ies$", "$1ie"},
// The word-boundary anchor must be written "\\b": a bare "\b" in a Java string literal is the
// backspace character (U+0008), which would stop these patterns from matching ordinary words.
{"\\b(l|(?:neck|cross|hog|aun)?t|coll|faer|food|gen|goon|group|hipp|junk|vegg|(?:pork)?p|charl|calor|cut" +
")ies$", "$1ie"},
{"\\b(mon|smil)ies$", "$1ey"},
{"\\b((?:tit)?m|l)ice$", "$1ouse"},
{"(pe)ople$", "$1rson"},
{"(seraph|cherub)im$", "$1"},
{"(x|ch|ss|sh|zz|tto|go|cho|alias|[^aou]us|t[lm]as|gas|(?:her|at|gr)o|[aeiou]ris)(?:es)?$", "$1"},
{"(analy|diagno|parenthe|progno|synop|the|empha|cri|ne)(?:sis|ses)$", "$1sis"},
{"(movie|twelve|abuse|e[mn]u)s$", "$1"},
{"(test)(?:is|es)$", "$1is"},
{"(alumn|syllab|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$", "$1us"},
{"(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|" +
"quor)a$", "$1um"},
{"(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)a$", "$1on"},
{"(alumn|alg|vertebr)ae$", "$1a"},
{"(cod|mur|sil|vert|ind)ices$", "$1ex"},
{"(matr|append)ices$", "$1ix"},
{"(pe)(rson|ople)$", "$1rson"},
{"(child)ren$", "$1"},
{"eaux$", "$0"},
{"(eau)x?$", "$1"},
{"men$", "man"},
{"you", "thou"},
{"s$", ""}
{"s$", ""},
});


Expand All @@ -171,6 +174,7 @@ private Rules() {}
"^[a-z]*measles$",
"^[a-z]*o[iu]s$", // "carnivorous"
"^[a-z]*pox$", // "chickpox", "smallpox"
"^[a-z]*sheep$"
"^[a-z]*sheep$",
"species"
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ public static String singularize(String word) {
return word;
}
}
// Check the singular-specific irregular pairs before the general IRREGULAR_RULES loop that
// follows. Index 0 of each pair is the singular form, index 1 the plural form.
for (String[] irregularRule: Rules.SINGULAR_IRREGULAR_RULES) {
if (irregularRule[0].equals(word)) {
// The input already equals the singular form: return it untouched.
return word;
}
if (irregularRule[1].equals(word)) {
// The input equals the plural form: return the paired singular form.
return irregularRule[0];
}
}
for (String[] irregularRule: Rules.IRREGULAR_RULES) {
if (irregularRule[0].equals(word)) {
return word;
Expand Down
43 changes: 35 additions & 8 deletions persist-cli/src/test/java/io/ballerina/persist/UnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* A unit test class for the singular-to-plural and plural-to-singular functions.
*/
public class UnitTest {
List<String> singularWords = Arrays.asList(
List<String> singularInput = Arrays.asList(
"boy", "girl", "bird", "cod", "commerce", "quiz", "lemma", "dingo", "echo", "yes",
"tornado", "have", "dingo", "bus", "child", "mom", "dad", "bottle", "sticker", "moss",
"wolf", "wife", "life", "leaf", "woman", "mouse", "goose", "baby", "toy", "kidney",
Expand All @@ -43,7 +43,7 @@ public class UnitTest {
"medium", "bacterium", "kangaroo", "cherry", "sky", "monkey", "berry", "video", "studio", "mango",
"tornado", "tuxedo", "volcano", "house", "sister", "item", "thing", "computer", "flower", "roof", "has"
);
List<String> pluralWords = Arrays.asList(
List<String> pluralOutput = Arrays.asList(
"boys", "girls", "birds", "cod", "commerce", "quizzes", "lemmata", "dingoes", "echoes", "yeses",
"tornadoes", "have", "dingoes", "buses", "children", "moms", "dads", "bottles", "stickers", "mosses",
"wolves", "wives", "lives", "leaves", "women", "mouses", "geese", "babies", "toys", "kidneys",
Expand All @@ -53,20 +53,47 @@ public class UnitTest {
"cactuses", "focuses", "series", "species", "mouses", "feet", "nannies", "studies", "foxes", "pouches",
"brushes", "quizzes", "roofs", "trucks", "bugs", "pens", "books", "vegetables", "chairs", "bacteria",
"mediums", "bacteria", "kangaroos", "cherries", "skies", "monkeys", "berries", "videos", "studios",
"mangos", "tornadoes", "tuxedos", "volcanoes", "houses", "sisters", "items", "things", "computers",
"mangoes", "tornadoes", "tuxedos", "volcanoes", "houses", "sisters", "items", "things", "computers",
"flowers", "roofs", "have"
);

// Plural words passed to Singularizer::singularize by testPluralToSingular.
// Presumably compared position by position with singularOutput inside assertResults
// (its body is not visible here); keep both lists the same length and order.
List<String> pluralInput = Arrays.asList(
"boys", "girls", "birds", "cod", "commerce", "quizzes", "lemmata", "dingoes", "echoes", "yeses",
"tornadoes", "have", "dingoes", "buses", "children", "moms", "dads", "bottles", "stickers", "mosses",
"wolves", "wives", "lives", "leaves", "women", "mouses", "geese", "babies", "toys", "kidneys",
"potatoes", "memos", "stereos", "sheep", "deer", "series", "species", "windows", "stickers", "desks",
"pencils", "cups", "milks", "choices", "boxes", "thieves", "armies", "women", "friends", "daisies",
"bosses", "marshes", "classes", "lunches", "beliefs", "chefs", "cities", "rays", "photos", "pianos",
"cactuses", "focuses", "series", "species", "mouses", "feet", "nannies", "studies", "foxes", "pouches",
"brushes", "quizzes", "roofs", "trucks", "bugs", "pens", "books", "vegetables", "chairs", "bacteria",
"mediums", "bacteria", "kangaroos", "cherries", "skies", "monkeys", "berries", "videos", "studios",
"mangoes", "tornadoes", "tuxedos", "volcanoes", "houses", "sisters", "items", "things", "computers",
"flowers", "roofs", "have"
);

// Expected singular forms handed to assertResults by testPluralToSingular, listed in the
// same order as pluralInput. The final entry is "have" because the plural fixture "have"
// is expected to come back unchanged.
List<String> singularOutput = Arrays.asList(
"boy", "girl", "bird", "cod", "commerce", "quiz", "lemma", "dingo", "echo", "yes",
"tornado", "have", "dingo", "bus", "child", "mom", "dad", "bottle", "sticker", "moss",
"wolf", "wife", "life", "leaf", "woman", "mouse", "goose", "baby", "toy", "kidney",
"potato", "memo", "stereo", "sheep", "deer", "series", "species", "window", "sticker", "desk",
"pencil", "cup", "milk", "choice", "box", "thief", "army", "woman", "friend", "daisy",
"boss", "marsh", "class", "lunch", "belief", "chef", "city", "ray", "photo", "piano",
"cactus", "focus", "series", "species", "mouse", "foot", "nanny", "study", "fox", "pouch",
"brush", "quiz", "roof", "truck", "bug", "pen", "book", "vegetable", "chair", "bacterium",
"medium", "bacterium", "kangaroo", "cherry", "sky", "monkey", "berry", "video", "studio", "mango",
"tornado", "tuxedo", "volcano", "house", "sister", "item", "thing", "computer", "flower", "roof", "have"
);
/**
 * Maps every singular fixture word through Pluralizer::pluralize and passes the results,
 * the expected plural forms and the start timestamp to assertResults.
 */
@Test
public void testSingularToPlural() {
// Captured before the conversion and forwarded to assertResults.
long startTime = System.nanoTime();
List<String> outputs = singularWords.stream().map(Pluralizer::pluralize).toList();
assertResults(startTime, outputs, pluralWords);
List<String> outputs = singularInput.stream().map(Pluralizer::pluralize).toList();
assertResults(startTime, outputs, pluralOutput);
}
/**
 * Maps every plural fixture word through Singularizer::singularize and passes the results,
 * the expected singular forms and the start timestamp to assertResults.
 */
@Test(enabled = false)
@Test(enabled = true)
public void testPluralToSingular() {
// Captured before the conversion and forwarded to assertResults.
long startTime = System.nanoTime();
List<String> outputs = pluralWords.stream().map(Singularizer::singularize).toList();
assertResults(startTime, outputs, singularWords);
List<String> outputs = pluralInput.stream().map(Singularizer::singularize).toList();
assertResults(startTime, outputs, singularOutput);
}
private void assertResults(long startTime, List<String> outputs, List<String> expectedOutputs) {
PrintStream print = System.out;
Expand Down

0 comments on commit 65c5f05

Please sign in to comment.