From fb536aee2099063d91f730d3845c546f8bdaba89 Mon Sep 17 00:00:00 2001 From: kariminf Date: Sun, 18 Aug 2019 18:54:07 +0100 Subject: [PATCH] version 3.0.0 --- .settings/org.eclipse.buildship.core.prefs | 4 +- .../as/postProcess/extraction/Extractor.java | 182 ------------------ .../java/kariminf/testing/TestPreProcess.java | 53 +++++ src/main/java/kariminf/testing/TestTCC.java | 36 ++++ .../confs/multiling/ssfgc2017/GCExample.java | 10 +- 5 files changed, 97 insertions(+), 188 deletions(-) create mode 100644 src/main/java/kariminf/testing/TestPreProcess.java create mode 100644 src/main/java/kariminf/testing/TestTCC.java diff --git a/.settings/org.eclipse.buildship.core.prefs b/.settings/org.eclipse.buildship.core.prefs index 2465e9f..9640f07 100644 --- a/.settings/org.eclipse.buildship.core.prefs +++ b/.settings/org.eclipse.buildship.core.prefs @@ -1,8 +1,8 @@ -#Sun Mar 04 14:21:45 CET 2018 +#Wed May 01 19:21:05 CET 2019 connection.gradle.distribution=GRADLE_DISTRIBUTION(VERSION(4.6)) override.workspace.settings=true eclipse.preferences.version=1 gradle.user.home= connection.project.dir= -offline.mode=false build.scans.enabled=false +offline.mode=false diff --git a/src/main/java/kariminf/as/postProcess/extraction/Extractor.java b/src/main/java/kariminf/as/postProcess/extraction/Extractor.java index c948375..d9db511 100644 --- a/src/main/java/kariminf/as/postProcess/extraction/Extractor.java +++ b/src/main/java/kariminf/as/postProcess/extraction/Extractor.java @@ -19,13 +19,9 @@ package kariminf.as.postProcess.extraction; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Set; - import kariminf.as.postProcess.PostProcessor; import kariminf.as.process.Scorer; -import kariminf.as.tools.Tools; @@ -59,184 +55,6 @@ public List getOrder() { } public abstract void reOrder(); - - /** - * Reorders sentences based on a certain relationship between them. - * - * @param sentWords a list contains sentences, where a sentence is a list of words - * @return a list of indexes representing the order of these sentences - */ - public static List reorder(List> sentWords){ - - if (sentWords.size() > 200) - return null; - - //Begin: Creating the matrix of similarities - int size = sentWords.size(); - - double matrix[][] = new double[size][size]; - - for (int i = 0; i < size; i++) - for(int j = 0; j < size; j++){ - double sim = 0.0; - if (i != j) - sim = - Tools.calcSimilarity(sentWords.get(i), sentWords.get(j)); - matrix[i][j] = sim; - } - //End: Creating the matrix of similarities - - //Calculating the first sentence - int firstNode = getFirst(matrix); - - //Using nearest neighbors to find the order for other sentences - return nearestNeighbour(matrix, firstNode); - - } - - - /** - * Reorders a list of elements based on the nearest neighbor - * - * This method uses a matrix of similarities to reorder the elements starting from - * an index; It returns a list of reordered indexes - * - * @param matrix a matrix of similarities between each two elements - * @param startNode start point (index) - * @return a list of indexes representing the order of these elements - */ - public static List nearestNeighbour(double[][] matrix, int startNode){ - - int nodesSize = matrix.length; - - List result = new ArrayList(); - - //Begin: the remaining elements - Set remaining = new HashSet(); - for (int i = 0; i < nodesSize; i++) - if (i != startNode) remaining.add(i); - //End: the remaining elements - - int current = startNode; - result.add(startNode); - - - while (! remaining.isEmpty()){ - double min = Double.MAX_VALUE; - int index = 0; - - //Begin: min similarity between the current element and the remaining - for (int j: remaining){ - if (min > matrix[current][j]){ - min = matrix[current][j]; - index = j; - } - } - //End: min similarity between the current element and the remaining - - //The nearest neighbor will be the current element - current = index; - result.add(index); - remaining.remove(index); - - } - - return result; - } - - - /** - * Search for the first element based on a similarity matrix. - * - * @param matrix a matrix of similarities between each two elements - * @return the index of the element that should be the starting point - */ - public static int getFirst(double matrix[][]){ - - int nodesSize = matrix.length; - - if (nodesSize < 2) - return 0; - - double minVal = Double.MAX_VALUE; - int firstIdx = 0; - int secondIdx = 0; - - //Begin: Look for the least distance between all pairs of elements - for (int i = 0; i < nodesSize; i++){ - for (int j = i + 1; j < nodesSize; j++){ - if (minVal > matrix[i][j]){ - minVal = matrix[i][j]; - firstIdx = i; - secondIdx = j; - } - } - - } - //End: Look for the least distance between all pairs of elements - - double minFirst = Double.MAX_VALUE; - double minSecond = Double.MAX_VALUE; - - //Begin: Look for min distance of firstIdx and secondIdx with other elements - for (int j = 0; j < nodesSize; j++){ - - if (j == firstIdx || j == secondIdx) continue; - - if((j > firstIdx) && (minFirst > matrix[firstIdx][j])) - minFirst = matrix[firstIdx][j]; - - if((j > secondIdx) && (minSecond > matrix[secondIdx][j])) - minSecond = matrix[secondIdx][j]; - } - //Begin: Look for min distance of firstIdx and secondIdx with other elements - - //If the second element has a less distance with others than the first, - //we take it as the first element - if (minSecond > minFirst) - firstIdx = secondIdx; - - return firstIdx; - } - - - /** - * @param args - */ - public static void main(String[] args) { - List> sentWords= new ArrayList>(); - - List words = new ArrayList(); - words.add("a");words.add("b");words.add("c");//words.add("d"); - sentWords.add(words); - - words = new ArrayList(); - words.add("c");words.add("g");words.add("e"); - sentWords.add(words); - - words = new ArrayList(); - words.add("e");words.add("f"); - sentWords.add(words); - - words = new ArrayList(); - words.add("a");words.add("e");words.add("g"); - sentWords.add(words); - - //System.out.println(ReOrderer.reorder(sentWords)); - - double mat[][]={ - {0.0, -0.57, 0.0, -0.33, -0.2, -0.4},//0 - {-0.57, 0.0, 0.0, 0.0, 0.0, 0.0},//1 - {0.0, 0.0, 0.0, -0.4, 0.0, 0.0},//2 - {-0.33, 0.0, -0.4, 0.0, 0.0, 0.0},//3 - {-0.2, 0.0, 0.0, 0.0, 0.0, 0.0},//4 - {-0.4, 0.0, 0.0, 0.0, 0.0, 0.0},//5 - }; - - System.out.println(Extractor.nearestNeighbour(mat, 1)); - - System.out.println(Extractor.getFirst(mat));/**/ - - } } diff --git a/src/main/java/kariminf/testing/TestPreProcess.java b/src/main/java/kariminf/testing/TestPreProcess.java new file mode 100644 index 0000000..e7f416a --- /dev/null +++ b/src/main/java/kariminf/testing/TestPreProcess.java @@ -0,0 +1,53 @@ +package kariminf.testing; + +import java.util.List; + +import kariminf.as.preProcess.PreProcessor; +import kariminf.as.preProcess.StaticPreProcessor; +import kariminf.as.tools.Data; + +public class TestPreProcess { + + public static void main(String[] args) { + + String text = ""; + text += "My name is Karim, and I study informatics at ESI, which is at Algiers, to obtain Magister degree. "; + text += "My research in ESI is about ATS, it is the intersection between IR and NLP. "; + text += "In this research, the main idea is to find relevant sentences using IR technics. "; + text += "The statistical features are the power of IR to find relevancy. "; + text += "AI technics are used, such as learning algorithms to create models for each topic in the input text. "; + + Data data = new Data(); + PreProcessor prep = new StaticPreProcessor("en"); + prep.setData(data); + prep.addText(text); + prep.preProcess(); + + + List sentences = data.getSentences(); + List> sentWords = data.getSentWords(); + List> sim = data.getSentSimilarities(); + + System.out.println(sim); + + + } + + public static Data pp() { + String text = ""; + text += "My name is Karim, and I study informatics at ESI, which is at Algiers, to obtain Magister degree. "; + text += "My research in ESI is about ATS, it is the intersection between IR and NLP. "; + text += "In this research, the main idea is to find relevant sentences using IR technics. "; + text += "The statistical features are the power of IR to find relevancy. "; + text += "AI technics are used, such as learning algorithms to create models for each topic in the input text. "; + + Data data = new Data(); + PreProcessor prep = new StaticPreProcessor("en"); + prep.setData(data); + prep.addText(text); + prep.preProcess(); + + return data; + } + +} diff --git a/src/main/java/kariminf/testing/TestTCC.java b/src/main/java/kariminf/testing/TestTCC.java new file mode 100644 index 0000000..535e0b5 --- /dev/null +++ b/src/main/java/kariminf/testing/TestTCC.java @@ -0,0 +1,36 @@ +package kariminf.testing; + +import java.util.List; +import kariminf.as.process.Scorer; +import kariminf.as.process.tcc.BayesScoreHandler; +import kariminf.as.process.tcc.NaiveCluster; +import kariminf.as.process.tcc.Pos; +import kariminf.as.process.tcc.TFB; +import kariminf.as.tools.Data; + +public class TestTCC { + + public static void main(String[] args) { + Data data = TestPreProcess.pp(); + + NaiveCluster nc = new NaiveCluster(0.25); + BayesScoreHandler bsh = new BayesScoreHandler(nc); + Scorer s = Scorer.create(bsh); + s.setData(data);//calls bsh.setData(data) + + bsh.addFeature(new TFB());//calls TFB.setData(data); + bsh.addFeature(new Pos());//calls Pos.setData(data) + + + bsh.train();//must train before scoring + s.scoreUnits(); + + List order = s.getOrdered(); + double sent1score = s.getScore(1);//sentence 1 score + + System.out.println(sent1score); + + + } + +} diff --git a/src/test/java/kariminf/as/confs/multiling/ssfgc2017/GCExample.java b/src/test/java/kariminf/as/confs/multiling/ssfgc2017/GCExample.java index fc8a13d..8206092 100644 --- a/src/test/java/kariminf/as/confs/multiling/ssfgc2017/GCExample.java +++ b/src/test/java/kariminf/as/confs/multiling/ssfgc2017/GCExample.java @@ -48,9 +48,9 @@ public static void main(String[] args) { preprocessor.setData(data); preprocessor.preProcess(text); - System.out.println(data.getSentWords()); + //System.out.println(data.getSentWords()); - System.exit(0); + //System.exit(0); /*for(int i = 0; i < 10; i++){ @@ -78,13 +78,15 @@ public static void main(String[] args) { scorer.setData(data); scorer.scoreUnits(); + System.out.println("scores: " + scorer.getOrdered()); + Extractor reorder; //reorder = new ReOrderer0(scorer); //reorder = new ReOrderer1(scorer, th); //reorder = new ReOrderer2(scorer); - //reorder = new ReOrderer3(scorer); - //reorder = new ReOrderer4(scorer); + //reorder = new SimNeighborReOrderer(scorer); reorder = new Neighbors2ReOrderer(scorer); + //reorder = new Neighbors2ReOrderer(scorer); reorder.reOrder();