Skip to content

Commit

Permalink
version 3.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
kariminf committed Aug 18, 2019
1 parent 88b5428 commit fb536ae
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 188 deletions.
4 changes: 2 additions & 2 deletions .settings/org.eclipse.buildship.core.prefs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#Sun Mar 04 14:21:45 CET 2018
#Wed May 01 19:21:05 CET 2019
connection.gradle.distribution=GRADLE_DISTRIBUTION(VERSION(4.6))
override.workspace.settings=true
eclipse.preferences.version=1
gradle.user.home=
connection.project.dir=
offline.mode=false
build.scans.enabled=false
offline.mode=false
182 changes: 0 additions & 182 deletions src/main/java/kariminf/as/postProcess/extraction/Extractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,9 @@
package kariminf.as.postProcess.extraction;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import kariminf.as.postProcess.PostProcessor;
import kariminf.as.process.Scorer;
import kariminf.as.tools.Tools;



Expand Down Expand Up @@ -59,184 +55,6 @@ public List<Integer> getOrder() {
}

/**
 * Hook with no default implementation: every concrete subclass must supply it.
 * NOTE(review): presumably computes the ordering later exposed by getOrder() — confirm against the subclasses.
 */
public abstract void reOrder();

/**
* Reorders sentences based on a certain relationship between them.
*
* @param sentWords a list of sentences, where each sentence is a list of words
* @return a list of indexes representing the order of these sentences,
* or {@code null} when more than 200 sentences are given
*/
public static List<Integer> reorder(List<List<String>> sentWords){

    final int count = sentWords.size();

    // Inputs with more than 200 sentences are not handled: callers get null
    if (count > 200) {
        return null;
    }

    // Square matrix of negated similarities; Java zero-fills the array, so the
    // diagonal (a sentence compared with itself) keeps its 0.0 without being touched.
    double[][] matrix = new double[count][count];
    for (int row = 0; row < count; row++) {
        for (int col = 0; col < count; col++) {
            if (row != col) {
                matrix[row][col] = - Tools.calcSimilarity(sentWords.get(row), sentWords.get(col));
            }
        }
    }

    // Choose the starting sentence, then chain the rest by nearest neighbour
    int startNode = getFirst(matrix);
    return nearestNeighbour(matrix, startNode);
}


/**
* Reorders a list of elements based on the nearest neighbor
*
* This method uses a matrix of similarities to reorder the elements starting from
* an index; It returns a list of reordered indexes
*
* @param matrix a matrix of similarities between each two elements
* @param startNode start point (index)
* @return a list of indexes representing the order of these elements
*/
public static List<Integer> nearestNeighbour(double[][] matrix, int startNode){

    final int nodesSize = matrix.length;
    List<Integer> result = new ArrayList<Integer>(nodesSize);

    // Nothing to order: an empty matrix yields an empty list rather than a
    // list holding a start index that does not exist.
    if (nodesSize == 0) {
        return result;
    }

    // visited[i] becomes true once element i has been placed in the result.
    // An out-of-range startNode fails on the next line with
    // ArrayIndexOutOfBoundsException.
    boolean[] visited = new boolean[nodesSize];
    visited[startNode] = true;
    result.add(startNode);

    int current = startNode;

    // Exactly one element is placed per pass, so the loop always terminates,
    // whatever values the matrix holds.
    for (int placed = 1; placed < nodesSize; placed++) {
        int next = -1;
        double min = Double.MAX_VALUE;

        // Scan candidates in ascending index order so that ties are resolved
        // deterministically (lowest index wins).
        for (int j = 0; j < nodesSize; j++) {
            if (visited[j]) {
                continue;
            }
            double value = matrix[current][j];

            // The first unvisited candidate is always accepted; this is what
            // keeps the walk moving when every value is NaN, +Infinity or
            // Double.MAX_VALUE. A later candidate replaces it only when it is
            // strictly smaller, or when the current choice is NaN and the
            // candidate is a real number.
            boolean better = next < 0
                    || value < min
                    || (Double.isNaN(min) && !Double.isNaN(value));
            if (better) {
                min = value;
                next = j;
            }
        }

        // The nearest neighbour becomes the current element
        visited[next] = true;
        result.add(next);
        current = next;
    }

    return result;
}


/**
* Search for the first element based on a similarity matrix.
*
* @param matrix a matrix of similarities between each two elements
* @return the index of the element that should be the starting point
*/
public static int getFirst(double matrix[][]){

    final int n = matrix.length;

    // With fewer than two elements there is no pair to inspect
    if (n < 2) {
        return 0;
    }

    // Step 1: find the pair (a, b), a < b, holding the smallest value in the
    // upper triangle of the matrix. Both stay 0 if no value is below
    // Double.MAX_VALUE.
    int a = 0;
    int b = 0;
    double smallest = Double.MAX_VALUE;
    for (int row = 0; row < n; row++) {
        for (int col = row + 1; col < n; col++) {
            double value = matrix[row][col];
            if (value < smallest) {
                smallest = value;
                a = row;
                b = col;
            }
        }
    }

    // Step 2: for each member of the pair, take the smallest value it has with
    // the elements located AFTER it (higher index), the pair itself excluded.
    // NOTE(review): elements with a lower index are never inspected here;
    // confirm that this restriction is intended.
    double bestOfA = Double.MAX_VALUE;
    for (int other = a + 1; other < n; other++) {
        if (other != b && matrix[a][other] < bestOfA) {
            bestOfA = matrix[a][other];
        }
    }

    double bestOfB = Double.MAX_VALUE;
    for (int other = b + 1; other < n; other++) {
        if (matrix[b][other] < bestOfB) {
            bestOfB = matrix[b][other];
        }
    }

    // Step 3: the second member of the pair is returned when its smallest
    // value is strictly greater than the first member's; otherwise the first
    // member is returned.
    return (bestOfB > bestOfA) ? b : a;
}


/**
* @param args
*/
public static void main(String[] args) {

    // Sample sentences given as word lists. They are only built here; the
    // reorder(...) call that would consume them is currently disabled.
    String[][] samples = {
        {"a", "b", "c"},
        {"c", "g", "e"},
        {"e", "f"},
        {"a", "e", "g"},
    };

    List<List<String>> sentWords = new ArrayList<List<String>>();
    for (String[] sample : samples) {
        List<String> words = new ArrayList<String>();
        for (String word : sample) {
            words.add(word);
        }
        sentWords.add(words);
    }

    // Hand-written symmetric 6x6 matrix; each row is labelled with its index
    double[][] mat = {
        {0.0, -0.57, 0.0, -0.33, -0.2, -0.4},  // 0
        {-0.57, 0.0, 0.0, 0.0, 0.0, 0.0},      // 1
        {0.0, 0.0, 0.0, -0.4, 0.0, 0.0},       // 2
        {-0.33, 0.0, -0.4, 0.0, 0.0, 0.0},     // 3
        {-0.2, 0.0, 0.0, 0.0, 0.0, 0.0},       // 4
        {-0.4, 0.0, 0.0, 0.0, 0.0, 0.0},       // 5
    };

    // Order obtained when the walk starts from element 1
    List<Integer> order = Extractor.nearestNeighbour(mat, 1);
    System.out.println(order);

    // Start element chosen automatically for the same matrix
    int first = Extractor.getFirst(mat);
    System.out.println(first);
}


}
53 changes: 53 additions & 0 deletions src/main/java/kariminf/testing/TestPreProcess.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package kariminf.testing;

import java.util.List;

import kariminf.as.preProcess.PreProcessor;
import kariminf.as.preProcess.StaticPreProcessor;
import kariminf.as.tools.Data;

/**
 * Manual driver for the pre-processing stage: runs a {@link StaticPreProcessor}
 * created with the language code "en" on a small sample text.
 */
public class TestPreProcess {

    /** Sample text handed to the pre-processor by {@link #pp()}. */
    private static final String SAMPLE_TEXT =
            "My name is Karim, and I study informatics at ESI, which is at Algiers, to obtain Magister degree. "
            + "My research in ESI is about ATS, it is the intersection between IR and NLP. "
            + "In this research, the main idea is to find relevant sentences using IR technics. "
            + "The statistical features are the power of IR to find relevancy. "
            + "AI technics are used, such as learning algorithms to create models for each topic in the input text. ";

    /**
     * Pre-processes the sample text and prints the value returned by
     * {@link Data#getSentSimilarities()}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {

        // Reuse pp() instead of repeating the same setup here
        Data data = pp();

        List<List<Double>> sim = data.getSentSimilarities();

        System.out.println(sim);
    }

    /**
     * Pre-processes the sample text into a fresh {@link Data} container.
     *
     * @return the container that was handed to the pre-processor before
     *         {@code preProcess()} was called
     */
    public static Data pp() {
        Data data = new Data();
        PreProcessor prep = new StaticPreProcessor("en");
        prep.setData(data);
        prep.addText(SAMPLE_TEXT);
        prep.preProcess();

        return data;
    }

}
36 changes: 36 additions & 0 deletions src/main/java/kariminf/testing/TestTCC.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package kariminf.testing;

import java.util.List;
import kariminf.as.process.Scorer;
import kariminf.as.process.tcc.BayesScoreHandler;
import kariminf.as.process.tcc.NaiveCluster;
import kariminf.as.process.tcc.Pos;
import kariminf.as.process.tcc.TFB;
import kariminf.as.tools.Data;

/**
 * Manual driver for the TCC scoring stage: scores the data produced by
 * {@link TestPreProcess#pp()} and prints the score obtained for index 1.
 */
public class TestTCC {

    /**
     * Builds the scorer, trains its handler, scores the units and prints one score.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Data preprocessed = TestPreProcess.pp();

        // Score handler built on a NaiveCluster created with the value 0.25
        BayesScoreHandler handler = new BayesScoreHandler(new NaiveCluster(0.25));
        Scorer scorer = Scorer.create(handler);

        // Original author's note: setting the data on the scorer also sets it on the handler
        scorer.setData(preprocessed);

        // Original author's note: each added feature receives the data as well
        handler.addFeature(new TFB());
        handler.addFeature(new Pos());

        // Training has to happen before scoring
        handler.train();
        scorer.scoreUnits();

        List<Integer> ranking = scorer.getOrdered();

        // Score requested for index 1
        double scoreAtIndexOne = scorer.getScore(1);

        System.out.println(scoreAtIndexOne);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ public static void main(String[] args) {
preprocessor.setData(data);
preprocessor.preProcess(text);

System.out.println(data.getSentWords());
//System.out.println(data.getSentWords());

System.exit(0);
//System.exit(0);


/*for(int i = 0; i < 10; i++){
Expand Down Expand Up @@ -78,13 +78,15 @@ public static void main(String[] args) {
scorer.setData(data);
scorer.scoreUnits();

System.out.println("scores: " + scorer.getOrdered());

Extractor reorder;
//reorder = new ReOrderer0(scorer);
//reorder = new ReOrderer1(scorer, th);
//reorder = new ReOrderer2(scorer);
//reorder = new ReOrderer3(scorer);
//reorder = new ReOrderer4(scorer);
//reorder = new SimNeighborReOrderer(scorer);
reorder = new Neighbors2ReOrderer(scorer);
//reorder = new Neighbors2ReOrderer(scorer);

reorder.reOrder();

Expand Down

0 comments on commit fb536ae

Please sign in to comment.