Skip to content

Commit

Permalink
Remove maxTokenMatchSize variable from KrillIndex and getMatchInfo.
Browse files Browse the repository at this point in the history
Allow requesting maxTokenMatchSize via Krill, with the value in
KrillProperties acting as the upper limit.

Change-Id: I82a1ad2c3a81abf69168d7cc9f9a6972fb9ba49e
  • Loading branch information
margaretha committed May 23, 2024
1 parent 7695559 commit e2cc49f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 66 deletions.
41 changes: 11 additions & 30 deletions src/main/java/de/ids_mannheim/korap/KrillIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ public final class KrillIndex implements IndexInfo {
private HashMap termContexts;
private ObjectMapper mapper = new ObjectMapper();

private int maxTokenMatchSize;

// private ByteBuffer bbTerm;

// Some initializations ...
Expand All @@ -193,7 +191,6 @@ public final class KrillIndex implements IndexInfo {
String autoCommitStr = null;
if (prop != null) {
autoCommitStr = prop.getProperty("krill.index.commit.auto");
this.maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
}

if (autoCommitStr != null) {
Expand Down Expand Up @@ -243,7 +240,6 @@ public KrillIndex (Path path) throws IOException {
this.directory = new MMapDirectory(path);
};


/**
* Get the version number of the index.
*
Expand Down Expand Up @@ -435,14 +431,6 @@ public void setAutoCommit (int value) {
this.autoCommit = value;
};

/**
 * Get the maximum match size (in tokens) stored on this index.
 *
 * @return the current value of the maxTokenMatchSize field
 */
public int getMaxTokenMatchSize () {
return maxTokenMatchSize;
}

/**
 * Set the maximum match size (in tokens) stored on this index.
 *
 * @param maxMatchTokens
 *            the new value of the maxTokenMatchSize field;
 *            it is stored as given, no range check is applied here
 */
public void setMaxTokenMatchSize (int maxMatchTokens) {
this.maxTokenMatchSize = maxMatchTokens;
}

/**
* Update a document in the index as a {@link FieldDocument}
* if it already exists (based on the textSigle), otherwise
Expand Down Expand Up @@ -984,20 +972,12 @@ public Match getMatchInfo (String idString, String field, boolean info,
boolean includeSnippets, boolean includeTokens,
boolean includeHighlights, boolean extendToSentence)
throws QueryException {
return getMatchInfo(idString, field, info, foundry, layer, includeSpans,
includeSnippets, includeTokens, includeHighlights,
extendToSentence, maxTokenMatchSize);
};

public Match getMatchInfo (String idString, String field, boolean info,
List<String> foundry, List<String> layer, boolean includeSpans,
boolean includeSnippets, boolean includeTokens,
boolean includeHighlights, boolean extendToSentence,
int maxMatchTokens) throws QueryException {

if (DEBUG)
log.trace("Get info on {}", idString);

Match match = new Match(maxMatchTokens, idString, includeHighlights);
int maxTokenMatchSize = KrillProperties.maxTokenMatchSize;
Match match = new Match(maxTokenMatchSize, idString, includeHighlights);

if (this.getVersion() != null)
match.setVersion(this.getVersion());
Expand Down Expand Up @@ -1223,8 +1203,8 @@ else if (includeSpans) {
&& spanContext[0] < spanContext[1]) {

// Match needs to be cut!
if ((spanContext[1] - spanContext[0]) > maxMatchTokens) {
int contextLength = maxMatchTokens - match.getLength();
if ((spanContext[1] - spanContext[0]) > maxTokenMatchSize) {
int contextLength = maxTokenMatchSize - match.getLength();
int halfContext = contextLength / 2;

// This is the extended context calculated
Expand All @@ -1237,8 +1217,8 @@ else if (includeSpans) {
}
}

match.setStartPos(maxMatchTokens,spanContext[0]);
match.setEndPos(maxMatchTokens,spanContext[1]);
match.setStartPos(maxTokenMatchSize,spanContext[0]);
match.setEndPos(maxTokenMatchSize,spanContext[1]);
match.potentialStartPosChar = spanContext[2];
match.potentialEndPosChar = spanContext[3];
match.startMore = false;
Expand Down Expand Up @@ -1591,9 +1571,10 @@ public Result search (Krill ks) {
? lreader.document(localDocID, fieldsSet)
: lreader.document(localDocID);

int maxMatchSize = maxTokenMatchSize;
if (ks.getMaxTokenMatchSize() > 0) {
maxMatchSize = ks.getMaxTokenMatchSize();
int maxMatchSize = ks.getMaxTokenMatchSize();
if (maxMatchSize <= 0
|| maxMatchSize > KrillProperties.maxTokenMatchSize) {
maxMatchSize = KrillProperties.maxTokenMatchSize;
};

// Create new Match
Expand Down
56 changes: 21 additions & 35 deletions src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.IOException;
import java.util.ArrayList;

import org.junit.AfterClass;
import org.junit.Test;

import de.ids_mannheim.korap.Krill;
Expand Down Expand Up @@ -34,35 +35,22 @@ public TestMaxMatchTokens () throws IOException {
.getResource("/queries/position/sentence-contain-token.json")
.getFile());
}

/**
 * Class-level teardown: sets the static
 * KrillProperties.maxTokenMatchSize to 50 once the tests of this
 * class have run.
 *
 * NOTE(review): presumably this restores the value other test
 * classes rely on, since the field is static and shared - confirm
 * that 50 is still the intended value given the setting of
 * krill.match.max.token in the test krill.properties.
 */
@AfterClass
public static void resetMaxTokenMatchSize() {
KrillProperties.maxTokenMatchSize = 50;
}

@Test
public void testLimitingMatchWithProperties () throws IOException {
// from properties
assertEquals(50, ki.getMaxTokenMatchSize());

// default properties file
Krill ks = new Krill(json);
Result kr = ks.apply(ki);
Match km = kr.getMatch(0);
assertTrue(km.getLength()<ki.getMaxTokenMatchSize());
assertEquals(40, KrillProperties.maxTokenMatchSize);
assertTrue(km.getLength() < 40);
};

/**
 * Checks that a match size limit set directly on the KrillIndex is
 * applied to search results: with a limit of 2 tokens the first match
 * is rendered with a cut marker in both the bracket and the HTML
 * snippet.
 *
 * @throws IOException
 *             declared by the test; presumably raised while reading
 *             the query or index resources - confirm
 */
@Test
public void testLimitingMatchInKrillIndex () throws IOException {
// Limiting default match token size in KrillIndex
ki.setMaxTokenMatchSize(2);

// Run the query held in the json field against the index
Krill ks = new Krill(json);
Result kr = ks.apply(ki);
// The limit only shortens matches, the expected total stays at 78
assertEquals(78, kr.getTotalResults());

// "<!>" in the bracket snippet marks the position where the match was cut
assertEquals(
"... sechsthäufigste Buchstabe in deutschen Texten. [[Mit Ausnahme]<!>] von Fremdwörtern und Namen ist ...",
kr.getMatch(0).getSnippetBrackets());
// In the HTML snippet the cut is represented by an empty span of class "cutted"
assertEquals(
"<span class=\"context-left\"><span class=\"more\"></span>sechsthäufigste Buchstabe in deutschen Texten. </span><span class=\"match\"><mark>Mit Ausnahme</mark><span class=\"cutted\"></span></span><span class=\"context-right\"> von Fremdwörtern und Namen ist<span class=\"more\"></span></span>",
kr.getMatch(0).getSnippetHTML());
}

@Test
public void testLimitingMatchInKrill () throws IOException {
// Change limit via Krill
Expand All @@ -80,7 +68,7 @@ public void testLimitingMatchInKrill () throws IOException {
};

@Test
public void testMatchInfoWithKrillConfig ()
public void testMatchInfo ()
throws IOException, QueryException {
KrillIndex ki = new KrillIndex();
// Indexing test files
Expand All @@ -95,27 +83,25 @@ public void testMatchInfoWithKrillConfig ()
ArrayList<String> layer = new ArrayList<String>();
layer.add("opennlp");

// maxMatchTokens from properties = 5
// maxMatchTokens from properties = 40
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", false,
foundry, layer, false, false, false, false, false);

assertEquals("... [[g. Artikel vornimmst, wäre es fein]] ...",
km.getSnippetBrackets());

// lower than limit
int maxMatchTokens = 2;
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens",
false, foundry, layer, false, false, false, false, true, // extendToSentence
maxMatchTokens);
// request lower than limit
// int maxMatchTokens = 2;
km = ki.getMatchInfo("match-WUD17/C94/39360-p390-392", "tokens",
false, foundry, layer, false, false, false, false, true);

assertTrue(km.endCutted);
assertEquals("... [[g. Artikel]<!>] ...", km.getSnippetBrackets());
assertEquals("... [[g. Artikel]] ...", km.getSnippetBrackets());

// more than limit
maxMatchTokens = 51;
// request more than limit
// maxMatchTokens = 51;
km = ki.getMatchInfo("match-WUD17/C94/39360-p380-431", "tokens",
false, foundry, layer, false, false, false, false, true, // extendToSentence
maxMatchTokens);
assertEquals(KrillProperties.maxTokenMatchSize, (km.getSnippetBrackets().split(" ").length -2));
false, foundry, layer, false, false, false, false, false);
assertTrue(km.endCutted);
assertEquals(420, km.getEndPos());
}
}
7 changes: 7 additions & 0 deletions src/test/java/de/ids_mannheim/korap/response/TestMatch.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,17 @@
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import de.ids_mannheim.korap.util.KrillProperties;

@RunWith(JUnit4.class)
public class TestMatch {

int maxMatchTokens = 50;

/**
 * Sets the static KrillProperties.maxTokenMatchSize to 50 each time
 * the test class is instantiated, the same value as the
 * maxMatchTokens field of this class.
 *
 * NOTE(review): presumably this keeps the tests independent of the
 * value configured in krill.properties - confirm.
 */
public TestMatch () {
KrillProperties.maxTokenMatchSize = 50;
}


@Test
public void testNoMatch () {
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/krill.properties
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ krill.index.commit.count = 15
krill.namedVC = queries/collections/named-vcs/
krill.test = true

krill.match.max.token=50
krill.match.max.token=40
krill.context.max.token=25

0 comments on commit e2cc49f

Please sign in to comment.