diff --git a/src/main/java/de/ids_mannheim/korap/KrillIndex.java b/src/main/java/de/ids_mannheim/korap/KrillIndex.java index 40d369ef..d1e3cdef 100644 --- a/src/main/java/de/ids_mannheim/korap/KrillIndex.java +++ b/src/main/java/de/ids_mannheim/korap/KrillIndex.java @@ -175,8 +175,6 @@ public final class KrillIndex implements IndexInfo { private HashMap termContexts; private ObjectMapper mapper = new ObjectMapper(); - private int maxTokenMatchSize; - // private ByteBuffer bbTerm; // Some initializations ... @@ -193,7 +191,6 @@ public final class KrillIndex implements IndexInfo { String autoCommitStr = null; if (prop != null) { autoCommitStr = prop.getProperty("krill.index.commit.auto"); - this.maxTokenMatchSize = KrillProperties.maxTokenMatchSize; } if (autoCommitStr != null) { @@ -243,7 +240,6 @@ public KrillIndex (Path path) throws IOException { this.directory = new MMapDirectory(path); }; - /** * Get the version number of the index. * @@ -435,14 +431,6 @@ public void setAutoCommit (int value) { this.autoCommit = value; }; - public int getMaxTokenMatchSize () { - return maxTokenMatchSize; - } - - public void setMaxTokenMatchSize (int maxMatchTokens) { - this.maxTokenMatchSize = maxMatchTokens; - } - /** * Update a document in the index as a {@link FieldDocument} * if it already exists (based on the textSigle), otherwise @@ -984,20 +972,12 @@ public Match getMatchInfo (String idString, String field, boolean info, boolean includeSnippets, boolean includeTokens, boolean includeHighlights, boolean extendToSentence) throws QueryException { - return getMatchInfo(idString, field, info, foundry, layer, includeSpans, - includeSnippets, includeTokens, includeHighlights, - extendToSentence, maxTokenMatchSize); - }; - - public Match getMatchInfo (String idString, String field, boolean info, - List foundry, List layer, boolean includeSpans, - boolean includeSnippets, boolean includeTokens, - boolean includeHighlights, boolean extendToSentence, - int maxMatchTokens) throws QueryException { + if (DEBUG) log.trace("Get info on {}", idString); - Match match = new Match(maxMatchTokens, idString, includeHighlights); + int maxTokenMatchSize = KrillProperties.maxTokenMatchSize; + Match match = new Match(maxTokenMatchSize, idString, includeHighlights); if (this.getVersion() != null) match.setVersion(this.getVersion()); @@ -1223,8 +1203,8 @@ else if (includeSpans) { && spanContext[0] < spanContext[1]) { // Match needs to be cutted! - if ((spanContext[1] - spanContext[0]) > maxMatchTokens) { - int contextLength = maxMatchTokens - match.getLength(); + if ((spanContext[1] - spanContext[0]) > maxTokenMatchSize) { + int contextLength = maxTokenMatchSize - match.getLength(); int halfContext = contextLength / 2; // This is the extended context calculated @@ -1237,8 +1217,8 @@ else if (includeSpans) { } } - match.setStartPos(maxMatchTokens,spanContext[0]); - match.setEndPos(maxMatchTokens,spanContext[1]); + match.setStartPos(maxTokenMatchSize,spanContext[0]); + match.setEndPos(maxTokenMatchSize,spanContext[1]); match.potentialStartPosChar = spanContext[2]; match.potentialEndPosChar = spanContext[3]; match.startMore = false; @@ -1591,9 +1571,10 @@ public Result search (Krill ks) { ? lreader.document(localDocID, fieldsSet) : lreader.document(localDocID); - int maxMatchSize = maxTokenMatchSize; - if (ks.getMaxTokenMatchSize() > 0) { - maxMatchSize = ks.getMaxTokenMatchSize(); + int maxMatchSize = ks.getMaxTokenMatchSize(); + if (maxMatchSize <= 0 + || maxMatchSize > KrillProperties.maxTokenMatchSize) { + maxMatchSize = KrillProperties.maxTokenMatchSize; }; // Create new Match diff --git a/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java index 3c3d0ff1..262a0f41 100644 --- a/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java +++ b/src/test/java/de/ids_mannheim/korap/index/TestMaxMatchTokens.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.util.ArrayList; +import org.junit.AfterClass; import org.junit.Test; import de.ids_mannheim.korap.Krill; @@ -34,35 +35,22 @@ public TestMaxMatchTokens () throws IOException { .getResource("/queries/position/sentence-contain-token.json") .getFile()); } + + @AfterClass + public static void resetMaxTokenMatchSize() { + KrillProperties.maxTokenMatchSize = 50; + } @Test public void testLimitingMatchWithProperties () throws IOException { - // from properties - assertEquals(50, ki.getMaxTokenMatchSize()); - + // default properties file Krill ks = new Krill(json); Result kr = ks.apply(ki); Match km = kr.getMatch(0); - assertTrue(km.getLength()] von Fremdwörtern und Namen ist ...", - kr.getMatch(0).getSnippetBrackets()); - assertEquals( - "sechsthäufigste Buchstabe in deutschen Texten. Mit Ausnahme von Fremdwörtern und Namen ist", - kr.getMatch(0).getSnippetHTML()); - } - @Test public void testLimitingMatchInKrill () throws IOException { // Change limit via Krill @@ -80,7 +68,7 @@ public void testLimitingMatchInKrill () throws IOException { }; @Test - public void testMatchInfoWithKrillConfig () + public void testMatchInfo () throws IOException, QueryException { KrillIndex ki = new KrillIndex(); // Indexing test files @@ -95,27 +83,25 @@ public void testMatchInfoWithKrillConfig () ArrayList layer = new ArrayList(); layer.add("opennlp"); - // maxMatchTokens from properties = 5 + // maxMatchTokens from properties = 40 km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", false, foundry, layer, false, false, false, false, false); assertEquals("... [[g. Artikel vornimmst, wäre es fein]] ...", km.getSnippetBrackets()); - // lower than limit - int maxMatchTokens = 2; - km = ki.getMatchInfo("match-WUD17/C94/39360-p390-396", "tokens", - false, foundry, layer, false, false, false, false, true, // extendToSentence - maxMatchTokens); + // request lower than limit + // int maxMatchTokens = 2; + km = ki.getMatchInfo("match-WUD17/C94/39360-p390-392", "tokens", + false, foundry, layer, false, false, false, false, true); - assertTrue(km.endCutted); - assertEquals("... [[g. Artikel]] ...", km.getSnippetBrackets()); + assertEquals("... [[g. Artikel]] ...", km.getSnippetBrackets()); - // more than limit - maxMatchTokens = 51; + // request more than limit + // maxMatchTokens = 51; km = ki.getMatchInfo("match-WUD17/C94/39360-p380-431", "tokens", - false, foundry, layer, false, false, false, false, true, // extendToSentence - maxMatchTokens); - assertEquals(KrillProperties.maxTokenMatchSize, (km.getSnippetBrackets().split(" ").length -2)); + false, foundry, layer, false, false, false, false, false); + assertTrue(km.endCutted); + assertEquals(420, km.getEndPos()); } } diff --git a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java index 3b1d0dfd..fcc1db3b 100644 --- a/src/test/java/de/ids_mannheim/korap/response/TestMatch.java +++ b/src/test/java/de/ids_mannheim/korap/response/TestMatch.java @@ -6,10 +6,17 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import de.ids_mannheim.korap.util.KrillProperties; + @RunWith(JUnit4.class) public class TestMatch { int maxMatchTokens = 50; + + public TestMatch () { + KrillProperties.maxTokenMatchSize = 50; + } + @Test public void testNoMatch () { diff --git a/src/test/resources/krill.properties b/src/test/resources/krill.properties index 3714c0c3..9cfe438e 100644 --- a/src/test/resources/krill.properties +++ b/src/test/resources/krill.properties @@ -6,5 +6,5 @@ krill.index.commit.count = 15 krill.namedVC = queries/collections/named-vcs/ krill.test = true -krill.match.max.token=50 +krill.match.max.token=40 krill.context.max.token=25