From c8980471e12efdb72bbdb2aac21ca5c5541e8b46 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 19 Jan 2024 15:47:01 +0100 Subject: [PATCH] Run BWC indices generation code together with unittest (#13023) This change runs the current BWC indices generation code together with the unittest to catch issues with the generated indices early. Each generation method runs a sanity check on the generated indices. --- .../TestBackwardsCompatibility.java | 129 ++++++++++-------- 1 file changed, 73 insertions(+), 56 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index 6835eb2cfed9..ec57b9458155 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -176,11 +176,19 @@ public class TestBackwardsCompatibility extends LuceneTestCase { private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f}; public void testCreateCFS() throws IOException { - createIndex("index.cfs", true, false); + Path indexDir = getIndexDir().resolve("index.cfs"); + Files.deleteIfExists(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndex(dir, true, false); + } } public void testCreateNoCFS() throws IOException { - createIndex("index.nocfs", false, false); + Path indexDir = getIndexDir().resolve("index.nocfs"); + Files.deleteIfExists(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndex(dir, false, false); + } } // These are only needed for the special upgrade test to verify @@ -189,11 +197,26 @@ public void testCreateNoCFS() throws IOException { // "old" segment format, version is unimportant: public void testCreateSingleSegmentCFS() throws IOException { - createIndex("index.singlesegment-cfs", true, true); + Path indexDir = 
getIndexDir().resolve("index.singlesegment-cfs"); + Files.deleteIfExists(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndex(dir, true, true); + } } public void testCreateSingleSegmentNoCFS() throws IOException { - createIndex("index.singlesegment-nocfs", false, true); + Path indexDir = getIndexDir().resolve("index.singlesegment-nocfs"); + Files.deleteIfExists(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndex(dir, false, true); + } + } + + public void testCreateIndexInternal() throws IOException { + try (Directory dir = newDirectory()) { + createIndex(dir, random().nextBoolean(), false); + searchIndex(dir, Version.LATEST.toString(), Version.MIN_SUPPORTED_MAJOR, Version.LATEST); + } } private Path getIndexDir() { @@ -226,24 +249,23 @@ private void createMoreTermsIndex(Directory dir) throws Exception { analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); IndexWriterConfig conf = - new IndexWriterConfig(analyzer).setMergePolicy(mp).setUseCompoundFile(false); + new IndexWriterConfig(analyzer) + .setMergePolicy(mp) + .setCodec(TestUtil.getDefaultCodec()) + .setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, conf); LineFileDocs docs = new LineFileDocs(new Random(0)); - Field docIdDV = null; - Field titleDVField = null; for (int i = 0; i < 50; i++) { - Document doc = docs.nextDoc(); - if (docIdDV == null) { - docIdDV = new NumericDocValuesField("docid_intDV", 0); - doc.add(docIdDV); - } - docIdDV.setLongValue(doc.getField("docid_int").numericValue().longValue()); - if (titleDVField == null) { - titleDVField = new SortedDocValuesField("titleDV", new BytesRef()); - doc.add(titleDVField); - } - titleDVField.setBytesValue(new BytesRef(doc.getField("title").stringValue())); + Document doc = TestUtil.cloneDocument(docs.nextDoc()); + doc.add( + new NumericDocValuesField( + "docid_intDV", doc.getField("docid_int").numericValue().longValue())); + doc.add( + new 
SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue()))); writer.addDocument(doc); + if (i % 10 == 0) { // commit every 10 documents + writer.commit(); + } } docs.close(); writer.close(); @@ -281,20 +303,16 @@ public void createSortedIndex(Directory dir) throws Exception { IndexWriterConfig conf = new IndexWriterConfig(analyzer); conf.setMergePolicy(mp); conf.setUseCompoundFile(false); + conf.setCodec(TestUtil.getDefaultCodec()); conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true))); IndexWriter writer = new IndexWriter(dir, conf); LineFileDocs docs = new LineFileDocs(new Random(0)); SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT); parser.setTimeZone(TimeZone.getTimeZone("UTC")); ParsePosition position = new ParsePosition(0); - Field docIdDV = null; - Field dateDVField = null; - Field titleDVField = null; - for (int i = 0; i < 50; i++) { - Document doc = docs.nextDoc(); + Document doc = TestUtil.cloneDocument(docs.nextDoc()); String dateString = doc.get("date"); - position.setIndex(0); Date date = parser.parse(dateString, position); if (position.getErrorIndex() != -1) { @@ -303,23 +321,13 @@ public void createSortedIndex(Directory dir) throws Exception { if (position.getIndex() != dateString.length()) { throw new AssertionError("failed to parse \"" + dateString + "\" as date"); } - if (dateDVField == null) { - dateDVField = new NumericDocValuesField("dateDV", 0l); - doc.add(dateDVField); - } - dateDVField.setLongValue(date.getTime()); - if (docIdDV == null) { - docIdDV = new NumericDocValuesField("docid_intDV", 0); - doc.add(docIdDV); - } - - docIdDV.setLongValue(doc.getField("docid_int").numericValue().longValue()); - if (titleDVField == null) { - titleDVField = new SortedDocValuesField("titleDV", new BytesRef()); - doc.add(titleDVField); - } - titleDVField.setBytesValue(new BytesRef(doc.getField("title").stringValue())); - if (i == 250) { + doc.add( + new NumericDocValuesField( + 
"docid_intDV", doc.getField("docid_int").numericValue().longValue())); + doc.add( + new SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue()))); + doc.add(new NumericDocValuesField("dateDV", date.getTime())); + if (i % 10 == 0) { // commit every 10 documents writer.commit(); } writer.addDocument(doc); @@ -345,13 +353,26 @@ private void updateBinary(IndexWriter writer, String id, String f, String cf, lo } // Creates an index with DocValues updates - public void testCreateIndexWithDocValuesUpdates() throws Exception { + public void testCreateIndexWithDocValuesUpdates() throws IOException { Path indexDir = getIndexDir().resolve("dvupdates"); Files.deleteIfExists(indexDir); - Directory dir = newFSDirectory(indexDir); + try (Directory dir = newFSDirectory(indexDir)) { + createIndexWithDocValuesUpdates(dir); + searchDocValuesUpdatesIndex(dir); + } + } + public void testCreateIndexWithDocValuesUpdatesInternal() throws IOException { + try (Directory dir = newDirectory()) { + createIndexWithDocValuesUpdates(dir); + searchDocValuesUpdatesIndex(dir); + } + } + + private void createIndexWithDocValuesUpdates(Directory dir) throws IOException { IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())) + .setCodec(TestUtil.getDefaultCodec()) .setUseCompoundFile(false) .setMergePolicy(NoMergePolicy.INSTANCE); IndexWriter writer = new IndexWriter(dir, conf); @@ -385,10 +406,7 @@ public void testCreateIndexWithDocValuesUpdates() throws Exception { updateBinary(writer, "21", "bdv1", "bdv1_c", 100L); writer.commit(); updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L); // update the field again - writer.commit(); - writer.close(); - dir.close(); } public void testCreateEmptyIndex() throws Exception { @@ -1211,6 +1229,7 @@ public void searchIndex( final Bits liveDocs = MultiBits.getLiveDocs(reader); assertNotNull(liveDocs); + StoredFields storedFields = reader.storedFields(); TermVectors termVectors = reader.termVectors(); @@ -1575,10 
+1594,7 @@ public void changeIndexNoAdds(Random random, Directory dir, Version nameVersion) reader.close(); } - public void createIndex(String dirName, boolean doCFS, boolean fullyMerged) throws IOException { - Path indexDir = getIndexDir().resolve(dirName); - Files.deleteIfExists(indexDir); - Directory dir = newFSDirectory(indexDir); + public void createIndex(Directory dir, boolean doCFS, boolean fullyMerged) throws IOException { LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setNoCFSRatio(doCFS ? 1.0 : 0.0); mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); @@ -1620,8 +1636,6 @@ public void createIndex(String dirName, boolean doCFS, boolean fullyMerged) thro writer.deleteDocuments(searchTerm); writer.close(); } - - dir.close(); } private void addDoc(IndexWriter writer, int id) throws IOException { @@ -1813,7 +1827,7 @@ public void testSegmentCommitInfoId() throws IOException { } } - public void verifyUsesDefaultCodec(Directory dir, String name) throws Exception { + public void verifyUsesDefaultCodec(Directory dir, String name) throws IOException { DirectoryReader r = DirectoryReader.open(dir); for (LeafReaderContext context : r.leaves()) { SegmentReader air = (SegmentReader) context.reader(); @@ -2056,9 +2070,13 @@ private void verifyDocValues(Directory dir) throws IOException { public void testDocValuesUpdates() throws Exception { Path oldIndexDir = createTempDir("dvupdates"); TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); - Directory dir = newFSDirectory(oldIndexDir); - verifyUsesDefaultCodec(dir, dvUpdatesIndex); + try (Directory dir = newFSDirectory(oldIndexDir)) { + searchDocValuesUpdatesIndex(dir); + } + } + private void searchDocValuesUpdatesIndex(Directory dir) throws IOException { + verifyUsesDefaultCodec(dir, dvUpdatesIndex); verifyDocValues(dir); // update fields and verify index @@ -2078,7 +2096,6 @@ public void testDocValuesUpdates() throws Exception { verifyDocValues(dir); writer.close(); - dir.close(); } public 
void testDeletes() throws Exception {