lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.File;
  21 import java.io.IOException;
  22 import java.io.ByteArrayInputStream;
  23 import java.io.DataInputStream;
  24 import java.util.Arrays;
  25 import java.util.ArrayList;
  26 import java.util.List;
  27 import java.util.ArrayList;
  28 import java.util.Random;
  29
  30 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  31 import org.apache.lucene.document.Document;
  32 import org.apache.lucene.document.Field;
  33 import org.apache.lucene.document.Fieldable;
  34 import org.apache.lucene.document.FieldSelector;
  35 import org.apache.lucene.document.FieldSelectorResult;
  36 import org.apache.lucene.index.FieldInfo.IndexOptions;
  37 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  38 import org.apache.lucene.document.NumericField;
  39 import org.apache.lucene.search.FieldCache;
  40 import org.apache.lucene.search.IndexSearcher;
  41 import org.apache.lucene.search.ScoreDoc;
  42 import org.apache.lucene.search.TermQuery;
  43 import org.apache.lucene.search.NumericRangeQuery;
  44 import org.apache.lucene.store.Directory;
  45 import org.apache.lucene.store.RAMDirectory;
  46 import org.apache.lucene.util.ReaderUtil;
  47 import org.apache.lucene.util.LuceneTestCase;
  48 import org.apache.lucene.util._TestUtil;
  49 import org.apache.lucene.util.Constants;
  50
  51 /*
  52   Verify we can read the pre-2.1 file format, do searches
  53   against it, and add documents to it.
  54 */
  55
  56 public class TestBackwardsCompatibility extends LuceneTestCase {
  57
  58   // Uncomment these cases & run them on an older Lucene
  59   // version, to generate an index to test backwards
  60   // compatibility.  Then, cd to build/test/index.cfs and
  61   // run "zip index.<VERSION>.cfs.zip *"; cd to
  62   // build/test/index.nocfs and run "zip
  63   // index.<VERSION>.nocfs.zip *".  Then move those 2 zip
  64   // files to your trunk checkout and add them to the
  65   // oldNames array.
  66
  67   /*
  68   public void testCreateCFS() throws IOException {
  69     createIndex("index.cfs", true, false);
  70   }
  71
  72   public void testCreateNoCFS() throws IOException {
  73     createIndex("index.nocfs", false, false);
  74   }
  75   */
  76
  77   /*
   78   // These are only needed for the special upgrade test, which verifies
   79   // that optimized indexes are also correctly upgraded by IndexUpgrader.
   80   // You don't need to build them for versions other than 3.1 (the test is happy
   81   // with just one "old" segment format; the version is unimportant):
  82
  83   public void testCreateOptimizedCFS() throws IOException {
  84     createIndex("index.optimized.cfs", true, true);
  85   }
  86
  87   public void testCreateOptimizedNoCFS() throws IOException {
  88     createIndex("index.optimized.nocfs", false, true);
  89   }
  90   */
  91
  92   final String[] oldNames = {"19.cfs",
  93                              "19.nocfs",
  94                              "20.cfs",
  95                              "20.nocfs",
  96                              "21.cfs",
  97                              "21.nocfs",
  98                              "22.cfs",
  99                              "22.nocfs",
 100                              "23.cfs",
 101                              "23.nocfs",
 102                              "24.cfs",
 103                              "24.nocfs",
 104                              "29.cfs",
 105                              "29.nocfs",
 106                              "30.cfs",
 107                              "30.nocfs",
 108                              "31.cfs",
 109                              "31.nocfs",
 110                              "32.cfs",
 111                              "32.nocfs",
 112   };
 113
 114   final String[] oldOptimizedNames = {"31.optimized.cfs",
 115                                       "31.optimized.nocfs",
 116   };
 117
 118   private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
 119     int count = 0;
 120     final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
 121     // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
 122     final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
 123
 124     IndexReader reader = IndexReader.open(dir, true);
 125     try {
 126       // look into sub readers and check if raw merge is on/off
 127       List<IndexReader> readers = new ArrayList<IndexReader>();
 128       ReaderUtil.gatherSubReaders(readers, reader);
 129       for (IndexReader ir : readers) {
 130         final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
 131         assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
 132           shouldStillBeCompressed != fr.canReadRawDocs());
 133       }
 134
 135       // test that decompression works correctly
 136       for(int i=0; i<reader.maxDoc(); i++) {
 137         if (!reader.isDeleted(i)) {
 138           Document d = reader.document(i);
 139           if (d.get("content3") != null) continue;
 140           count++;
 141           Fieldable compressed = d.getFieldable("compressed");
 142           if (Integer.parseInt(d.get("id")) % 2 == 0) {
 143             assertFalse(compressed.isBinary());
 144             assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
 145           } else {
 146             assertTrue(compressed.isBinary());
 147             assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
 148           }
 149         }
 150       }
 151
 152       // check if field was decompressed after optimize
 153       for(int i=0; i<reader.maxDoc(); i++) {
 154         if (!reader.isDeleted(i)) {
 155           Document d = reader.document(i, new FieldSelector() {
 156             public FieldSelectorResult accept(String fieldName) {
 157               return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
 158             }
 159           });
 160           if (d.get("content3") != null) continue;
 161           count++;
 162           // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves):
 163           final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
 164           final int actualSize = ds.readInt();
 165           ds.close();
 166           final int compressedSize = Integer.parseInt(d.get("compressedSize"));
 167           final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
 168           final int shouldSize = shouldStillBeCompressed ?
 169             compressedSize :
 170             (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
 171           assertEquals("size incorrect", shouldSize, actualSize);
 172           if (!shouldStillBeCompressed) {
 173             assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
 174           }
 175         }
 176       }
 177       assertEquals("correct number of tests", 34 * 2, count);
 178     } finally {
 179       reader.close();
 180     }
 181   }
 182
 183   public void testUpgrade29Compression() throws IOException {
 184     int hasTested29 = 0;
 185
 186     for(int i=0;i<oldNames.length;i++) {
 187       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 188       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 189       Directory dir = newFSDirectory(oldIndxeDir);
 190
 191       if (oldNames[i].startsWith("29.")) {
 192         assertCompressedFields29(dir, true);
 193         hasTested29++;
 194       }
 195
 196       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 197         .upgrade();
 198
 199       if (oldNames[i].startsWith("29.")) {
 200         assertCompressedFields29(dir, false);
 201         hasTested29++;
 202       }
 203
 204       dir.close();
 205       _TestUtil.rmDir(oldIndxeDir);
 206     }
 207
 208     assertEquals("test for compressed field should have run 4 times", 4, hasTested29);
 209   }
 210
 211   public void testAddOldIndexes() throws IOException {
 212     for (String name : oldNames) {
 213       File oldIndxeDir = _TestUtil.getTempDir(name);
 214       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 215       Directory dir = newFSDirectory(oldIndxeDir);
 216
 217       Directory targetDir = newDirectory();
 218       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 219           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 220       w.addIndexes(new Directory[] { dir });
 221       w.close();
 222
 223       dir.close();
 224       targetDir.close();
 225       _TestUtil.rmDir(oldIndxeDir);
 226     }
 227   }
 228
 229   public void testAddOldIndexesReader() throws IOException {
 230     for (String name : oldNames) {
 231       File oldIndxeDir = _TestUtil.getTempDir(name);
 232       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 233       Directory dir = newFSDirectory(oldIndxeDir);
 234       IndexReader reader = IndexReader.open(dir);
 235
 236       Directory targetDir = newDirectory();
 237       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 238           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 239       w.addIndexes(new IndexReader[] { reader });
 240       w.close();
 241       reader.close();
 242
 243       dir.close();
 244       targetDir.close();
 245       _TestUtil.rmDir(oldIndxeDir);
 246     }
 247   }
 248
 249   public void testSearchOldIndex() throws IOException {
 250     for(int i=0;i<oldNames.length;i++) {
 251       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 252       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 253       searchIndex(oldIndxeDir, oldNames[i]);
 254       _TestUtil.rmDir(oldIndxeDir);
 255     }
 256   }
 257
 258   public void testIndexOldIndexNoAdds() throws IOException {
 259     for(int i=0;i<oldNames.length;i++) {
 260       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 261       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 262       changeIndexNoAdds(random, oldIndxeDir);
 263       _TestUtil.rmDir(oldIndxeDir);
 264     }
 265   }
 266
 267   public void testIndexOldIndex() throws IOException {
 268     for(int i=0;i<oldNames.length;i++) {
 269       if (VERBOSE) {
 270         System.out.println("TEST: oldName=" + oldNames[i]);
 271       }
 272       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 273       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 274       changeIndexWithAdds(random, oldIndxeDir, oldNames[i]);
 275       _TestUtil.rmDir(oldIndxeDir);
 276     }
 277   }
 278
 279   private void testHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) throws IOException {
 280     final int hitCount = hits.length;
 281     assertEquals("wrong number of hits", expectedCount, hitCount);
 282     for(int i=0;i<hitCount;i++) {
 283       reader.document(hits[i].doc);
 284       reader.getTermFreqVectors(hits[i].doc);
 285     }
 286   }
 287
 288   public void searchIndex(File indexDir, String oldName) throws IOException {
 289     //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 290     //Query query = parser.parse("handle:1");
 291
 292     Directory dir = newFSDirectory(indexDir);
 293     IndexSearcher searcher = new IndexSearcher(dir, true);
 294     IndexReader reader = searcher.getIndexReader();
 295
 296     _TestUtil.checkIndex(dir);
 297
 298     for(int i=0;i<35;i++) {
 299       if (!reader.isDeleted(i)) {
 300         Document d = reader.document(i);
 301         List<Fieldable> fields = d.getFields();
 302         if (!oldName.startsWith("19.") &&
 303             !oldName.startsWith("20.") &&
 304             !oldName.startsWith("21.") &&
 305             !oldName.startsWith("22.")) {
 306
 307           if (d.getField("content3") == null) {
 308             final int numFields = oldName.startsWith("29.") ? 7 : 5;
 309             assertEquals(numFields, fields.size());
 310             Field f =  d.getField("id");
 311             assertEquals(""+i, f.stringValue());
 312
 313             f = d.getField("utf8");
 314             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 315
 316             f =  d.getField("autf8");
 317             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 318
 319             f = d.getField("content2");
 320             assertEquals("here is more content with aaa aaa aaa", f.stringValue());
 321
 322             f = d.getField("fie\u2C77ld");
 323             assertEquals("field with non-ascii name", f.stringValue());
 324           }
 325
 326           TermFreqVector tfv = reader.getTermFreqVector(i, "utf8");
 327           assertNotNull("docID=" + i + " index=" + indexDir.getName(), tfv);
 328           assertTrue(tfv instanceof TermPositionVector);
 329         }
 330       } else
 331         // Only ID 7 is deleted
 332         assertEquals(7, i);
 333     }
 334
 335     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 336
 337     // First document should be #21 since it's norm was
 338     // increased:
 339     Document d = searcher.doc(hits[0].doc);
 340     assertEquals("didn't get the right document first", "21", d.get("id"));
 341
 342     testHits(hits, 34, searcher.getIndexReader());
 343
 344     if (!oldName.startsWith("19.") &&
 345         !oldName.startsWith("20.") &&
 346         !oldName.startsWith("21.") &&
 347         !oldName.startsWith("22.")) {
 348       // Test on indices >= 2.3
 349       hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
 350       assertEquals(34, hits.length);
 351       hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs;
 352       assertEquals(34, hits.length);
 353       hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs;
 354       assertEquals(34, hits.length);
 355     }
 356
 357     searcher.close();
 358     dir.close();
 359   }
 360
 361   private int compare(String name, String v) {
 362     int v0 = Integer.parseInt(name.substring(0, 2));
 363     int v1 = Integer.parseInt(v);
 364     return v0 - v1;
 365   }
 366
 367   /* Open pre-lockless index, add docs, do a delete &
 368    * setNorm, and search */
 369   public void changeIndexWithAdds(Random random, File oldIndexDir, String origOldName) throws IOException {
 370     Directory dir = newFSDirectory(oldIndexDir);
 371     // open writer
 372     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 373     writer.setInfoStream(VERBOSE ? System.out : null);
 374     // add 10 docs
 375     for(int i=0;i<10;i++) {
 376       addDoc(writer, 35+i);
 377     }
 378
 379     // make sure writer sees right total -- writer seems not to know about deletes in .del?
 380     final int expected;
 381     if (compare(origOldName, "24") < 0) {
 382       expected = 44;
 383     } else {
 384       expected = 45;
 385     }
 386     assertEquals("wrong doc count", expected, writer.numDocs());
 387     writer.close();
 388
 389     // make sure searching sees right # hits
 390     IndexSearcher searcher = new IndexSearcher(dir, true);
 391     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 392     Document d = searcher.doc(hits[0].doc);
 393     assertEquals("wrong first document", "21", d.get("id"));
 394     testHits(hits, 44, searcher.getIndexReader());
 395     searcher.close();
 396
 397     // make sure we can do delete & setNorm against this
 398     // pre-lockless segment:
 399     IndexReader reader = IndexReader.open(dir, false);
 400     searcher = newSearcher(reader);
 401     Term searchTerm = new Term("id", "6");
 402     int delCount = reader.deleteDocuments(searchTerm);
 403     assertEquals("wrong delete count", 1, delCount);
 404     reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0);
 405     reader.close();
 406     searcher.close();
 407
 408     // make sure they "took":
 409     searcher = new IndexSearcher(dir, true);
 410     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 411     assertEquals("wrong number of hits", 43, hits.length);
 412     d = searcher.doc(hits[0].doc);
 413     assertEquals("wrong first document", "22", d.get("id"));
 414     testHits(hits, 43, searcher.getIndexReader());
 415     searcher.close();
 416
 417     // optimize
 418     writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 419     writer.optimize();
 420     writer.close();
 421
 422     searcher = new IndexSearcher(dir, true);
 423     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 424     assertEquals("wrong number of hits", 43, hits.length);
 425     d = searcher.doc(hits[0].doc);
 426     testHits(hits, 43, searcher.getIndexReader());
 427     assertEquals("wrong first document", "22", d.get("id"));
 428     searcher.close();
 429
 430     dir.close();
 431   }
 432
  433   /* Open pre-lockless index, do a delete & setNorm
  434    * (without adding any docs), optimize, and search */
 435   public void changeIndexNoAdds(Random random, File oldIndexDir) throws IOException {
 436
 437     Directory dir = newFSDirectory(oldIndexDir);
 438
 439     // make sure searching sees right # hits
 440     IndexSearcher searcher = new IndexSearcher(dir, true);
 441     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 442     assertEquals("wrong number of hits", 34, hits.length);
 443     Document d = searcher.doc(hits[0].doc);
 444     assertEquals("wrong first document", "21", d.get("id"));
 445     searcher.close();
 446
 447     // make sure we can do a delete & setNorm against this
 448     // pre-lockless segment:
 449     IndexReader reader = IndexReader.open(dir, false);
 450     Term searchTerm = new Term("id", "6");
 451     int delCount = reader.deleteDocuments(searchTerm);
 452     assertEquals("wrong delete count", 1, delCount);
 453     reader.setNorm(22, "content", (float) 2.0);
 454     reader.close();
 455
 456     // make sure they "took":
 457     searcher = new IndexSearcher(dir, true);
 458     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 459     assertEquals("wrong number of hits", 33, hits.length);
 460     d = searcher.doc(hits[0].doc);
 461     assertEquals("wrong first document", "22", d.get("id"));
 462     testHits(hits, 33, searcher.getIndexReader());
 463     searcher.close();
 464
 465     // optimize
 466     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 467     writer.optimize();
 468     writer.close();
 469
 470     searcher = new IndexSearcher(dir, true);
 471     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 472     assertEquals("wrong number of hits", 33, hits.length);
 473     d = searcher.doc(hits[0].doc);
 474     assertEquals("wrong first document", "22", d.get("id"));
 475     testHits(hits, 33, searcher.getIndexReader());
 476     searcher.close();
 477
 478     dir.close();
 479   }
 480
 481   public File createIndex(String dirName, boolean doCFS, boolean optimized) throws IOException {
 482     // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
 483     File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
 484     _TestUtil.rmDir(indexDir);
 485     Directory dir = newFSDirectory(indexDir);
 486     LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
 487     mp.setUseCompoundFile(doCFS);
 488     mp.setNoCFSRatio(1.0);
 489     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 490       .setMaxBufferedDocs(10).setMergePolicy(mp);
 491     IndexWriter writer = new IndexWriter(dir, conf);
 492
 493     for(int i=0;i<35;i++) {
 494       addDoc(writer, i);
 495     }
 496     assertEquals("wrong doc count", 35, writer.maxDoc());
 497     if (optimized) {
 498       writer.optimize();
 499     }
 500     writer.close();
 501
 502     if (!optimized) {
 503       // open fresh writer so we get no prx file in the added segment
 504       mp = new LogByteSizeMergePolicy();
 505       mp.setUseCompoundFile(doCFS);
 506       mp.setNoCFSRatio(1.0);
 507       conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 508         .setMaxBufferedDocs(10).setMergePolicy(mp);
 509       writer = new IndexWriter(dir, conf);
 510       addNoProxDoc(writer);
 511       writer.close();
 512
 513       // Delete one doc so we get a .del file:
 514       IndexReader reader = IndexReader.open(dir, false);
 515       Term searchTerm = new Term("id", "7");
 516       int delCount = reader.deleteDocuments(searchTerm);
 517       assertEquals("didn't delete the right number of documents", 1, delCount);
 518
 519       // Set one norm so we get a .s0 file:
 520       reader.setNorm(21, "content", (float) 1.5);
 521       reader.close();
 522     }
 523
 524     dir.close();
 525
 526     return indexDir;
 527   }
 528
 529   /* Verifies that the expected file names were produced */
 530
 531   public void testExactFileNames() throws IOException {
 532
 533     String outputDirName = "lucene.backwardscompat0.index";
 534     File outputDir = _TestUtil.getTempDir(outputDirName);
 535     _TestUtil.rmDir(outputDir);
 536
 537     try {
 538       Directory dir = newFSDirectory(outputDir);
 539
 540       LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
 541       mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
 542       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0)
 543         .setMergePolicy(mergePolicy);
 544       IndexWriter writer = new IndexWriter(dir, conf);
 545       for(int i=0;i<35;i++) {
 546         addDoc(writer, i);
 547       }
 548       assertEquals("wrong doc count", 35, writer.maxDoc());
 549       writer.close();
 550
 551       // Delete one doc so we get a .del file:
 552       IndexReader reader = IndexReader.open(dir, false);
 553       Term searchTerm = new Term("id", "7");
 554       int delCount = reader.deleteDocuments(searchTerm);
 555       assertEquals("didn't delete the right number of documents", 1, delCount);
 556
 557       // Set one norm so we get a .s0 file:
 558       reader.setNorm(21, "content", (float) 1.5);
 559       reader.close();
 560
 561       // The numbering of fields can vary depending on which
 562       // JRE is in use.  On some JREs we see content bound to
 563       // field 0; on others, field 1.  So, here we have to
 564       // figure out which field number corresponds to
 565       // "content", and then set our expected file names below
 566       // accordingly:
 567       CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
 568       FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
 569       int contentFieldIndex = -1;
 570       for(int i=0;i<fieldInfos.size();i++) {
 571         FieldInfo fi = fieldInfos.fieldInfo(i);
 572         if (fi.name.equals("content")) {
 573           contentFieldIndex = i;
 574           break;
 575         }
 576       }
 577       cfsReader.close();
 578       assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
 579
 580       // Now verify file names:
 581       String[] expected = new String[] {"_0.cfs",
 582                                "_0_1.del",
 583                                "_0_1.s" + contentFieldIndex,
 584                                "segments_2",
 585                                "segments.gen"};
 586
 587       String[] actual = dir.listAll();
 588       Arrays.sort(expected);
 589       Arrays.sort(actual);
 590       if (!Arrays.equals(expected, actual)) {
 591         fail("incorrect filenames in index: expected:\n    " + asString(expected) + "\n  actual:\n    " + asString(actual));
 592       }
 593       dir.close();
 594     } finally {
 595       _TestUtil.rmDir(outputDir);
 596     }
 597   }
 598
 599   private String asString(String[] l) {
 600     String s = "";
 601     for(int i=0;i<l.length;i++) {
 602       if (i > 0) {
 603         s += "\n    ";
 604       }
 605       s += l[i];
 606     }
 607     return s;
 608   }
 609
 610   private void addDoc(IndexWriter writer, int id) throws IOException
 611   {
 612     Document doc = new Document();
 613     doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
 614     doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
 615     doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 616     doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 617     doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 618     doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 619     /* This was used in 2.9 to generate an index with compressed field:
 620     if (id % 2 == 0) {
 621       doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
 622       doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 623     } else {
 624       doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
 625       doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 626     }
 627     */
 628     // add numeric fields, to test if later versions preserve encoding
 629     doc.add(new NumericField("trieInt", 4).setIntValue(id));
 630     doc.add(new NumericField("trieLong", 4).setLongValue(id));
 631     writer.addDocument(doc);
 632   }
 633
 634   private void addNoProxDoc(IndexWriter writer) throws IOException {
 635     Document doc = new Document();
 636     Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
 637     f.setIndexOptions(IndexOptions.DOCS_ONLY);
 638     doc.add(f);
 639     f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
 640     f.setIndexOptions(IndexOptions.DOCS_ONLY);
 641     doc.add(f);
 642     writer.addDocument(doc);
 643   }
 644
 645   static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
  646   // FieldSelectorResult.SIZE returns the compressed size for compressed fields, which are internally handled as binary;
  647   // compute it the same way FieldsWriter does; do not use CompressionTools.compressString() for compressed fields:
 648   /* This was used in 2.9 to generate an index with compressed field:
 649   static final int TEXT_COMPRESSED_LENGTH;
 650   static {
 651     try {
 652       TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
 653     } catch (Exception e) {
 654       throw new RuntimeException();
 655     }
 656   }
 657   */
 658   static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
 659   /* This was used in 2.9 to generate an index with compressed field:
 660   static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
 661   */
 662
 663   public void testNumericFields() throws Exception {
 664     for(int i=0;i<oldNames.length;i++) {
 665       // only test indexes >= 3.0
 666       if (oldNames[i].compareTo("30.") < 0) continue;
 667
 668       File oldIndexDir = _TestUtil.getTempDir(oldNames[i]);
 669       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndexDir);
 670       Directory dir = newFSDirectory(oldIndexDir);
 671       IndexSearcher searcher = new IndexSearcher(dir, true);
 672
 673       for (int id=10; id<15; id++) {
 674         ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
 675         assertEquals("wrong number of hits", 1, hits.length);
 676         Document d = searcher.doc(hits[0].doc);
 677         assertEquals(String.valueOf(id), d.get("id"));
 678
 679         hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
 680         assertEquals("wrong number of hits", 1, hits.length);
 681         d = searcher.doc(hits[0].doc);
 682         assertEquals(String.valueOf(id), d.get("id"));
 683       }
 684
 685       // check that also lower-precision fields are ok
 686       ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
 687       assertEquals("wrong number of hits", 34, hits.length);
 688
 689       hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
 690       assertEquals("wrong number of hits", 34, hits.length);
 691
 692       // check decoding into field cache
 693       int[] fci = FieldCache.DEFAULT.getInts(searcher.getIndexReader(), "trieInt");
 694       for (int val : fci) {
 695         assertTrue("value in id bounds", val >= 0 && val < 35);
 696       }
 697
 698       long[] fcl = FieldCache.DEFAULT.getLongs(searcher.getIndexReader(), "trieLong");
 699       for (long val : fcl) {
 700         assertTrue("value in id bounds", val >= 0L && val < 35L);
 701       }
 702
 703       searcher.close();
 704       dir.close();
 705       _TestUtil.rmDir(oldIndexDir);
 706     }
 707   }
 708
 709   private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
 710     final SegmentInfos infos = new SegmentInfos();
 711     infos.read(dir);
 712     if (VERBOSE) {
 713       System.out.println("checkAllSegmentsUpgraded: " + infos);
 714     }
 715     for (SegmentInfo si : infos) {
 716       assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
 717     }
 718     return infos.size();
 719   }
 720
 721   private int getNumberOfSegments(Directory dir) throws IOException {
 722     final SegmentInfos infos = new SegmentInfos();
 723     infos.read(dir);
 724     return infos.size();
 725   }
 726
 727   public void testUpgradeOldIndex() throws Exception {
 728     List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length);
 729     names.addAll(Arrays.asList(oldNames));
 730     names.addAll(Arrays.asList(oldOptimizedNames));
 731     for(String name : names) {
 732       if (VERBOSE) {
 733         System.out.println("testUpgradeOldIndex: index=" +name);
 734       }
 735       File oldIndxeDir = _TestUtil.getTempDir(name);
 736       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 737       Directory dir = newFSDirectory(oldIndxeDir);
 738
 739       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 740         .upgrade();
 741
 742       checkAllSegmentsUpgraded(dir);
 743
 744       dir.close();
 745       _TestUtil.rmDir(oldIndxeDir);
 746     }
 747   }
 748
 749   public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
 750     for (String name : oldOptimizedNames) {
 751       if (VERBOSE) {
 752         System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
 753       }
 754       File oldIndxeDir = _TestUtil.getTempDir(name);
 755       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 756       Directory dir = newFSDirectory(oldIndxeDir);
 757
 758       assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
 759
 760       // create a bunch of dummy segments
 761       int id = 40;
 762       RAMDirectory ramDir = new RAMDirectory();
 763       for (int i = 0; i < 3; i++) {
 764         // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
 765         MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 766         IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 767           .setMergePolicy(mp);
 768         IndexWriter w = new IndexWriter(ramDir, iwc);
 769         // add few more docs:
 770         for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
 771           addDoc(w, id++);
 772         }
 773         w.close(false);
 774       }
 775
 776       // add dummy segments (which are all in current version) to optimized index
 777       MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 778       IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
 779         .setMergePolicy(mp);
 780       IndexWriter w = new IndexWriter(dir, iwc);
 781       w.setInfoStream(VERBOSE ? System.out : null);
 782       w.addIndexes(ramDir);
 783       w.close(false);
 784
 785       // determine count of segments in modified index
 786       final int origSegCount = getNumberOfSegments(dir);
 787
 788       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 789         .upgrade();
 790
 791       final int segCount = checkAllSegmentsUpgraded(dir);
 792       assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
 793         origSegCount, segCount);
 794
 795       dir.close();
 796       _TestUtil.rmDir(oldIndxeDir);
 797     }
 798   }
 799
 800 }