lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.File;
  21 import java.io.IOException;
  22 import java.io.ByteArrayInputStream;
  23 import java.io.DataInputStream;
  24 import java.util.Arrays;
  25 import java.util.ArrayList;
  26 import java.util.List;
  27 import java.util.Random;
  28
  29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  30 import org.apache.lucene.document.Document;
  31 import org.apache.lucene.document.Field;
  32 import org.apache.lucene.document.Fieldable;
  33 import org.apache.lucene.document.FieldSelector;
  34 import org.apache.lucene.document.FieldSelectorResult;
  35 import org.apache.lucene.index.FieldInfo.IndexOptions;
  36 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  37 import org.apache.lucene.document.NumericField;
  38 import org.apache.lucene.search.FieldCache;
  39 import org.apache.lucene.search.IndexSearcher;
  40 import org.apache.lucene.search.ScoreDoc;
  41 import org.apache.lucene.search.TermQuery;
  42 import org.apache.lucene.search.NumericRangeQuery;
  43 import org.apache.lucene.store.Directory;
  44 import org.apache.lucene.store.RAMDirectory;
  45 import org.apache.lucene.util.ReaderUtil;
  46 import org.apache.lucene.util.LuceneTestCase;
  47 import org.apache.lucene.util._TestUtil;
  48 import org.apache.lucene.util.Constants;
  49
  50 /*
  51   Verify we can read the pre-2.1 file format, do searches
  52   against it, and add documents to it.
  53 */
  54
  55 public class TestBackwardsCompatibility extends LuceneTestCase {
  56
  57   // Uncomment these cases & run them on an older Lucene
  58   // version, to generate an index to test backwards
  59   // compatibility.  Then, cd to build/test/index.cfs and
  60   // run "zip index.<VERSION>.cfs.zip *"; cd to
  61   // build/test/index.nocfs and run "zip
  62   // index.<VERSION>.nocfs.zip *".  Then move those 2 zip
  63   // files to your trunk checkout and add them to the
  64   // oldNames array.
  65
  66   /*
  67   public void testCreateCFS() throws IOException {
  68     createIndex("index.cfs", true, false);
  69   }
  70
  71   public void testCreateNoCFS() throws IOException {
  72     createIndex("index.nocfs", false, false);
  73   }
  74   */
  75
  76   /*
  77   // These are only needed for the special upgrade test to verify
  78   // that also single-segment indexes are correctly upgraded by IndexUpgrader.
  79   // You don't need them to be build for non-3.1 (the test is happy with just one
  80   // "old" segment format, version is unimportant:
  81
  82   public void testCreateSingleSegmentCFS() throws IOException {
  83     createIndex("index.singlesegment.cfs", true, true);
  84   }
  85
  86   public void testCreateSingleSegmentNoCFS() throws IOException {
  87     createIndex("index.singlesegment.nocfs", false, true);
  88   }
  89   */
  90
  91   final String[] oldNames = {"19.cfs",
  92                              "19.nocfs",
  93                              "20.cfs",
  94                              "20.nocfs",
  95                              "21.cfs",
  96                              "21.nocfs",
  97                              "22.cfs",
  98                              "22.nocfs",
  99                              "23.cfs",
 100                              "23.nocfs",
 101                              "24.cfs",
 102                              "24.nocfs",
 103                              "29.cfs",
 104                              "29.nocfs",
 105                              "30.cfs",
 106                              "30.nocfs",
 107                              "31.cfs",
 108                              "31.nocfs",
 109                              "32.cfs",
 110                              "32.nocfs",
 111   };
 112
 113   final String[] oldSingleSegmentNames = {"31.optimized.cfs",
 114                                           "31.optimized.nocfs",
 115   };
 116
 117   private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
 118     int count = 0;
 119     final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
 120     // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
 121     final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
 122
 123     IndexReader reader = IndexReader.open(dir, true);
 124     try {
 125       // look into sub readers and check if raw merge is on/off
 126       List<IndexReader> readers = new ArrayList<IndexReader>();
 127       ReaderUtil.gatherSubReaders(readers, reader);
 128       for (IndexReader ir : readers) {
 129         final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
 130         assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
 131           shouldStillBeCompressed != fr.canReadRawDocs());
 132       }
 133
 134       // test that decompression works correctly
 135       for(int i=0; i<reader.maxDoc(); i++) {
 136         if (!reader.isDeleted(i)) {
 137           Document d = reader.document(i);
 138           if (d.get("content3") != null) continue;
 139           count++;
 140           Fieldable compressed = d.getFieldable("compressed");
 141           if (Integer.parseInt(d.get("id")) % 2 == 0) {
 142             assertFalse(compressed.isBinary());
 143             assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
 144           } else {
 145             assertTrue(compressed.isBinary());
 146             assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
 147           }
 148         }
 149       }
 150
 151       // check if field was decompressed after full merge
 152       for(int i=0; i<reader.maxDoc(); i++) {
 153         if (!reader.isDeleted(i)) {
 154           Document d = reader.document(i, new FieldSelector() {
 155             public FieldSelectorResult accept(String fieldName) {
 156               return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
 157             }
 158           });
 159           if (d.get("content3") != null) continue;
 160           count++;
 161           // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves):
 162           final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
 163           final int actualSize = ds.readInt();
 164           ds.close();
 165           final int compressedSize = Integer.parseInt(d.get("compressedSize"));
 166           final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
 167           final int shouldSize = shouldStillBeCompressed ?
 168             compressedSize :
 169             (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
 170           assertEquals("size incorrect", shouldSize, actualSize);
 171           if (!shouldStillBeCompressed) {
 172             assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
 173           }
 174         }
 175       }
 176       assertEquals("correct number of tests", 34 * 2, count);
 177     } finally {
 178       reader.close();
 179     }
 180   }
 181
 182   public void testUpgrade29Compression() throws IOException {
 183     int hasTested29 = 0;
 184
 185     for(int i=0;i<oldNames.length;i++) {
 186       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 187       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 188       Directory dir = newFSDirectory(oldIndxeDir);
 189
 190       if (oldNames[i].startsWith("29.")) {
 191         assertCompressedFields29(dir, true);
 192         hasTested29++;
 193       }
 194
 195       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 196         .upgrade();
 197
 198       if (oldNames[i].startsWith("29.")) {
 199         assertCompressedFields29(dir, false);
 200         hasTested29++;
 201       }
 202
 203       dir.close();
 204       _TestUtil.rmDir(oldIndxeDir);
 205     }
 206
 207     assertEquals("test for compressed field should have run 4 times", 4, hasTested29);
 208   }
 209
 210   public void testAddOldIndexes() throws IOException {
 211     for (String name : oldNames) {
 212       File oldIndxeDir = _TestUtil.getTempDir(name);
 213       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 214       Directory dir = newFSDirectory(oldIndxeDir);
 215
 216       Directory targetDir = newDirectory();
 217       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 218           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 219       w.addIndexes(new Directory[] { dir });
 220       w.close();
 221
 222       dir.close();
 223       targetDir.close();
 224       _TestUtil.rmDir(oldIndxeDir);
 225     }
 226   }
 227
 228   public void testAddOldIndexesReader() throws IOException {
 229     for (String name : oldNames) {
 230       File oldIndxeDir = _TestUtil.getTempDir(name);
 231       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 232       Directory dir = newFSDirectory(oldIndxeDir);
 233       IndexReader reader = IndexReader.open(dir);
 234
 235       Directory targetDir = newDirectory();
 236       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 237           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 238       w.addIndexes(new IndexReader[] { reader });
 239       w.close();
 240       reader.close();
 241
 242       dir.close();
 243       targetDir.close();
 244       _TestUtil.rmDir(oldIndxeDir);
 245     }
 246   }
 247
 248   public void testSearchOldIndex() throws IOException {
 249     for(int i=0;i<oldNames.length;i++) {
 250       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 251       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 252       searchIndex(oldIndxeDir, oldNames[i]);
 253       _TestUtil.rmDir(oldIndxeDir);
 254     }
 255   }
 256
 257   public void testIndexOldIndexNoAdds() throws IOException {
 258     for(int i=0;i<oldNames.length;i++) {
 259       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 260       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 261       changeIndexNoAdds(random, oldIndxeDir);
 262       _TestUtil.rmDir(oldIndxeDir);
 263     }
 264   }
 265
 266   public void testIndexOldIndex() throws IOException {
 267     for(int i=0;i<oldNames.length;i++) {
 268       if (VERBOSE) {
 269         System.out.println("TEST: oldName=" + oldNames[i]);
 270       }
 271       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 272       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 273       changeIndexWithAdds(random, oldIndxeDir, oldNames[i]);
 274       _TestUtil.rmDir(oldIndxeDir);
 275     }
 276   }
 277
 278   private void testHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) throws IOException {
 279     final int hitCount = hits.length;
 280     assertEquals("wrong number of hits", expectedCount, hitCount);
 281     for(int i=0;i<hitCount;i++) {
 282       reader.document(hits[i].doc);
 283       reader.getTermFreqVectors(hits[i].doc);
 284     }
 285   }
 286
 287   public void searchIndex(File indexDir, String oldName) throws IOException {
 288     //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 289     //Query query = parser.parse("handle:1");
 290
 291     Directory dir = newFSDirectory(indexDir);
 292     IndexReader reader = IndexReader.open(dir);
 293     IndexSearcher searcher = new IndexSearcher(reader);
 294
 295     _TestUtil.checkIndex(dir);
 296
 297     for(int i=0;i<35;i++) {
 298       if (!reader.isDeleted(i)) {
 299         Document d = reader.document(i);
 300         List<Fieldable> fields = d.getFields();
 301         if (!oldName.startsWith("19.") &&
 302             !oldName.startsWith("20.") &&
 303             !oldName.startsWith("21.") &&
 304             !oldName.startsWith("22.")) {
 305
 306           if (d.getField("content3") == null) {
 307             final int numFields = oldName.startsWith("29.") ? 7 : 5;
 308             assertEquals(numFields, fields.size());
 309             Field f =  d.getField("id");
 310             assertEquals(""+i, f.stringValue());
 311
 312             f = d.getField("utf8");
 313             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 314
 315             f =  d.getField("autf8");
 316             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 317
 318             f = d.getField("content2");
 319             assertEquals("here is more content with aaa aaa aaa", f.stringValue());
 320
 321             f = d.getField("fie\u2C77ld");
 322             assertEquals("field with non-ascii name", f.stringValue());
 323           }
 324
 325           TermFreqVector tfv = reader.getTermFreqVector(i, "utf8");
 326           assertNotNull("docID=" + i + " index=" + indexDir.getName(), tfv);
 327           assertTrue(tfv instanceof TermPositionVector);
 328         }
 329       } else
 330         // Only ID 7 is deleted
 331         assertEquals(7, i);
 332     }
 333
 334     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 335
 336     // First document should be #21 since it's norm was
 337     // increased:
 338     Document d = searcher.doc(hits[0].doc);
 339     assertEquals("didn't get the right document first", "21", d.get("id"));
 340
 341     testHits(hits, 34, searcher.getIndexReader());
 342
 343     if (!oldName.startsWith("19.") &&
 344         !oldName.startsWith("20.") &&
 345         !oldName.startsWith("21.") &&
 346         !oldName.startsWith("22.")) {
 347       // Test on indices >= 2.3
 348       hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
 349       assertEquals(34, hits.length);
 350       hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs;
 351       assertEquals(34, hits.length);
 352       hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs;
 353       assertEquals(34, hits.length);
 354     }
 355
 356     searcher.close();
 357     reader.close();
 358     dir.close();
 359   }
 360
 361   private int compare(String name, String v) {
 362     int v0 = Integer.parseInt(name.substring(0, 2));
 363     int v1 = Integer.parseInt(v);
 364     return v0 - v1;
 365   }
 366
 367   /* Open pre-lockless index, add docs, do a delete &
 368    * setNorm, and search */
 369   public void changeIndexWithAdds(Random random, File oldIndexDir, String origOldName) throws IOException {
 370     Directory dir = newFSDirectory(oldIndexDir);
 371     // open writer
 372     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 373     writer.setInfoStream(VERBOSE ? System.out : null);
 374     // add 10 docs
 375     for(int i=0;i<10;i++) {
 376       addDoc(writer, 35+i);
 377     }
 378
 379     // make sure writer sees right total -- writer seems not to know about deletes in .del?
 380     final int expected;
 381     if (compare(origOldName, "24") < 0) {
 382       expected = 44;
 383     } else {
 384       expected = 45;
 385     }
 386     assertEquals("wrong doc count", expected, writer.numDocs());
 387     writer.close();
 388
 389     // make sure searching sees right # hits
 390     IndexReader reader = IndexReader.open(dir);
 391     IndexSearcher searcher = new IndexSearcher(reader);
 392     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 393     Document d = searcher.doc(hits[0].doc);
 394     assertEquals("wrong first document", "21", d.get("id"));
 395     testHits(hits, 44, searcher.getIndexReader());
 396     searcher.close();
 397     reader.close();
 398
 399     // make sure we can do delete & setNorm against this
 400     // pre-lockless segment:
 401     reader = IndexReader.open(dir, false);
 402     searcher = newSearcher(reader);
 403     Term searchTerm = new Term("id", "6");
 404     int delCount = reader.deleteDocuments(searchTerm);
 405     assertEquals("wrong delete count", 1, delCount);
 406     reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0);
 407     reader.close();
 408     searcher.close();
 409
 410     // make sure they "took":
 411     reader = IndexReader.open(dir, true);
 412     searcher = new IndexSearcher(reader);
 413     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 414     assertEquals("wrong number of hits", 43, hits.length);
 415     d = searcher.doc(hits[0].doc);
 416     assertEquals("wrong first document", "22", d.get("id"));
 417     testHits(hits, 43, searcher.getIndexReader());
 418     searcher.close();
 419     reader.close();
 420
 421     // fully merge
 422     writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 423     writer.forceMerge(1);
 424     writer.close();
 425
 426     reader = IndexReader.open(dir);
 427     searcher = new IndexSearcher(reader);
 428     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 429     assertEquals("wrong number of hits", 43, hits.length);
 430     d = searcher.doc(hits[0].doc);
 431     testHits(hits, 43, searcher.getIndexReader());
 432     assertEquals("wrong first document", "22", d.get("id"));
 433     searcher.close();
 434     reader.close();
 435
 436     dir.close();
 437   }
 438
 439   /* Open pre-lockless index, add docs, do a delete &
 440    * setNorm, and search */
 441   public void changeIndexNoAdds(Random random, File oldIndexDir) throws IOException {
 442
 443     Directory dir = newFSDirectory(oldIndexDir);
 444
 445     // make sure searching sees right # hits
 446     IndexReader reader = IndexReader.open(dir);
 447     IndexSearcher searcher = new IndexSearcher(reader);
 448     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 449     assertEquals("wrong number of hits", 34, hits.length);
 450     Document d = searcher.doc(hits[0].doc);
 451     assertEquals("wrong first document", "21", d.get("id"));
 452     searcher.close();
 453     reader.close();
 454
 455     // make sure we can do a delete & setNorm against this
 456     // pre-lockless segment:
 457     reader = IndexReader.open(dir, false);
 458     Term searchTerm = new Term("id", "6");
 459     int delCount = reader.deleteDocuments(searchTerm);
 460     assertEquals("wrong delete count", 1, delCount);
 461     reader.setNorm(22, "content", (float) 2.0);
 462     reader.close();
 463
 464     // make sure they "took":
 465     reader = IndexReader.open(dir);
 466     searcher = new IndexSearcher(reader);
 467     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 468     assertEquals("wrong number of hits", 33, hits.length);
 469     d = searcher.doc(hits[0].doc);
 470     assertEquals("wrong first document", "22", d.get("id"));
 471     testHits(hits, 33, searcher.getIndexReader());
 472     searcher.close();
 473     reader.close();
 474
 475     // fully merge
 476     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 477     writer.forceMerge(1);
 478     writer.close();
 479
 480     reader = IndexReader.open(dir);
 481     searcher = new IndexSearcher(reader);
 482     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 483     assertEquals("wrong number of hits", 33, hits.length);
 484     d = searcher.doc(hits[0].doc);
 485     assertEquals("wrong first document", "22", d.get("id"));
 486     testHits(hits, 33, searcher.getIndexReader());
 487     searcher.close();
 488     reader.close();
 489
 490     dir.close();
 491   }
 492
 493   public File createIndex(String dirName, boolean doCFS, boolean fullyMerged) throws IOException {
 494     // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
 495     File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
 496     _TestUtil.rmDir(indexDir);
 497     Directory dir = newFSDirectory(indexDir);
 498     LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
 499     mp.setUseCompoundFile(doCFS);
 500     mp.setNoCFSRatio(1.0);
 501     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 502       .setMaxBufferedDocs(10).setMergePolicy(mp);
 503     IndexWriter writer = new IndexWriter(dir, conf);
 504
 505     for(int i=0;i<35;i++) {
 506       addDoc(writer, i);
 507     }
 508     assertEquals("wrong doc count", 35, writer.maxDoc());
 509     if (fullyMerged) {
 510       writer.forceMerge(1);
 511     }
 512     writer.close();
 513
 514     if (!fullyMerged) {
 515       // open fresh writer so we get no prx file in the added segment
 516       mp = new LogByteSizeMergePolicy();
 517       mp.setUseCompoundFile(doCFS);
 518       mp.setNoCFSRatio(1.0);
 519       conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 520         .setMaxBufferedDocs(10).setMergePolicy(mp);
 521       writer = new IndexWriter(dir, conf);
 522       addNoProxDoc(writer);
 523       writer.close();
 524
 525       // Delete one doc so we get a .del file:
 526       IndexReader reader = IndexReader.open(dir, false);
 527       Term searchTerm = new Term("id", "7");
 528       int delCount = reader.deleteDocuments(searchTerm);
 529       assertEquals("didn't delete the right number of documents", 1, delCount);
 530
 531       // Set one norm so we get a .s0 file:
 532       reader.setNorm(21, "content", (float) 1.5);
 533       reader.close();
 534     }
 535
 536     dir.close();
 537
 538     return indexDir;
 539   }
 540
 541   /* Verifies that the expected file names were produced */
 542
 543   public void testExactFileNames() throws IOException {
 544
 545     String outputDirName = "lucene.backwardscompat0.index";
 546     File outputDir = _TestUtil.getTempDir(outputDirName);
 547     _TestUtil.rmDir(outputDir);
 548
 549     try {
 550       Directory dir = newFSDirectory(outputDir);
 551
 552       LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
 553       mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
 554       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0)
 555         .setMergePolicy(mergePolicy);
 556       IndexWriter writer = new IndexWriter(dir, conf);
 557       for(int i=0;i<35;i++) {
 558         addDoc(writer, i);
 559       }
 560       assertEquals("wrong doc count", 35, writer.maxDoc());
 561       writer.close();
 562
 563       // Delete one doc so we get a .del file:
 564       IndexReader reader = IndexReader.open(dir, false);
 565       Term searchTerm = new Term("id", "7");
 566       int delCount = reader.deleteDocuments(searchTerm);
 567       assertEquals("didn't delete the right number of documents", 1, delCount);
 568
 569       // Set one norm so we get a .s0 file:
 570       reader.setNorm(21, "content", (float) 1.5);
 571       reader.close();
 572
 573       // The numbering of fields can vary depending on which
 574       // JRE is in use.  On some JREs we see content bound to
 575       // field 0; on others, field 1.  So, here we have to
 576       // figure out which field number corresponds to
 577       // "content", and then set our expected file names below
 578       // accordingly:
 579       CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
 580       FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
 581       int contentFieldIndex = -1;
 582       for(int i=0;i<fieldInfos.size();i++) {
 583         FieldInfo fi = fieldInfos.fieldInfo(i);
 584         if (fi.name.equals("content")) {
 585           contentFieldIndex = i;
 586           break;
 587         }
 588       }
 589       cfsReader.close();
 590       assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
 591
 592       // Now verify file names:
 593       String[] expected = new String[] {"_0.cfs",
 594                                "_0_1.del",
 595                                "_0_1.s" + contentFieldIndex,
 596                                "segments_2",
 597                                "segments.gen"};
 598
 599       String[] actual = dir.listAll();
 600       Arrays.sort(expected);
 601       Arrays.sort(actual);
 602       if (!Arrays.equals(expected, actual)) {
 603         fail("incorrect filenames in index: expected:\n    " + asString(expected) + "\n  actual:\n    " + asString(actual));
 604       }
 605       dir.close();
 606     } finally {
 607       _TestUtil.rmDir(outputDir);
 608     }
 609   }
 610
 611   private String asString(String[] l) {
 612     String s = "";
 613     for(int i=0;i<l.length;i++) {
 614       if (i > 0) {
 615         s += "\n    ";
 616       }
 617       s += l[i];
 618     }
 619     return s;
 620   }
 621
 622   private void addDoc(IndexWriter writer, int id) throws IOException
 623   {
 624     Document doc = new Document();
 625     doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
 626     doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
 627     doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 628     doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 629     doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 630     doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 631     /* This was used in 2.9 to generate an index with compressed field:
 632     if (id % 2 == 0) {
 633       doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
 634       doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 635     } else {
 636       doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
 637       doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 638     }
 639     */
 640     // add numeric fields, to test if later versions preserve encoding
 641     doc.add(new NumericField("trieInt", 4).setIntValue(id));
 642     doc.add(new NumericField("trieLong", 4).setLongValue(id));
 643     writer.addDocument(doc);
 644   }
 645
 646   private void addNoProxDoc(IndexWriter writer) throws IOException {
 647     Document doc = new Document();
 648     Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
 649     f.setIndexOptions(IndexOptions.DOCS_ONLY);
 650     doc.add(f);
 651     f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
 652     f.setIndexOptions(IndexOptions.DOCS_ONLY);
 653     doc.add(f);
 654     writer.addDocument(doc);
 655   }
 656
 657   static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
 658   // FieldSelectorResult.SIZE returns compressed size for compressed fields, which are internally handled as binary;
 659   // do it in the same way like FieldsWriter, do not use CompressionTools.compressString() for compressed fields:
 660   /* This was used in 2.9 to generate an index with compressed field:
 661   static final int TEXT_COMPRESSED_LENGTH;
 662   static {
 663     try {
 664       TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
 665     } catch (Exception e) {
 666       throw new RuntimeException();
 667     }
 668   }
 669   */
 670   static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
 671   /* This was used in 2.9 to generate an index with compressed field:
 672   static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
 673   */
 674
 675   public void testNumericFields() throws Exception {
 676     for(int i=0;i<oldNames.length;i++) {
 677       // only test indexes >= 3.0
 678       if (oldNames[i].compareTo("30.") < 0) continue;
 679
 680       File oldIndexDir = _TestUtil.getTempDir(oldNames[i]);
 681       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndexDir);
 682       Directory dir = newFSDirectory(oldIndexDir);
 683       IndexReader reader = IndexReader.open(dir);
 684       IndexSearcher searcher = new IndexSearcher(reader);
 685
 686       for (int id=10; id<15; id++) {
 687         ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
 688         assertEquals("wrong number of hits", 1, hits.length);
 689         Document d = searcher.doc(hits[0].doc);
 690         assertEquals(String.valueOf(id), d.get("id"));
 691
 692         hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
 693         assertEquals("wrong number of hits", 1, hits.length);
 694         d = searcher.doc(hits[0].doc);
 695         assertEquals(String.valueOf(id), d.get("id"));
 696       }
 697
 698       // check that also lower-precision fields are ok
 699       ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
 700       assertEquals("wrong number of hits", 34, hits.length);
 701
 702       hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
 703       assertEquals("wrong number of hits", 34, hits.length);
 704
 705       // check decoding into field cache
 706       int[] fci = FieldCache.DEFAULT.getInts(searcher.getIndexReader(), "trieInt");
 707       for (int val : fci) {
 708         assertTrue("value in id bounds", val >= 0 && val < 35);
 709       }
 710
 711       long[] fcl = FieldCache.DEFAULT.getLongs(searcher.getIndexReader(), "trieLong");
 712       for (long val : fcl) {
 713         assertTrue("value in id bounds", val >= 0L && val < 35L);
 714       }
 715
 716       searcher.close();
 717       reader.close();
 718       dir.close();
 719       _TestUtil.rmDir(oldIndexDir);
 720     }
 721   }
 722
 723   private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
 724     final SegmentInfos infos = new SegmentInfos();
 725     infos.read(dir);
 726     if (VERBOSE) {
 727       System.out.println("checkAllSegmentsUpgraded: " + infos);
 728     }
 729     for (SegmentInfo si : infos) {
 730       assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
 731     }
 732     return infos.size();
 733   }
 734
 735   private int getNumberOfSegments(Directory dir) throws IOException {
 736     final SegmentInfos infos = new SegmentInfos();
 737     infos.read(dir);
 738     return infos.size();
 739   }
 740
 741   public void testUpgradeOldIndex() throws Exception {
 742     List<String> names = new ArrayList<String>(oldNames.length + oldSingleSegmentNames.length);
 743     names.addAll(Arrays.asList(oldNames));
 744     names.addAll(Arrays.asList(oldSingleSegmentNames));
 745     for(String name : names) {
 746       if (VERBOSE) {
 747         System.out.println("testUpgradeOldIndex: index=" +name);
 748       }
 749       File oldIndxeDir = _TestUtil.getTempDir(name);
 750       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 751       Directory dir = newFSDirectory(oldIndxeDir);
 752
 753       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 754         .upgrade();
 755
 756       checkAllSegmentsUpgraded(dir);
 757
 758       dir.close();
 759       _TestUtil.rmDir(oldIndxeDir);
 760     }
 761   }
 762
 763   public void testUpgradeOldSingleSegmentIndexWithAdditions() throws Exception {
 764     for (String name : oldSingleSegmentNames) {
 765       if (VERBOSE) {
 766         System.out.println("testUpgradeOldSingleSegmentIndexWithAdditions: index=" +name);
 767       }
 768       File oldIndxeDir = _TestUtil.getTempDir(name);
 769       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 770       Directory dir = newFSDirectory(oldIndxeDir);
 771
 772       assertEquals("Original index must be single segment", 1, getNumberOfSegments(dir));
 773
 774       // create a bunch of dummy segments
 775       int id = 40;
 776       RAMDirectory ramDir = new RAMDirectory();
 777       for (int i = 0; i < 3; i++) {
 778         // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
 779         MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 780         IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 781           .setMergePolicy(mp);
 782         IndexWriter w = new IndexWriter(ramDir, iwc);
 783         // add few more docs:
 784         for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
 785           addDoc(w, id++);
 786         }
 787         w.close(false);
 788       }
 789
 790       // add dummy segments (which are all in current
 791       // version) to single segment index
 792       MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 793       IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
 794         .setMergePolicy(mp);
 795       IndexWriter w = new IndexWriter(dir, iwc);
 796       w.setInfoStream(VERBOSE ? System.out : null);
 797       w.addIndexes(ramDir);
 798       w.close(false);
 799
 800       // determine count of segments in modified index
 801       final int origSegCount = getNumberOfSegments(dir);
 802
 803       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 804         .upgrade();
 805
 806       final int segCount = checkAllSegmentsUpgraded(dir);
 807       assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
 808         origSegCount, segCount);
 809
 810       dir.close();
 811       _TestUtil.rmDir(oldIndxeDir);
 812     }
 813   }
 814
 815 }