lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.File;
  21 import java.io.IOException;
  22 import java.io.ByteArrayInputStream;
  23 import java.io.DataInputStream;
  24 import java.util.Arrays;
  25 import java.util.ArrayList;
  26 import java.util.List;
  27 import java.util.ArrayList;
  28 import java.util.Random;
  29
  30 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  31 import org.apache.lucene.document.Document;
  32 import org.apache.lucene.document.Field;
  33 import org.apache.lucene.document.Fieldable;
  34 import org.apache.lucene.document.FieldSelector;
  35 import org.apache.lucene.document.FieldSelectorResult;
  36 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  37 import org.apache.lucene.document.NumericField;
  38 import org.apache.lucene.search.FieldCache;
  39 import org.apache.lucene.search.IndexSearcher;
  40 import org.apache.lucene.search.ScoreDoc;
  41 import org.apache.lucene.search.TermQuery;
  42 import org.apache.lucene.search.NumericRangeQuery;
  43 import org.apache.lucene.store.Directory;
  44 import org.apache.lucene.store.RAMDirectory;
  45 import org.apache.lucene.util.ReaderUtil;
  46 import org.apache.lucene.util.LuceneTestCase;
  47 import org.apache.lucene.util._TestUtil;
  48 import org.apache.lucene.util.Constants;
  49
  50 /*
  51   Verify we can read the pre-2.1 file format, do searches
  52   against it, and add documents to it.
  53 */
  54
  55 public class TestBackwardsCompatibility extends LuceneTestCase {
  56
  57   // Uncomment these cases & run them on an older Lucene
  58   // version, to generate an index to test backwards
  59   // compatibility.  Then, cd to build/test/index.cfs and
  60   // run "zip index.<VERSION>.cfs.zip *"; cd to
  61   // build/test/index.nocfs and run "zip
  62   // index.<VERSION>.nocfs.zip *".  Then move those 2 zip
  63   // files to your trunk checkout and add them to the
  64   // oldNames array.
  65
  66   /*
  67   public void testCreateCFS() throws IOException {
  68     createIndex("index.cfs", true, false);
  69   }
  70
  71   public void testCreateNoCFS() throws IOException {
  72     createIndex("index.nocfs", false, false);
  73   }
  74   */
  75
  76   /*
  77   // These are only needed for the special upgrade test, to verify
  78   // that optimized indexes are also correctly upgraded by IndexUpgrader.
  79   // You don't need to build them for versions other than 3.1 (the test is happy with
  80   // just one "old" segment format; the version is unimportant):
  81
  82   public void testCreateOptimizedCFS() throws IOException {
  83     createIndex("index.optimized.cfs", true, true);
  84   }
  85
  86   public void testCreateOptimizedNoCFS() throws IOException {
  87     createIndex("index.optimized.nocfs", false, true);
  88   }
  89   */
  90
  91   final String[] oldNames = {"19.cfs",
  92                              "19.nocfs",
  93                              "20.cfs",
  94                              "20.nocfs",
  95                              "21.cfs",
  96                              "21.nocfs",
  97                              "22.cfs",
  98                              "22.nocfs",
  99                              "23.cfs",
 100                              "23.nocfs",
 101                              "24.cfs",
 102                              "24.nocfs",
 103                              "29.cfs",
 104                              "29.nocfs",
 105                              "30.cfs",
 106                              "30.nocfs",
 107                              "31.cfs",
 108                              "31.nocfs",
 109   };
 110
 111   final String[] oldOptimizedNames = {"31.optimized.cfs",
 112                                       "31.optimized.nocfs",
 113   };
 114
 115   private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
 116     int count = 0;
 117     final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
 118     // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
 119     final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
 120
 121     IndexReader reader = IndexReader.open(dir, true);
 122     try {
 123       // look into sub readers and check if raw merge is on/off
 124       List<IndexReader> readers = new ArrayList<IndexReader>();
 125       ReaderUtil.gatherSubReaders(readers, reader);
 126       for (IndexReader ir : readers) {
 127         final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
 128         assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
 129           shouldStillBeCompressed != fr.canReadRawDocs());
 130       }
 131
 132       // test that decompression works correctly
 133       for(int i=0; i<reader.maxDoc(); i++) {
 134         if (!reader.isDeleted(i)) {
 135           Document d = reader.document(i);
 136           if (d.get("content3") != null) continue;
 137           count++;
 138           Fieldable compressed = d.getFieldable("compressed");
 139           if (Integer.parseInt(d.get("id")) % 2 == 0) {
 140             assertFalse(compressed.isBinary());
 141             assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
 142           } else {
 143             assertTrue(compressed.isBinary());
 144             assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
 145           }
 146         }
 147       }
 148
 149       // check if field was decompressed after optimize
 150       for(int i=0; i<reader.maxDoc(); i++) {
 151         if (!reader.isDeleted(i)) {
 152           Document d = reader.document(i, new FieldSelector() {
 153             public FieldSelectorResult accept(String fieldName) {
 154               return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
 155             }
 156           });
 157           if (d.get("content3") != null) continue;
 158           count++;
 159           // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves):
 160           final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
 161           final int actualSize = ds.readInt();
 162           ds.close();
 163           final int compressedSize = Integer.parseInt(d.get("compressedSize"));
 164           final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
 165           final int shouldSize = shouldStillBeCompressed ?
 166             compressedSize :
 167             (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
 168           assertEquals("size incorrect", shouldSize, actualSize);
 169           if (!shouldStillBeCompressed) {
 170             assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
 171           }
 172         }
 173       }
 174       assertEquals("correct number of tests", 34 * 2, count);
 175     } finally {
 176       reader.close();
 177     }
 178   }
 179
 180   public void testUpgrade29Compression() throws IOException {
 181     int hasTested29 = 0;
 182
 183     for(int i=0;i<oldNames.length;i++) {
 184       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 185       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 186       Directory dir = newFSDirectory(oldIndxeDir);
 187
 188       if (oldNames[i].startsWith("29.")) {
 189         assertCompressedFields29(dir, true);
 190         hasTested29++;
 191       }
 192
 193       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 194         .upgrade();
 195
 196       if (oldNames[i].startsWith("29.")) {
 197         assertCompressedFields29(dir, false);
 198         hasTested29++;
 199       }
 200
 201       dir.close();
 202       _TestUtil.rmDir(oldIndxeDir);
 203     }
 204
 205     assertEquals("test for compressed field should have run 4 times", 4, hasTested29);
 206   }
 207
 208   public void testAddOldIndexes() throws IOException {
 209     for (String name : oldNames) {
 210       File oldIndxeDir = _TestUtil.getTempDir(name);
 211       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 212       Directory dir = newFSDirectory(oldIndxeDir);
 213
 214       Directory targetDir = newDirectory();
 215       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 216           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 217       w.addIndexes(new Directory[] { dir });
 218       w.close();
 219
 220       dir.close();
 221       targetDir.close();
 222       _TestUtil.rmDir(oldIndxeDir);
 223     }
 224   }
 225
 226   public void testAddOldIndexesReader() throws IOException {
 227     for (String name : oldNames) {
 228       File oldIndxeDir = _TestUtil.getTempDir(name);
 229       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 230       Directory dir = newFSDirectory(oldIndxeDir);
 231       IndexReader reader = IndexReader.open(dir);
 232
 233       Directory targetDir = newDirectory();
 234       IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(
 235           TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
 236       w.addIndexes(new IndexReader[] { reader });
 237       w.close();
 238       reader.close();
 239
 240       dir.close();
 241       targetDir.close();
 242       _TestUtil.rmDir(oldIndxeDir);
 243     }
 244   }
 245
 246   public void testSearchOldIndex() throws IOException {
 247     for(int i=0;i<oldNames.length;i++) {
 248       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 249       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 250       searchIndex(oldIndxeDir, oldNames[i]);
 251       _TestUtil.rmDir(oldIndxeDir);
 252     }
 253   }
 254
 255   public void testIndexOldIndexNoAdds() throws IOException {
 256     for(int i=0;i<oldNames.length;i++) {
 257       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 258       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 259       changeIndexNoAdds(random, oldIndxeDir);
 260       _TestUtil.rmDir(oldIndxeDir);
 261     }
 262   }
 263
 264   public void testIndexOldIndex() throws IOException {
 265     for(int i=0;i<oldNames.length;i++) {
 266       if (VERBOSE) {
 267         System.out.println("TEST: oldName=" + oldNames[i]);
 268       }
 269       File oldIndxeDir = _TestUtil.getTempDir(oldNames[i]);
 270       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndxeDir);
 271       changeIndexWithAdds(random, oldIndxeDir, oldNames[i]);
 272       _TestUtil.rmDir(oldIndxeDir);
 273     }
 274   }
 275
 276   private void testHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) throws IOException {
 277     final int hitCount = hits.length;
 278     assertEquals("wrong number of hits", expectedCount, hitCount);
 279     for(int i=0;i<hitCount;i++) {
 280       reader.document(hits[i].doc);
 281       reader.getTermFreqVectors(hits[i].doc);
 282     }
 283   }
 284
 285   public void searchIndex(File indexDir, String oldName) throws IOException {
 286     //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
 287     //Query query = parser.parse("handle:1");
 288
 289     Directory dir = newFSDirectory(indexDir);
 290     IndexSearcher searcher = new IndexSearcher(dir, true);
 291     IndexReader reader = searcher.getIndexReader();
 292
 293     _TestUtil.checkIndex(dir);
 294
 295     for(int i=0;i<35;i++) {
 296       if (!reader.isDeleted(i)) {
 297         Document d = reader.document(i);
 298         List<Fieldable> fields = d.getFields();
 299         if (!oldName.startsWith("19.") &&
 300             !oldName.startsWith("20.") &&
 301             !oldName.startsWith("21.") &&
 302             !oldName.startsWith("22.")) {
 303
 304           if (d.getField("content3") == null) {
 305             final int numFields = oldName.startsWith("29.") ? 7 : 5;
 306             assertEquals(numFields, fields.size());
 307             Field f =  d.getField("id");
 308             assertEquals(""+i, f.stringValue());
 309
 310             f = d.getField("utf8");
 311             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 312
 313             f =  d.getField("autf8");
 314             assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
 315
 316             f = d.getField("content2");
 317             assertEquals("here is more content with aaa aaa aaa", f.stringValue());
 318
 319             f = d.getField("fie\u2C77ld");
 320             assertEquals("field with non-ascii name", f.stringValue());
 321           }
 322
 323           TermFreqVector tfv = reader.getTermFreqVector(i, "utf8");
 324           assertNotNull("docID=" + i + " index=" + indexDir.getName(), tfv);
 325           assertTrue(tfv instanceof TermPositionVector);
 326         }
 327       } else
 328         // Only ID 7 is deleted
 329         assertEquals(7, i);
 330     }
 331
 332     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 333
 334     // First document should be #21 since it's norm was
 335     // increased:
 336     Document d = searcher.doc(hits[0].doc);
 337     assertEquals("didn't get the right document first", "21", d.get("id"));
 338
 339     testHits(hits, 34, searcher.getIndexReader());
 340
 341     if (!oldName.startsWith("19.") &&
 342         !oldName.startsWith("20.") &&
 343         !oldName.startsWith("21.") &&
 344         !oldName.startsWith("22.")) {
 345       // Test on indices >= 2.3
 346       hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
 347       assertEquals(34, hits.length);
 348       hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs;
 349       assertEquals(34, hits.length);
 350       hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs;
 351       assertEquals(34, hits.length);
 352     }
 353
 354     searcher.close();
 355     dir.close();
 356   }
 357
 358   private int compare(String name, String v) {
 359     int v0 = Integer.parseInt(name.substring(0, 2));
 360     int v1 = Integer.parseInt(v);
 361     return v0 - v1;
 362   }
 363
 364   /* Open pre-lockless index, add docs, do a delete &
 365    * setNorm, and search */
 366   public void changeIndexWithAdds(Random random, File oldIndexDir, String origOldName) throws IOException {
 367     Directory dir = newFSDirectory(oldIndexDir);
 368     // open writer
 369     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 370     writer.setInfoStream(VERBOSE ? System.out : null);
 371     // add 10 docs
 372     for(int i=0;i<10;i++) {
 373       addDoc(writer, 35+i);
 374     }
 375
 376     // make sure writer sees right total -- writer seems not to know about deletes in .del?
 377     final int expected;
 378     if (compare(origOldName, "24") < 0) {
 379       expected = 44;
 380     } else {
 381       expected = 45;
 382     }
 383     assertEquals("wrong doc count", expected, writer.numDocs());
 384     writer.close();
 385
 386     // make sure searching sees right # hits
 387     IndexSearcher searcher = new IndexSearcher(dir, true);
 388     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 389     Document d = searcher.doc(hits[0].doc);
 390     assertEquals("wrong first document", "21", d.get("id"));
 391     testHits(hits, 44, searcher.getIndexReader());
 392     searcher.close();
 393
 394     // make sure we can do delete & setNorm against this
 395     // pre-lockless segment:
 396     IndexReader reader = IndexReader.open(dir, false);
 397     searcher = newSearcher(reader);
 398     Term searchTerm = new Term("id", "6");
 399     int delCount = reader.deleteDocuments(searchTerm);
 400     assertEquals("wrong delete count", 1, delCount);
 401     reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0);
 402     reader.close();
 403     searcher.close();
 404
 405     // make sure they "took":
 406     searcher = new IndexSearcher(dir, true);
 407     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 408     assertEquals("wrong number of hits", 43, hits.length);
 409     d = searcher.doc(hits[0].doc);
 410     assertEquals("wrong first document", "22", d.get("id"));
 411     testHits(hits, 43, searcher.getIndexReader());
 412     searcher.close();
 413
 414     // optimize
 415     writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 416     writer.optimize();
 417     writer.close();
 418
 419     searcher = new IndexSearcher(dir, true);
 420     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 421     assertEquals("wrong number of hits", 43, hits.length);
 422     d = searcher.doc(hits[0].doc);
 423     testHits(hits, 43, searcher.getIndexReader());
 424     assertEquals("wrong first document", "22", d.get("id"));
 425     searcher.close();
 426
 427     dir.close();
 428   }
 429
 430   /* Open pre-lockless index, add docs, do a delete &
 431    * setNorm, and search */
 432   public void changeIndexNoAdds(Random random, File oldIndexDir) throws IOException {
 433
 434     Directory dir = newFSDirectory(oldIndexDir);
 435
 436     // make sure searching sees right # hits
 437     IndexSearcher searcher = new IndexSearcher(dir, true);
 438     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 439     assertEquals("wrong number of hits", 34, hits.length);
 440     Document d = searcher.doc(hits[0].doc);
 441     assertEquals("wrong first document", "21", d.get("id"));
 442     searcher.close();
 443
 444     // make sure we can do a delete & setNorm against this
 445     // pre-lockless segment:
 446     IndexReader reader = IndexReader.open(dir, false);
 447     Term searchTerm = new Term("id", "6");
 448     int delCount = reader.deleteDocuments(searchTerm);
 449     assertEquals("wrong delete count", 1, delCount);
 450     reader.setNorm(22, "content", (float) 2.0);
 451     reader.close();
 452
 453     // make sure they "took":
 454     searcher = new IndexSearcher(dir, true);
 455     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 456     assertEquals("wrong number of hits", 33, hits.length);
 457     d = searcher.doc(hits[0].doc);
 458     assertEquals("wrong first document", "22", d.get("id"));
 459     testHits(hits, 33, searcher.getIndexReader());
 460     searcher.close();
 461
 462     // optimize
 463     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
 464     writer.optimize();
 465     writer.close();
 466
 467     searcher = new IndexSearcher(dir, true);
 468     hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
 469     assertEquals("wrong number of hits", 33, hits.length);
 470     d = searcher.doc(hits[0].doc);
 471     assertEquals("wrong first document", "22", d.get("id"));
 472     testHits(hits, 33, searcher.getIndexReader());
 473     searcher.close();
 474
 475     dir.close();
 476   }
 477
 478   public File createIndex(String dirName, boolean doCFS, boolean optimized) throws IOException {
 479     // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
 480     File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName);
 481     _TestUtil.rmDir(indexDir);
 482     Directory dir = newFSDirectory(indexDir);
 483     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10);
 484     ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
 485     if (doCFS) {
 486       ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0);
 487     }
 488     IndexWriter writer = new IndexWriter(dir, conf);
 489
 490     for(int i=0;i<35;i++) {
 491       addDoc(writer, i);
 492     }
 493     assertEquals("wrong doc count", 35, writer.maxDoc());
 494     if (optimized) {
 495       writer.optimize();
 496     }
 497     writer.close();
 498
 499     if (!optimized) {
 500       // open fresh writer so we get no prx file in the added segment
 501       conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10);
 502       ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
 503       writer = new IndexWriter(dir, conf);
 504       addNoProxDoc(writer);
 505       writer.close();
 506
 507       // Delete one doc so we get a .del file:
 508       IndexReader reader = IndexReader.open(dir, false);
 509       Term searchTerm = new Term("id", "7");
 510       int delCount = reader.deleteDocuments(searchTerm);
 511       assertEquals("didn't delete the right number of documents", 1, delCount);
 512
 513       // Set one norm so we get a .s0 file:
 514       reader.setNorm(21, "content", (float) 1.5);
 515       reader.close();
 516     }
 517
 518     dir.close();
 519
 520     return indexDir;
 521   }
 522
 523   /* Verifies that the expected file names were produced */
 524
 525   public void testExactFileNames() throws IOException {
 526
 527     String outputDirName = "lucene.backwardscompat0.index";
 528     File outputDir = _TestUtil.getTempDir(outputDirName);
 529     _TestUtil.rmDir(outputDir);
 530
 531     try {
 532       Directory dir = newFSDirectory(outputDir);
 533
 534       LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
 535       mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
 536       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0)
 537         .setMergePolicy(mergePolicy);
 538       IndexWriter writer = new IndexWriter(dir, conf);
 539       for(int i=0;i<35;i++) {
 540         addDoc(writer, i);
 541       }
 542       assertEquals("wrong doc count", 35, writer.maxDoc());
 543       writer.close();
 544
 545       // Delete one doc so we get a .del file:
 546       IndexReader reader = IndexReader.open(dir, false);
 547       Term searchTerm = new Term("id", "7");
 548       int delCount = reader.deleteDocuments(searchTerm);
 549       assertEquals("didn't delete the right number of documents", 1, delCount);
 550
 551       // Set one norm so we get a .s0 file:
 552       reader.setNorm(21, "content", (float) 1.5);
 553       reader.close();
 554
 555       // The numbering of fields can vary depending on which
 556       // JRE is in use.  On some JREs we see content bound to
 557       // field 0; on others, field 1.  So, here we have to
 558       // figure out which field number corresponds to
 559       // "content", and then set our expected file names below
 560       // accordingly:
 561       CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
 562       FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
 563       int contentFieldIndex = -1;
 564       for(int i=0;i<fieldInfos.size();i++) {
 565         FieldInfo fi = fieldInfos.fieldInfo(i);
 566         if (fi.name.equals("content")) {
 567           contentFieldIndex = i;
 568           break;
 569         }
 570       }
 571       cfsReader.close();
 572       assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
 573
 574       // Now verify file names:
 575       String[] expected = new String[] {"_0.cfs",
 576                                "_0_1.del",
 577                                "_0_1.s" + contentFieldIndex,
 578                                "segments_2",
 579                                "segments.gen"};
 580
 581       String[] actual = dir.listAll();
 582       Arrays.sort(expected);
 583       Arrays.sort(actual);
 584       if (!Arrays.equals(expected, actual)) {
 585         fail("incorrect filenames in index: expected:\n    " + asString(expected) + "\n  actual:\n    " + asString(actual));
 586       }
 587       dir.close();
 588     } finally {
 589       _TestUtil.rmDir(outputDir);
 590     }
 591   }
 592
 593   private String asString(String[] l) {
 594     String s = "";
 595     for(int i=0;i<l.length;i++) {
 596       if (i > 0) {
 597         s += "\n    ";
 598       }
 599       s += l[i];
 600     }
 601     return s;
 602   }
 603
 604   private void addDoc(IndexWriter writer, int id) throws IOException
 605   {
 606     Document doc = new Document();
 607     doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
 608     doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
 609     doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 610     doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 611     doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 612     doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
 613     /* This was used in 2.9 to generate an index with compressed field:
 614     if (id % 2 == 0) {
 615       doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
 616       doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 617     } else {
 618       doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
 619       doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
 620     }
 621     */
 622     // add numeric fields, to test if later versions preserve encoding
 623     doc.add(new NumericField("trieInt", 4).setIntValue(id));
 624     doc.add(new NumericField("trieLong", 4).setLongValue(id));
 625     writer.addDocument(doc);
 626   }
 627
 628   private void addNoProxDoc(IndexWriter writer) throws IOException {
 629     Document doc = new Document();
 630     Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
 631     f.setOmitTermFreqAndPositions(true);
 632     doc.add(f);
 633     f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
 634     f.setOmitTermFreqAndPositions(true);
 635     doc.add(f);
 636     writer.addDocument(doc);
 637   }
 638
 639   static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
 640   // FieldSelectorResult.SIZE returns compressed size for compressed fields, which are internally handled as binary;
 641   // do it in the same way like FieldsWriter, do not use CompressionTools.compressString() for compressed fields:
 642   /* This was used in 2.9 to generate an index with compressed field:
 643   static final int TEXT_COMPRESSED_LENGTH;
 644   static {
 645     try {
 646       TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
 647     } catch (Exception e) {
 648       throw new RuntimeException();
 649     }
 650   }
 651   */
 652   static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
 653   /* This was used in 2.9 to generate an index with compressed field:
 654   static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
 655   */
 656
 657   public void testNumericFields() throws Exception {
 658     for(int i=0;i<oldNames.length;i++) {
 659       // only test indexes >= 3.0
 660       if (oldNames[i].compareTo("30.") < 0) continue;
 661
 662       File oldIndexDir = _TestUtil.getTempDir(oldNames[i]);
 663       _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndexDir);
 664       Directory dir = newFSDirectory(oldIndexDir);
 665       IndexSearcher searcher = new IndexSearcher(dir, true);
 666
 667       for (int id=10; id<15; id++) {
 668         ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
 669         assertEquals("wrong number of hits", 1, hits.length);
 670         Document d = searcher.doc(hits[0].doc);
 671         assertEquals(String.valueOf(id), d.get("id"));
 672
 673         hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
 674         assertEquals("wrong number of hits", 1, hits.length);
 675         d = searcher.doc(hits[0].doc);
 676         assertEquals(String.valueOf(id), d.get("id"));
 677       }
 678
 679       // check that also lower-precision fields are ok
 680       ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
 681       assertEquals("wrong number of hits", 34, hits.length);
 682
 683       hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
 684       assertEquals("wrong number of hits", 34, hits.length);
 685
 686       // check decoding into field cache
 687       int[] fci = FieldCache.DEFAULT.getInts(searcher.getIndexReader(), "trieInt");
 688       for (int val : fci) {
 689         assertTrue("value in id bounds", val >= 0 && val < 35);
 690       }
 691
 692       long[] fcl = FieldCache.DEFAULT.getLongs(searcher.getIndexReader(), "trieLong");
 693       for (long val : fcl) {
 694         assertTrue("value in id bounds", val >= 0L && val < 35L);
 695       }
 696
 697       searcher.close();
 698       dir.close();
 699       _TestUtil.rmDir(oldIndexDir);
 700     }
 701   }
 702
 703   private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
 704     final SegmentInfos infos = new SegmentInfos();
 705     infos.read(dir);
 706     if (VERBOSE) {
 707       System.out.println("checkAllSegmentsUpgraded: " + infos);
 708     }
 709     for (SegmentInfo si : infos) {
 710       assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion());
 711     }
 712     return infos.size();
 713   }
 714
 715   private int getNumberOfSegments(Directory dir) throws IOException {
 716     final SegmentInfos infos = new SegmentInfos();
 717     infos.read(dir);
 718     return infos.size();
 719   }
 720
 721   public void testUpgradeOldIndex() throws Exception {
 722     List<String> names = new ArrayList<String>(oldNames.length + oldOptimizedNames.length);
 723     names.addAll(Arrays.asList(oldNames));
 724     names.addAll(Arrays.asList(oldOptimizedNames));
 725     for(String name : names) {
 726       if (VERBOSE) {
 727         System.out.println("testUpgradeOldIndex: index=" +name);
 728       }
 729       File oldIndxeDir = _TestUtil.getTempDir(name);
 730       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 731       Directory dir = newFSDirectory(oldIndxeDir);
 732
 733       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 734         .upgrade();
 735
 736       checkAllSegmentsUpgraded(dir);
 737
 738       dir.close();
 739       _TestUtil.rmDir(oldIndxeDir);
 740     }
 741   }
 742
 743   public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception {
 744     for (String name : oldOptimizedNames) {
 745       if (VERBOSE) {
 746         System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name);
 747       }
 748       File oldIndxeDir = _TestUtil.getTempDir(name);
 749       _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir);
 750       Directory dir = newFSDirectory(oldIndxeDir);
 751
 752       assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir));
 753
 754       // create a bunch of dummy segments
 755       int id = 40;
 756       RAMDirectory ramDir = new RAMDirectory();
 757       for (int i = 0; i < 3; i++) {
 758         // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
 759         MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 760         IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
 761           .setMergePolicy(mp);
 762         IndexWriter w = new IndexWriter(ramDir, iwc);
 763         // add few more docs:
 764         for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) {
 765           addDoc(w, id++);
 766         }
 767         w.close(false);
 768       }
 769
 770       // add dummy segments (which are all in current version) to optimized index
 771       MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
 772       IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null)
 773         .setMergePolicy(mp);
 774       IndexWriter w = new IndexWriter(dir, iwc);
 775       w.setInfoStream(VERBOSE ? System.out : null);
 776       w.addIndexes(ramDir);
 777       w.close(false);
 778
 779       // determine count of segments in modified index
 780       final int origSegCount = getNumberOfSegments(dir);
 781
 782       new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false)
 783         .upgrade();
 784
 785       final int segCount = checkAllSegmentsUpgraded(dir);
 786       assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
 787         origSegCount, segCount);
 788
 789       dir.close();
 790       _TestUtil.rmDir(oldIndxeDir);
 791     }
 792   }
 793
 794 }