lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/Test2BPostings.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21
  22 import org.apache.lucene.analysis.MockAnalyzer;
  23 import org.apache.lucene.analysis.TokenStream;
  24 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  25 import org.apache.lucene.document.Document;
  26 import org.apache.lucene.document.Field;
  27 import org.apache.lucene.index.FieldInfo.IndexOptions;
  28 import org.apache.lucene.store.MockDirectoryWrapper;
  29 import org.apache.lucene.util.LuceneTestCase;
  30 import org.apache.lucene.util._TestUtil;
  31 import org.junit.Ignore;
  32
  33 /**
  34  * Test indexes ~82M docs with 26 terms each, so you get > Integer.MAX_VALUE terms/docs pairs
  35  * @lucene.experimental
  36  */
  37 public class Test2BPostings extends LuceneTestCase {
  38
  39   @Nightly
  40   public void test() throws Exception {
  41
  42     MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BPostings"));
  43     dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  44     dir.setCheckIndexOnClose(false); // don't double-checkindex
  45
  46     IndexWriter w = new IndexWriter(dir,
  47         new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
  48         .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
  49         .setRAMBufferSizeMB(256.0)
  50         .setMergeScheduler(new ConcurrentMergeScheduler())
  51         .setMergePolicy(newLogMergePolicy(false, 10))
  52         .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  53
  54     MergePolicy mp = w.getConfig().getMergePolicy();
  55     if (mp instanceof LogByteSizeMergePolicy) {
  56      // 1 petabyte:
  57      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
  58     }
  59
  60     Document doc = new Document();
  61     Field field = new Field("field", new MyTokenStream());
  62     field.setIndexOptions(IndexOptions.DOCS_ONLY);
  63     field.setOmitNorms(true);
  64     doc.add(field);
  65
  66     final int numDocs = (Integer.MAX_VALUE / 26) + 1;
  67     for (int i = 0; i < numDocs; i++) {
  68       w.addDocument(doc);
  69       if (VERBOSE && i % 100000 == 0) {
  70         System.out.println(i + " of " + numDocs + "...");
  71       }
  72     }
  73     w.optimize();
  74     w.close();
  75     CheckIndex ci = new CheckIndex(dir);
  76     if (VERBOSE) {
  77       ci.setInfoStream(System.out);
  78     }
  79     ci.checkIndex();
  80     dir.close();
  81   }
  82
  83   public static final class MyTokenStream extends TokenStream {
  84     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  85     private final char buffer[];
  86     int index;
  87
  88     public MyTokenStream() {
  89       termAtt.setLength(1);
  90       buffer = termAtt.buffer();
  91     }
  92
  93     @Override
  94     public boolean incrementToken() throws IOException {
  95       if (index <= 'z') {
  96         buffer[0] = (char) index++;
  97         return true;
  98       }
  99       return false;
 100     }
 101
 102     @Override
 103     public void reset() throws IOException {
 104       index = 'a';
 105     }
 106   }
 107 }