1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 import java.io.IOException;
22 import java.io.StringReader;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.MockAnalyzer;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field;
30 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.LuceneTestCase;
32 import org.apache.lucene.util.OpenBitSet;
33 import org.apache.lucene.util._TestUtil;
35 public class TestLongPostings extends LuceneTestCase {
37 // Produces a realistic unicode random string that
38 // survives MockAnalyzer unchanged:
39 private String getRandomTerm(String other) throws IOException {
40 Analyzer a = new MockAnalyzer(random);
42 String s = _TestUtil.randomRealisticUnicodeString(random);
43 if (other != null && s.equals(other)) {
46 final TokenStream ts = a.tokenStream("foo", new StringReader(s));
47 final TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
50 while(ts.incrementToken()) {
51 if (count == 0 && !termAtt.term().equals(s)) {
62 public void testLongPostings() throws Exception {
63 // Don't use _TestUtil.getTempDir so that we own the
64 // randomness (i.e., the same seed will point to the same dir):
65 Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." + random.nextLong()));
67 final int NUM_DOCS = atLeast(2000);
70 System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
73 final String s1 = getRandomTerm(null);
74 final String s2 = getRandomTerm(s1);
77 System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
79 for(int idx=0;idx<s1.length();idx++) {
80 System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
82 for(int idx=0;idx<s2.length();idx++) {
83 System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
88 final OpenBitSet isS1 = new OpenBitSet(NUM_DOCS);
89 for(int idx=0;idx<NUM_DOCS;idx++) {
90 if (random.nextBoolean()) {
97 final IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
98 .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
99 .setMergePolicy(newLogMergePolicy());
100 iwc.setRAMBufferSizeMB(16.0 + 16.0 * random.nextDouble());
101 iwc.setMaxBufferedDocs(-1);
102 final RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);
104 for(int idx=0;idx<NUM_DOCS;idx++) {
105 final Document doc = new Document();
106 String s = isS1.get(idx) ? s1 : s2;
107 final Field f = newField("field", s, Field.Index.ANALYZED);
108 final int count = _TestUtil.nextInt(random, 1, 4);
109 for(int ct=0;ct<count;ct++) {
112 riw.addDocument(doc);
118 r = IndexReader.open(dir);
123 System.out.println("TEST: terms");
124 TermEnum termEnum = r.terms();
125 while(termEnum.next()) {
126 System.out.println(" term=" + termEnum.term() + " len=" + termEnum.term().text().length());
127 assertTrue(termEnum.docFreq() > 0);
128 System.out.println(" s1?=" + (termEnum.term().text().equals(s1)) + " s1len=" + s1.length());
129 System.out.println(" s2?=" + (termEnum.term().text().equals(s2)) + " s2len=" + s2.length());
130 final String s = termEnum.term().text();
131 for(int idx=0;idx<s.length();idx++) {
132 System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
138 assertEquals(NUM_DOCS, r.numDocs());
139 assertTrue(r.docFreq(new Term("field", s1)) > 0);
140 assertTrue(r.docFreq(new Term("field", s2)) > 0);
142 final byte[] payload = new byte[100];
144 int num = atLeast(1000);
145 for(int iter=0;iter<num;iter++) {
149 if (random.nextBoolean()) {
158 System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
161 final TermPositions postings = r.termPositions(new Term("field", term));
164 while(docID < Integer.MAX_VALUE) {
165 final int what = random.nextInt(3);
168 System.out.println("TEST: docID=" + docID + "; do next()");
171 int expected = docID+1;
173 if (expected == NUM_DOCS) {
174 expected = Integer.MAX_VALUE;
176 } else if (isS1.get(expected) == doS1) {
182 boolean result = postings.next();
184 assertEquals(Integer.MAX_VALUE, expected);
186 System.out.println(" end");
190 docID = postings.doc();
192 System.out.println(" got docID=" + docID);
194 assertEquals(expected, docID);
196 if (random.nextInt(6) == 3) {
197 final int freq = postings.freq();
198 assertTrue(freq >=1 && freq <= 4);
199 for(int pos=0;pos<freq;pos++) {
200 assertEquals(pos, postings.nextPosition());
201 if (random.nextBoolean() && postings.isPayloadAvailable()) {
202 postings.getPayload(payload, 0);
209 final int targetDocID;
211 targetDocID = random.nextInt(NUM_DOCS+1);
213 targetDocID = docID + _TestUtil.nextInt(random, 1, NUM_DOCS - docID);
216 System.out.println("TEST: docID=" + docID + "; do skipTo(" + targetDocID + ")");
218 int expected = targetDocID;
220 if (expected == NUM_DOCS) {
221 expected = Integer.MAX_VALUE;
223 } else if (isS1.get(expected) == doS1) {
230 final boolean result = postings.skipTo(targetDocID);
232 assertEquals(Integer.MAX_VALUE, expected);
234 System.out.println(" end");
238 docID = postings.doc();
240 System.out.println(" got docID=" + docID);
242 assertEquals(expected, docID);
244 if (random.nextInt(6) == 3) {
245 final int freq = postings.freq();
246 assertTrue(freq >=1 && freq <= 4);
247 for(int pos=0;pos<freq;pos++) {
248 assertEquals(pos, postings.nextPosition());
249 if (random.nextBoolean() && postings.isPayloadAvailable()) {
250 postings.getPayload(payload, 0);