--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestTermsEnum extends LuceneTestCase {
+
+ /**
+  * Indexes at least 10 LineFileDocs documents, collects every term of the
+  * "body" field up front, then randomly interleaves seeks (via
+  * {@code IndexReader.terms(Term)}) and {@code next()} calls, checking each
+  * resulting position against the collected term list.
+  */
+ public void test() throws Exception {
+   final LineFileDocs docs = new LineFileDocs(random);
+   final Directory d = newDirectory();
+   final RandomIndexWriter w = new RandomIndexWriter(random, d);
+   final int numDocs = atLeast(10);
+   for(int docCount=0;docCount<numDocs;docCount++) {
+     w.addDocument(docs.nextDoc());
+   }
+   final IndexReader r = w.getReader();
+   w.close();
+
+   // Expected terms: everything the enum yields while it stays inside "body"
+   final List<Term> terms = new ArrayList<Term>();
+   TermEnum termEnum = r.terms(new Term("body"));
+   do {
+     Term term = termEnum.term();
+     if (term == null || !"body".equals(term.field())) {
+       break;
+     }
+     terms.add(term);
+   } while (termEnum.next());
+
+   if (VERBOSE) {
+     System.out.println("TEST: " + terms.size() + " terms");
+   }
+
+   // Index into terms of the enum's current position; -1 means "not positioned",
+   // which forces the next iteration to seek
+   int upto = -1;
+   final int iters = atLeast(200);
+   for(int iter=0;iter<iters;iter++) {
+     final boolean isEnd;
+     if (upto != -1 && random.nextBoolean()) {
+       // next
+       if (VERBOSE) {
+         System.out.println("TEST: iter next");
+       }
+       termEnum.next();
+       isEnd = termEnum.term() == null || !"body".equals(termEnum.term().field());
+       upto++;
+       if (isEnd) {
+         if (VERBOSE) {
+           System.out.println("  end");
+         }
+         assertEquals(upto, terms.size());
+         upto = -1;
+       } else {
+         if (VERBOSE) {
+           System.out.println("  got term=" + termEnum.term() + " expected=" + terms.get(upto));
+         }
+         assertTrue(upto < terms.size());
+         assertEquals(terms.get(upto), termEnum.term());
+       }
+     } else {
+
+       final Term target;
+       final String exists;
+       if (random.nextBoolean()) {
+         // likely fake term
+         if (random.nextBoolean()) {
+           target = new Term("body",
+                             _TestUtil.randomSimpleString(random));
+         } else {
+           target = new Term("body",
+                             _TestUtil.randomRealisticUnicodeString(random));
+         }
+         exists = "likely not";
+       } else {
+         // real term
+         target = terms.get(random.nextInt(terms.size()));
+         exists = "yes";
+       }
+
+       upto = Collections.binarySearch(terms, target);
+
+       if (VERBOSE) {
+         System.out.println("TEST: iter seekCeil target=" + target + " exists=" + exists);
+       }
+       // Release the enum we are about to replace; a seek creates a new one
+       termEnum.close();
+       termEnum = r.terms(target);
+       final Term actualTerm = termEnum.term();
+
+       if (VERBOSE) {
+         System.out.println("  got term=" + actualTerm);
+       }
+
+       if (upto < 0) {
+         // Target absent: binarySearch returned -(insertionPoint+1)
+         upto = -(upto+1);
+         if (upto >= terms.size()) {
+           assertTrue(actualTerm == null || !"body".equals(actualTerm.field()));
+           upto = -1;
+         } else {
+           assertTrue(actualTerm != null && "body".equals(actualTerm.field()));
+           assertEquals(terms.get(upto), actualTerm);
+         }
+       } else {
+         assertEquals(terms.get(upto), actualTerm);
+       }
+     }
+   }
+
+   termEnum.close();
+   r.close();
+   d.close();
+ }
+
+ // Directory of the index built most recently by makeIndex() or testZeroTerms()
+ private Directory d;
+ // Reader opened by makeIndex(); released through close()
+ private IndexReader r;
+
+ // Name of the single field indexed by makeIndex()
+ private static final String FIELD = "field";
+
+ /**
+  * Builds a new index in a fresh directory with one document per supplied term,
+  * each term indexed un-analyzed and un-stored into {@code FIELD}, then opens a
+  * reader on it. The directory and reader are kept in the {@code d} and
+  * {@code r} fields; if {@code r} already holds a reader it is closed via
+  * {@code close()} before being replaced.
+  *
+  * @param terms term texts, one document each
+  * @return the newly opened reader (also stored in {@code r})
+  */
+ private IndexReader makeIndex(String... terms) throws Exception {
+   d = newDirectory();
+   final MockAnalyzer analyzer = new MockAnalyzer(random);
+   final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+
+   /*
+   CoreCodecProvider cp = new CoreCodecProvider();
+   cp.unregister(cp.lookup("Standard"));
+   cp.register(new StandardCodec(minTermsInBlock, maxTermsInBlock));
+   cp.setDefaultFieldCodec("Standard");
+   iwc.setCodecProvider(cp);
+   */
+
+   final RandomIndexWriter writer = new RandomIndexWriter(random, d, config);
+   writer.w.setInfoStream(VERBOSE ? System.out : null);
+   for (int i = 0; i < terms.length; i++) {
+     final Document doc = new Document();
+     doc.add(newField(FIELD, terms[i], Field.Store.NO, Field.Index.NOT_ANALYZED));
+     writer.addDocument(doc);
+   }
+   // d was already reassigned above, so close() finds the old directory through the old reader
+   if (r != null) {
+     close();
+   }
+   r = writer.getReader();
+   writer.close();
+   return r;
+ }
+
+ /**
+  * Closes the reader in {@code r} together with the directory reported by its
+  * first sequential sub-reader (which is cast to {@code SegmentReader}).
+  */
+ private void close() throws Exception {
+   final IndexReader firstSub = r.getSequentialSubReaders()[0];
+   // Resolve the directory before closing the reader
+   final Directory readerDir = ((SegmentReader) firstSub).directory();
+   r.close();
+   readerDir.close();
+ }
+
+ private int docFreq(IndexReader r, String term) throws Exception {
+ return r.docFreq(new Term(FIELD, term));
+ }
+
+ /**
+  * Runs a fixed sequence of docFreq lookups, mixing present and absent terms,
+  * against a nine-term index. Lookups are issued in the listed order.
+  */
+ public void testEasy() throws Exception {
+   // No floor arcs:
+   r = makeIndex("aa0", "aa1", "aa2", "aa3", "bb0", "bb1", "bb2", "bb3", "aa");
+
+   // probes[i] is looked up and must have docFreq expectedFreqs[i]
+   final String[] probes = {
+     "aa0",  // first term in block
+     "aa2",  // scan forward to another term in same block
+     "aa",
+     "aa1",  // reset same block then scan forwards
+     "aa5",  // not found, in same block
+     "aa2",  // found, in same block
+     "b0",   // not found in index
+     "aa2",  // found
+     "aa0",  // found, rewind
+     "bb0",  // first term in block
+     "bb2",  // scan forward to another term in same block
+     "bb1",  // reset same block then scan forwards
+     "bb5",  // not found, in same block
+     "bb2",  // found, in same block
+     "b0",   // not found in index
+     "bb2",  // found
+     "bb0",  // found, rewind
+   };
+   final int[] expectedFreqs = {
+     1, 1, 1, 1, 0, 1, 0, 1, 1,
+     1, 1, 1, 0, 1, 0, 1, 1,
+   };
+
+   for (int i = 0; i < probes.length; i++) {
+     assertEquals(expectedFreqs[i], docFreq(r, probes[i]));
+   }
+
+   close();
+ }
+
+ // tests:
+ // - test same prefix has non-floor block and floor block (i.e., has 2 long outputs on same term prefix)
+ // - term that's entirely in the index
+
+ /**
+  * Indexes "aa", "aa0".."aa9" and "xx", runs a fixed sequence of docFreq
+  * lookups that jump back and forth between those terms and absent ones,
+  * walks a full term enumeration, and finishes with {@code testRandomSeeks}.
+  */
+ public void testFloorBlocks() throws Exception {
+   final String[] terms = new String[] {"aa0", "aa1", "aa2", "aa3", "aa4", "aa5", "aa6", "aa7", "aa8", "aa9", "aa", "xx"};
+   r = makeIndex(terms);
+   //r = makeIndex("aa0", "aa1", "aa2", "aa3", "aa4", "aa5", "aa6", "aa7", "aa8", "aa9");
+
+   // First term in first block:
+   assertEquals(1, docFreq(r, "aa0"));
+   assertEquals(1, docFreq(r, "aa4"));
+
+   // No block
+   assertEquals(0, docFreq(r, "bb0"));
+
+   // Second block
+   assertEquals(1, docFreq(r, "aa4"));
+
+   // Backwards to prior floor block:
+   assertEquals(1, docFreq(r, "aa0"));
+
+   // Forwards to last floor block:
+   assertEquals(1, docFreq(r, "aa9"));
+
+   assertEquals(0, docFreq(r, "a"));
+   assertEquals(1, docFreq(r, "aa"));
+   assertEquals(0, docFreq(r, "a"));
+   assertEquals(1, docFreq(r, "aa"));
+
+   // Forwards to last floor block:
+   assertEquals(1, docFreq(r, "xx"));
+   assertEquals(1, docFreq(r, "aa1"));
+   assertEquals(0, docFreq(r, "yy"));
+
+   assertEquals(1, docFreq(r, "xx"));
+   assertEquals(1, docFreq(r, "aa9"));
+
+   assertEquals(1, docFreq(r, "xx"));
+   assertEquals(1, docFreq(r, "aa4"));
+
+   // Walk the enumeration to its end; nothing is asserted on the terms themselves,
+   // the loop only has to complete without throwing
+   final TermEnum te = r.terms(new Term(FIELD));
+   try {
+     while(te.next()) {
+       // intentionally empty
+     }
+   } finally {
+     te.close();
+   }
+
+   testRandomSeeks(r, terms);
+   close();
+ }
+
+ /**
+  * Indexes one document that only has "field" and one that only has "field2",
+  * deletes the first by term, force-merges to one segment, and checks that a
+  * term enumeration started at "field" yields no term belonging to "field".
+  */
+ public void testZeroTerms() throws Exception {
+   d = newDirectory();
+   final RandomIndexWriter w = new RandomIndexWriter(random, d);
+   w.w.setInfoStream(VERBOSE ? System.out : null);
+   Document doc = new Document();
+   doc.add(newField("field", "one two three", Field.Store.NO, Field.Index.ANALYZED));
+   // This document must actually be indexed, otherwise the delete below matches nothing
+   w.addDocument(doc);
+   doc = new Document();
+   doc.add(newField("field2", "one two three", Field.Store.NO, Field.Index.ANALYZED));
+   w.addDocument(doc);
+   w.commit();
+   w.deleteDocuments(new Term("field", "one"));
+   w.forceMerge(1);
+   IndexReader r = w.getReader();
+   w.close();
+   assertEquals(1, r.numDocs());
+   assertEquals(1, r.maxDoc());
+   TermEnum terms = r.terms(new Term("field"));
+   if (terms != null) {
+     try {
+       // Inspect the term the enum is positioned on right after the seek;
+       // calling next() first would skip over it
+       final Term first = terms.term();
+       assertTrue(first == null || !"field".equals(first.field()));
+     } finally {
+       terms.close();
+     }
+   }
+   r.close();
+   d.close();
+ }
+
+ private String getRandomString() {
+ //return _TestUtil.randomSimpleString(random);
+ return _TestUtil.randomRealisticUnicodeString(random);
+ }
+
+ /**
+  * Generates a random set of distinct terms (occasionally with a block of terms
+  * sharing one prefix of at least five chars), indexes them with
+  * {@code makeIndex} and runs {@code testRandomSeeks} over the result.
+  */
+ public void testRandomTerms() throws Exception {
+   final int termCount = _TestUtil.nextInt(random, 1, atLeast(1000));
+   final String[] terms = new String[termCount];
+   final Set<String> seen = new HashSet<String>();
+
+   final boolean allowEmptyString = random.nextBoolean();
+
+   // The draw happens regardless of termCount
+   final boolean wantSharedPrefix = random.nextInt(10) == 7;
+   if (wantSharedPrefix && termCount > 2) {
+     // Sometimes add a bunch of terms sharing a longish common prefix:
+     final int numTermsSamePrefix = random.nextInt(termCount/2);
+     if (numTermsSamePrefix > 0) {
+       String prefix;
+       do {
+         prefix = getRandomString();
+       } while (prefix.length() < 5);
+
+       while (seen.size() < numTermsSamePrefix) {
+         final String candidate = prefix + getRandomString();
+         // add() reports whether the term is new; its slot is the set size before adding
+         if (seen.add(candidate)) {
+           terms[seen.size()-1] = candidate;
+         }
+       }
+     }
+   }
+
+   // Fill the remaining slots with unrelated random terms
+   while (seen.size() < termCount) {
+     final String candidate = getRandomString();
+     if (candidate.length() == 0 && !allowEmptyString) {
+       continue;
+     }
+     if (seen.add(candidate)) {
+       terms[seen.size()-1] = candidate;
+     }
+   }
+
+   r = makeIndex(terms);
+   testRandomSeeks(r, terms);
+   close();
+ }
+
+ /**
+  * Draws random strings until one is found that is not present in {@code terms}.
+  *
+  * @param terms candidate terms, sorted with
+  *        {@code BytesRef.getUTF8SortedAsUTF16Comparator()} (the order the
+  *        caller in this file uses)
+  * @return a term that the binary search did not find in {@code terms}
+  */
+ private BytesRef getNonExistTerm(BytesRef[] terms) {
+   while(true) {
+     final BytesRef t = new BytesRef(getRandomString());
+     // Search with the same comparator the array was sorted with; searching in a
+     // different order could report an existing term as missing
+     if (Arrays.binarySearch(terms, t, BytesRef.getUTF8SortedAsUTF16Comparator()) < 0) {
+       return t;
+     }
+   }
+ }
+
+ /**
+  * Repeatedly seeks a term enumeration to a random target (usually an indexed
+  * term, sometimes one that is not indexed), verifies where the enumeration
+  * lands, then follows up with a random number of {@code next()} calls that are
+  * checked against the sorted list of valid terms.
+  *
+  * @param r reader over an index whose {@code FIELD} terms are exactly {@code validTermStrings}
+  * @param validTermStrings the indexed term texts, in any order
+  */
+ private void testRandomSeeks(IndexReader r, String... validTermStrings) throws IOException {
+   final BytesRef[] validTerms = new BytesRef[validTermStrings.length];
+   for(int termIDX=0;termIDX<validTermStrings.length;termIDX++) {
+     validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]);
+   }
+   Arrays.sort(validTerms, BytesRef.getUTF8SortedAsUTF16Comparator());
+   if (VERBOSE) {
+     System.out.println("TEST: " + validTerms.length + " terms:");
+     for(int idx=0;idx<validTerms.length;idx++) {
+       System.out.println("  " + idx + ": " + validTerms[idx]);
+     }
+   }
+
+   // binarySearch result for a target that sorts after every valid term
+   final int END_LOC = -validTerms.length-1;
+
+   for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+
+     final BytesRef t;
+     int loc;
+     if (random.nextInt(6) == 4) {
+       // pick term that doesn't exist:
+       t = getNonExistTerm(validTerms);
+       if (VERBOSE) {
+         System.out.println("\nTEST: invalid term=" + t.utf8ToString());
+       }
+       loc = Arrays.binarySearch(validTerms, t, BytesRef.getUTF8SortedAsUTF16Comparator());
+     } else {
+       // pick valid term
+       loc = random.nextInt(validTerms.length);
+       t = new BytesRef(validTerms[loc]);
+       if (VERBOSE) {
+         System.out.println("\nTEST: valid term=" + t.utf8ToString());
+       }
+     }
+     final Term targetTerm = new Term(FIELD, t.utf8ToString());
+
+     if (VERBOSE) {
+       System.out.println("  seek term=" + targetTerm);
+     }
+
+     final TermEnum te = r.terms(targetTerm);
+     try {
+       Term actualTerm = te.term();
+       if (VERBOSE) {
+         System.out.println("  got " + actualTerm);
+       }
+
+       if (loc >= 0) {
+         // Target exists: the enum must be positioned exactly on it
+         assertEquals(targetTerm, actualTerm);
+       } else if (loc == END_LOC) {
+         // Target sorts after all valid terms: enum is exhausted or in another field
+         assertTrue(actualTerm == null || !FIELD.equals(actualTerm.field()));
+         continue;
+       } else {
+         // Target absent: enum must sit on the next valid term (the insertion point)
+         assert loc >= -validTerms.length;
+         assertTrue(actualTerm != null && FIELD.equals(actualTerm.field()));
+         loc = -loc-1;
+         assertEquals(new Term(FIELD, validTerms[loc].utf8ToString()), actualTerm);
+       }
+
+       // Do a bunch of next's after the seek
+       final int numNext = random.nextInt(validTerms.length);
+
+       if (VERBOSE) {
+         System.out.println("\nTEST: numNext=" + numNext);
+       }
+
+       for(int nextCount=0;nextCount<numNext;nextCount++) {
+         if (VERBOSE) {
+           System.out.println("\nTEST: next loc=" + loc + " of " + validTerms.length);
+         }
+         boolean result = te.next();
+         actualTerm = te.term();
+         loc++;
+
+         if (loc == validTerms.length) {
+           if (VERBOSE) {
+             System.out.println("  actual=null");
+           }
+           assertFalse(result);
+           assertTrue(actualTerm == null || !FIELD.equals(actualTerm.field()));
+           break;
+         } else {
+           // Assert non-null before the verbose print dereferences the term
+           assertTrue(result);
+           assertTrue(actualTerm != null && FIELD.equals(actualTerm.field()));
+           if (VERBOSE) {
+             System.out.println("  actual=" + new BytesRef(actualTerm.text()));
+           }
+           assertEquals(validTerms[loc], new BytesRef(actualTerm.text()));
+         }
+       }
+     } finally {
+       // A new enum is opened on every iteration, so release this one
+       te.close();
+     }
+   }
+ }
+}