// Provenance: recovered from the patch at
// https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/index/Test2BTerms.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.*;
import org.apache.lucene.store.*;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.Ignore;

// Best to run this test w/ plenty of RAM (because of the
// terms index):
//
//   ant compile-test
//
//   java -server -Xmx8g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms
//

/**
 * Indexes more than {@link Integer#MAX_VALUE} random fixed-length tokens into
 * one field, then checks that a sample of them can still be found by search
 * and by term seek, and that CheckIndex reports a term count above
 * {@link Integer#MAX_VALUE} for the first segment.
 */
public class Test2BTerms extends LuceneTestCase {

  /**
   * Token stream that produces {@code tokensPerDoc} random tokens of
   * {@link #TOKEN_LEN} chars per document, writing each token directly into
   * the shared term buffer. At random intervals of 500000 to 1000000 tokens
   * it records the current token in {@link #savedTerms} so the test can look
   * it up later.
   */
  private final class MyTokenStream extends TokenStream {

    private final int tokensPerDoc;
    private int tokenCount;
    private final CharTermAttribute charTerm;
    private final static int TOKEN_LEN = 5;
    private final char[] chars;
    /** Sampled tokens, in the order they were produced. */
    public final List<String> savedTerms = new ArrayList<String>();
    /** Countdown of tokens remaining until the next one is sampled. */
    private int nextSave;

    /**
     * @param tokensPerDoc number of tokens to emit between two calls to
     *        {@link #reset()}
     */
    public MyTokenStream(int tokensPerDoc) {
      super();
      this.tokensPerDoc = tokensPerDoc;
      charTerm = addAttribute(CharTermAttribute.class);
      // Keep a reference to the attribute's buffer so each token is filled in place.
      chars = charTerm.resizeBuffer(TOKEN_LEN);
      charTerm.setLength(TOKEN_LEN);
      nextSave = _TestUtil.nextInt(random, 500000, 1000000);
    }

    /**
     * Fills the term buffer with the next random token.
     *
     * @return {@code false} once {@code tokensPerDoc} tokens have been
     *         produced since the last reset, {@code true} otherwise
     */
    @Override
    public boolean incrementToken() {
      if (tokenCount >= tokensPerDoc) {
        return false;
      }
      _TestUtil.randomFixedLengthUnicodeString(random, chars, 0, TOKEN_LEN);
      tokenCount++;
      if (--nextSave == 0) {
        final String s = new String(chars, 0, TOKEN_LEN);
        System.out.println("TEST: save term=" + s + " [" + toHexString(s) + "]");
        savedTerms.add(s);
        nextSave = _TestUtil.nextInt(random, 500000, 1000000);
      }
      return true;
    }

    /** Restarts the per-document token counter; the sampling countdown is kept. */
    @Override
    public void reset() {
      tokenCount = 0;
    }
  }

  /**
   * Builds the index, then verifies the sampled terms and the CheckIndex
   * term count.
   *
   * @throws IOException if indexing, reading or checking the index fails
   */
  @Ignore("Takes ~4 hours to run on a fast machine!!")
  public void test2BTerms() throws IOException {

    final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;

    final int TERMS_PER_DOC = _TestUtil.nextInt(random, 100000, 1000000);

    List<String> savedTerms = null;

    MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BTerms"));
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    dir.setCheckIndexOnClose(false); // don't double-checkindex
    //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));

    // Manual switch: set to false (together with the commented-out directory
    // above) to skip indexing and verify an index that already exists.
    if (true) {

      IndexWriter w = new IndexWriter(dir,
                                      new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
                                      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                      .setRAMBufferSizeMB(256.0)
                                      .setMergeScheduler(new ConcurrentMergeScheduler())
                                      .setMergePolicy(newLogMergePolicy(false, 10))
                                      .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

      MergePolicy mp = w.getConfig().getMergePolicy();
      if (mp instanceof LogByteSizeMergePolicy) {
        // 1 petabyte:
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
      }

      Document doc = new Document();

      final MyTokenStream ts = new MyTokenStream(TERMS_PER_DOC);
      Field field = new Field("field", ts);
      field.setIndexOptions(IndexOptions.DOCS_ONLY);
      field.setOmitNorms(true);
      doc.add(field);
      //w.setInfoStream(System.out);
      final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC);

      System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
      System.out.println("numDocs=" + numDocs);

      // NOTE(review): everything from this loop down to the declaration of
      // numSavedTerms was lost when the patch was extracted. It has been
      // reconstructed from the surviving usages (w, doc, ts.savedTerms, r,
      // numSavedTerms, findTerms); the progress messages and the optimize()
      // call are a best reconstruction. Confirm against the upstream file.
      for (int i = 0; i < numDocs; i++) {
        final long t0 = System.currentTimeMillis();
        // The same Document is re-added each time; the stream is reset per document.
        w.addDocument(doc);
        System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis()-t0) + " msec");
      }
      savedTerms = ts.savedTerms;

      // The CheckIndex assertion below reads only segment 0, so the index
      // has to be merged down to a single segment first.
      System.out.println("TEST: optimize");
      w.optimize();
      System.out.println("TEST: close writer");
      w.close();
    }

    System.out.println("TEST: open reader");
    final IndexReader r = IndexReader.open(dir);
    if (savedTerms == null) {
      // Indexing was skipped, so sample terms from the existing index instead.
      savedTerms = findTerms(r);
    }
    final int numSavedTerms = savedTerms.size();
    // Copy the last (up to) 10 saved terms; the copy keeps them intact when
    // testSavedTerms shuffles savedTerms later. Clamp the start index so a
    // small sample cannot throw.
    final List<String> bigOrdTerms = new ArrayList<String>(
        savedTerms.subList(Math.max(0, numSavedTerms-10), numSavedTerms));
    System.out.println("TEST: test big ord terms...");
    testSavedTerms(r, bigOrdTerms);
    System.out.println("TEST: test all saved terms...");
    testSavedTerms(r, savedTerms);
    r.close();

    System.out.println("TEST: now CheckIndex...");
    CheckIndex.Status status = _TestUtil.checkIndex(dir);
    final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
    assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);
    dir.close();
  }

  /**
   * Walks every term in the reader and samples one at random intervals of
   * 500000 to 1000000 terms.
   *
   * @param r reader to enumerate
   * @return the text of each sampled term, in enumeration order
   * @throws IOException if the term enumeration fails
   */
  private List<String> findTerms(IndexReader r) throws IOException {
    System.out.println("TEST: findTerms");
    final TermEnum termEnum = r.terms();
    final List<String> savedTerms = new ArrayList<String>();
    try {
      int nextSave = _TestUtil.nextInt(random, 500000, 1000000);
      while (termEnum.next()) {
        if (--nextSave == 0) {
          savedTerms.add(termEnum.term().text());
          System.out.println("TEST: add " + termEnum.term());
          nextSave = _TestUtil.nextInt(random, 500000, 1000000);
        }
      }
    } finally {
      termEnum.close();
    }
    return savedTerms;
  }

  /**
   * Formats the UTF-8 bytes of {@code s} as space-separated hex values
   * (not zero-padded), for debug output.
   *
   * @param s string to encode
   * @return hex rendering of the UTF-8 encoding of {@code s}
   */
  private String toHexString(String s) {
    byte[] bytes;
    try {
      bytes = s.getBytes("UTF-8");
    } catch (UnsupportedEncodingException uee) {
      throw new RuntimeException(uee);
    }
    StringBuilder sb = new StringBuilder();
    for (byte b : bytes) {
      if (sb.length() > 0) {
        sb.append(' ');
      }
      sb.append(Integer.toHexString(b&0xFF));
    }
    return sb.toString();
  }

  /**
   * Runs {@code 10 * terms.size()} random lookups. Each picked term must
   * match at least one document via {@link TermQuery}, and seeking the term
   * enumeration to it must land exactly on that term. All failures are
   * printed, then the method asserts that none occurred.
   *
   * @param r reader over the index under test
   * @param terms terms expected to exist in field "field"; shuffled in place
   * @throws IOException if searching or term enumeration fails
   */
  private void testSavedTerms(IndexReader r, List<String> terms) throws IOException {
    System.out.println("TEST: run " + terms.size() + " terms on reader=" + r);
    IndexSearcher s = new IndexSearcher(r);
    // Shuffle with the test's Random so a failing run can be reproduced from its seed.
    Collections.shuffle(terms, random);
    boolean failed = false;
    try {
      for (int iter = 0; iter < 10*terms.size(); iter++) {
        final String term = terms.get(random.nextInt(terms.size()));
        System.out.println("TEST: search " + term + " [" + toHexString(term) + "]");
        final long t0 = System.currentTimeMillis();
        final int count = s.search(new TermQuery(new Term("field", term)), 1).totalHits;
        // Stop the clock before any printing so only the search is timed.
        final long t1 = System.currentTimeMillis();
        if (count <= 0) {
          System.out.println(" FAILED: count=" + count);
          failed = true;
        }
        System.out.println(" took " + (t1-t0) + " millis");

        final TermEnum termEnum = r.terms(new Term("field", term));
        try {
          // term() is null when the seek ran past the last term in the index;
          // report that as a failure rather than throwing.
          final Term found = termEnum.term();
          final String text = found == null ? null : found.text();
          if (!term.equals(text)) {
            System.out.println(" FAILED: wrong term: got " + text + " ["
                               + (text == null ? "null" : toHexString(text)) + "]");
            failed = true;
          }
        } finally {
          termEnum.close();
        }
      }
    } finally {
      // Closing a searcher built from a reader leaves the reader open for the caller.
      s.close();
    }
    assertFalse(failed);
  }
}