X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSorter.java diff --git a/lucene-java-3.5.0/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSorter.java b/lucene-java-3.5.0/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSorter.java new file mode 100644 index 0000000..45af696 --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSorter.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; + +public class TestIndexSorter extends LuceneTestCase { + + private static final int NUM_DOCS = 4; + private String[] fieldNames = new String[] { + "id", + "url", + "site", + "content", + "host", + "anchor", + "boost" + }; + + Directory inputDir = null; + Directory outputDir = null; + + + @Override + public void setUp() throws Exception { + super.setUp(); + // create test index + inputDir = new RAMDirectory(); + IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_31, new WhitespaceAnalyzer(Version.LUCENE_31)); + IndexWriter writer = new IndexWriter(inputDir, cfg); + // create test documents + for (int i = 0; i < NUM_DOCS; i++) { + Document doc = new Document(); + for (int k = 0; k < fieldNames.length; k++) { + Field f; + Store s; + Index ix; + TermVector tv = TermVector.NO; + String val = null; + if (fieldNames[k].equals("id")) { + s = Store.YES; + ix = Index.NOT_ANALYZED; + val = String.valueOf(i); + } else if (fieldNames[k].equals("host")) { + s = Store.YES; + ix = Index.NOT_ANALYZED; + val = "www.example" + i + ".com"; + } else if (fieldNames[k].equals("site")) { + s = Store.NO; + ix = Index.NOT_ANALYZED; + val = "www.example" + i + ".com"; + } else if (fieldNames[k].equals("content")) { + s = Store.NO; + ix = Index.ANALYZED; + tv = TermVector.YES; + val = "This is the content of the " + i + "-th document."; + } else if (fieldNames[k].equals("boost")) { + s = Store.YES; + ix = Index.NO; + float boost = (float)i; + val = String.valueOf(boost); + } else { + s = Store.YES; + ix = Index.ANALYZED; + if (fieldNames[k].equals("anchor")) { + val = "anchors to " + i + "-th page."; + } else if (fieldNames[k].equals("url")) { + val = "http://www.example" + i + ".com/" + i + ".html"; + } + } + f = new Field(fieldNames[k], val, s, ix, tv); + doc.add(f); + } + writer.addDocument(doc); + } + writer.optimize(); + writer.close(); + outputDir = new RAMDirectory(); + } + + public void testSorting() throws Exception { + IndexSorter sorter = new IndexSorter(); + sorter.sort(inputDir, outputDir, "boost"); + + // read back documents + IndexReader reader = IndexReader.open(outputDir); + assertEquals(reader.numDocs(), NUM_DOCS); + for (int i = 0; i < reader.maxDoc(); i++) { + Document doc = reader.document(i); + Field f = doc.getField("content"); + assertNull(f); + String boost = doc.get("boost"); + int origId = NUM_DOCS - i - 1; + String cmp = String.valueOf((float)origId); + assertEquals(cmp, boost); + // check that vectors are in sync + TermFreqVector tfv = reader.getTermFreqVector(i, "content"); + assertTrue(tfv.indexOf(origId + "-th") != -1); + } + reader.close(); + } + +}