2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.index;
20 import org.apache.lucene.analysis.WhitespaceAnalyzer;
21 import org.apache.lucene.document.Document;
22 import org.apache.lucene.document.Field;
23 import org.apache.lucene.document.Field.Index;
24 import org.apache.lucene.document.Field.Store;
25 import org.apache.lucene.document.Field.TermVector;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.IndexWriter;
28 import org.apache.lucene.store.Directory;
29 import org.apache.lucene.store.RAMDirectory;
30 import org.apache.lucene.util.LuceneTestCase;
31 import org.apache.lucene.util.Version;
33 public class TestIndexSorter extends LuceneTestCase {
// Number of documents setUp() adds to the input index; testSorting() checks the sorted index against this count.
35 private static final int NUM_DOCS = 4;
36 private String[] fieldNames = new String[] {
// Source index; setUp() assigns a RAMDirectory and writes the test documents into it.
46 Directory inputDir = null;
// Destination index; setUp() assigns a fresh RAMDirectory and testSorting() passes it to the sorter as the output.
47 Directory outputDir = null;
51 public void setUp() throws Exception {
// Test fixture: builds a RAMDirectory-backed input index holding NUM_DOCS documents
// (one field per entry of fieldNames) and then creates the output directory.
// NOTE(review): several original lines (declarations of ix/val/s/f, closing braces,
// any writer.close()) are not visible in this view — confirm against the full file.
54 inputDir = new RAMDirectory();
// Whitespace-only tokenization, with both the config and the analyzer using Version.LUCENE_31.
55 IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_31, new WhitespaceAnalyzer(Version.LUCENE_31));
56 IndexWriter writer = new IndexWriter(inputDir, cfg);
57 // create test documents
58 for (int i = 0; i < NUM_DOCS; i++) {
59 Document doc = new Document();
// Choose the value and indexing options for each field based on its name.
60 for (int k = 0; k < fieldNames.length; k++) {
// Term vectors are off by default; presumably enabled for some field on a line not shown here.
64 TermVector tv = TermVector.NO;
66 if (fieldNames[k].equals("id")) {
// "id" is indexed as a single untokenized term holding the document's creation index.
68 ix = Index.NOT_ANALYZED;
69 val = String.valueOf(i);
70 } else if (fieldNames[k].equals("host")) {
72 ix = Index.NOT_ANALYZED;
73 val = "www.example" + i + ".com";
74 } else if (fieldNames[k].equals("site")) {
// "site" gets the same untokenized value as "host".
76 ix = Index.NOT_ANALYZED;
77 val = "www.example" + i + ".com";
78 } else if (fieldNames[k].equals("content")) {
// The text embeds the token "<i>-th", which testSorting() looks up in the term vector.
82 val = "This is the content of the " + i + "-th document.";
83 } else if (fieldNames[k].equals("boost")) {
// Stored as the string form of a float (e.g. "0.0", "1.0"); testSorting() sorts on this
// field and compares against String.valueOf((float) n).
86 float boost = (float)i;
87 val = String.valueOf(boost);
// NOTE(review): the lines separating this second if-chain from the one above are not
// visible here, so how the two chains nest cannot be confirmed from this view.
91 if (fieldNames[k].equals("anchor")) {
92 val = "anchors to " + i + "-th page.";
93 } else if (fieldNames[k].equals("url")) {
94 val = "http://www.example" + i + ".com/" + i + ".html";
// Build the field from the selected value, store mode (s), index mode (ix) and term-vector mode (tv).
97 f = new Field(fieldNames[k], val, s, ix, tv);
100 writer.addDocument(doc);
// New RAMDirectory that testSorting() hands to the sorter as its output.
104 outputDir = new RAMDirectory();
107 public void testSorting() throws Exception {
108 IndexSorter sorter = new IndexSorter();
109 sorter.sort(inputDir, outputDir, "boost");
111 // read back documents
112 IndexReader reader = IndexReader.open(outputDir);
113 assertEquals(reader.numDocs(), NUM_DOCS);
114 for (int i = 0; i < reader.maxDoc(); i++) {
115 Document doc = reader.document(i);
116 Field f = doc.getField("content");
118 String boost = doc.get("boost");
119 int origId = NUM_DOCS - i - 1;
120 String cmp = String.valueOf((float)origId);
121 assertEquals(cmp, boost);
122 // check that vectors are in sync
123 TermFreqVector tfv = reader.getTermFreqVector(i, "content");
124 assertTrue(tfv.indexOf(origId + "-th") != -1);