lucene-java-3.4.0/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSorter.java

   1 /**
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 package org.apache.lucene.index;
  19
  20 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  21 import org.apache.lucene.document.Document;
  22 import org.apache.lucene.document.Field;
  23 import org.apache.lucene.document.Field.Index;
  24 import org.apache.lucene.document.Field.Store;
  25 import org.apache.lucene.document.Field.TermVector;
  26 import org.apache.lucene.index.IndexReader;
  27 import org.apache.lucene.index.IndexWriter;
  28 import org.apache.lucene.store.Directory;
  29 import org.apache.lucene.store.RAMDirectory;
  30 import org.apache.lucene.util.LuceneTestCase;
  31 import org.apache.lucene.util.Version;
  32
  33 public class TestIndexSorter extends LuceneTestCase {
  34
  35   private static final int NUM_DOCS = 4;
  36   private String[] fieldNames = new String[] {
  37       "id",
  38       "url",
  39       "site",
  40       "content",
  41       "host",
  42       "anchor",
  43       "boost"
  44   };
  45
  46   Directory inputDir = null;
  47   Directory outputDir = null;
  48
  49
  50   @Override
  51   public void setUp() throws Exception {
  52     super.setUp();
  53     // create test index
  54     inputDir = new RAMDirectory();
  55     IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_31, new WhitespaceAnalyzer(Version.LUCENE_31));
  56     IndexWriter writer = new IndexWriter(inputDir, cfg);
  57     // create test documents
  58     for (int i = 0; i < NUM_DOCS; i++) {
  59       Document doc = new Document();
  60       for (int k = 0; k < fieldNames.length; k++) {
  61         Field f;
  62         Store s;
  63         Index ix;
  64         TermVector tv = TermVector.NO;
  65         String val = null;
  66         if (fieldNames[k].equals("id")) {
  67           s = Store.YES;
  68           ix = Index.NOT_ANALYZED;
  69           val = String.valueOf(i);
  70         } else if (fieldNames[k].equals("host")) {
  71           s = Store.YES;
  72           ix = Index.NOT_ANALYZED;
  73           val = "www.example" + i + ".com";
  74         } else if (fieldNames[k].equals("site")) {
  75           s = Store.NO;
  76           ix = Index.NOT_ANALYZED;
  77           val = "www.example" + i + ".com";
  78         } else if (fieldNames[k].equals("content")) {
  79           s = Store.NO;
  80           ix = Index.ANALYZED;
  81           tv = TermVector.YES;
  82           val = "This is the content of the " + i + "-th document.";
  83         } else if (fieldNames[k].equals("boost")) {
  84           s = Store.YES;
  85           ix = Index.NO;
  86           float boost = (float)i;
  87           val = String.valueOf(boost);
  88         } else {
  89           s = Store.YES;
  90           ix = Index.ANALYZED;
  91           if (fieldNames[k].equals("anchor")) {
  92             val = "anchors to " + i + "-th page.";
  93           } else if (fieldNames[k].equals("url")) {
  94             val = "http://www.example" + i + ".com/" + i + ".html";
  95           }
  96         }
  97         f = new Field(fieldNames[k], val, s, ix, tv);
  98         doc.add(f);
  99       }
 100       writer.addDocument(doc);
 101     }
 102     writer.optimize();
 103     writer.close();
 104     outputDir = new RAMDirectory();
 105   }
 106
 107   public void testSorting() throws Exception {
 108     IndexSorter sorter = new IndexSorter();
 109     sorter.sort(inputDir, outputDir, "boost");
 110
 111     // read back documents
 112     IndexReader reader = IndexReader.open(outputDir);
 113     assertEquals(reader.numDocs(), NUM_DOCS);
 114     for (int i = 0; i < reader.maxDoc(); i++) {
 115       Document doc = reader.document(i);
 116       Field f = doc.getField("content");
 117       assertNull(f);
 118       String boost = doc.get("boost");
 119       int origId = NUM_DOCS - i - 1;
 120       String cmp = String.valueOf((float)origId);
 121       assertEquals(cmp, boost);
 122       // check that vectors are in sync
 123       TermFreqVector tfv = reader.getTermFreqVector(i, "content");
 124       assertTrue(tfv.indexOf(origId + "-th") != -1);
 125     }
 126     reader.close();
 127   }
 128
 129 }