lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/DocHelper.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.UnsupportedEncodingException;
  22 import java.util.HashMap;
  23 import java.util.Map;
  24 import java.util.Random;
  25
  26 import org.apache.lucene.analysis.Analyzer;
  27 import org.apache.lucene.analysis.MockAnalyzer;
  28 import org.apache.lucene.analysis.MockTokenizer;
  29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  30 import org.apache.lucene.document.Document;
  31 import org.apache.lucene.document.Field;
  32 import org.apache.lucene.document.Fieldable;
  33 import org.apache.lucene.search.Similarity;
  34 import org.apache.lucene.store.Directory;
  35 import org.apache.lucene.util.LuceneTestCase;
  36
  37 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
  38
  39 class DocHelper {
  40   public static final String FIELD_1_TEXT = "field one text";
  41   public static final String TEXT_FIELD_1_KEY = "textField1";
  42   public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT,
  43       Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
  44
  45   public static final String FIELD_2_TEXT = "field field field two text";
  46   //Fields will be lexicographically sorted.  So, the order is: field, text, two
  47   public static final int [] FIELD_2_FREQS = {3, 1, 1};
  48   public static final String TEXT_FIELD_2_KEY = "textField2";
  49   public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
  50
  51   public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
  52   public static final String TEXT_FIELD_3_KEY = "textField3";
  53   public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
  54   static { textField3.setOmitNorms(true); }
  55
  56   public static final String KEYWORD_TEXT = "Keyword";
  57   public static final String KEYWORD_FIELD_KEY = "keyField";
  58   public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT,
  59       Field.Store.YES, Field.Index.NOT_ANALYZED);
  60
  61   public static final String NO_NORMS_TEXT = "omitNormsText";
  62   public static final String NO_NORMS_KEY = "omitNorms";
  63   public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
  64       Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
  65
  66   public static final String NO_TF_TEXT = "analyzed with no tf and positions";
  67   public static final String NO_TF_KEY = "omitTermFreqAndPositions";
  68   public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
  69       Field.Store.YES, Field.Index.ANALYZED);
  70   static {
  71     noTFField.setOmitTermFreqAndPositions(true);
  72   }
  73
  74   public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
  75   public static final String UNINDEXED_FIELD_KEY = "unIndField";
  76   public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
  77       Field.Store.YES, Field.Index.NO);
  78
  79
  80   public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
  81   public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
  82   public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT,
  83       Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
  84
  85   public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
  86   public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
  87   public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
  88       Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
  89
  90   public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
  91   public static byte [] LAZY_FIELD_BINARY_BYTES;
  92   public static Field lazyFieldBinary;
  93
  94   public static final String LAZY_FIELD_KEY = "lazyField";
  95   public static final String LAZY_FIELD_TEXT = "These are some field bytes";
  96   public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
  97
  98   public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
  99   public static String LARGE_LAZY_FIELD_TEXT;
 100   public static Field largeLazyField;
 101
 102   //From Issue 509
 103   public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
 104   public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
 105   public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
 106       Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
 107
 108   public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
 109   //Fields will be lexicographically sorted.  So, the order is: field, text, two
 110   public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
 111   public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
 112   public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
 113           Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
 114
 115
 116
 117
 118   public static Map<String,Object> nameValues = null;
 119
 120   // ordered list of all the fields...
 121   // could use LinkedHashMap for this purpose if Java1.4 is OK
 122   public static Field[] fields = new Field[] {
 123     textField1,
 124     textField2,
 125     textField3,
 126     keyField,
 127     noNormsField,
 128     noTFField,
 129     unIndField,
 130     unStoredField1,
 131     unStoredField2,
 132     textUtfField1,
 133     textUtfField2,
 134     lazyField,
 135     lazyFieldBinary,//placeholder for binary field, since this is null.  It must be second to last.
 136     largeLazyField//placeholder for large field, since this is null.  It must always be last
 137   };
 138
 139   public static Map<String,Fieldable> all     =new HashMap<String,Fieldable>();
 140   public static Map<String,Fieldable> indexed =new HashMap<String,Fieldable>();
 141   public static Map<String,Fieldable> stored  =new HashMap<String,Fieldable>();
 142   public static Map<String,Fieldable> unstored=new HashMap<String,Fieldable>();
 143   public static Map<String,Fieldable> unindexed=new HashMap<String,Fieldable>();
 144   public static Map<String,Fieldable> termvector=new HashMap<String,Fieldable>();
 145   public static Map<String,Fieldable> notermvector=new HashMap<String,Fieldable>();
 146   public static Map<String,Fieldable> lazy= new HashMap<String,Fieldable>();
 147   public static Map<String,Fieldable> noNorms=new HashMap<String,Fieldable>();
 148   public static Map<String,Fieldable> noTf=new HashMap<String,Fieldable>();
 149
 150   static {
 151     //Initialize the large Lazy Field
 152     StringBuilder buffer = new StringBuilder();
 153     for (int i = 0; i < 10000; i++)
 154     {
 155       buffer.append("Lazily loading lengths of language in lieu of laughing ");
 156     }
 157
 158     try {
 159       LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
 160     } catch (UnsupportedEncodingException e) {
 161     }
 162     lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
 163     fields[fields.length - 2] = lazyFieldBinary;
 164     LARGE_LAZY_FIELD_TEXT = buffer.toString();
 165     largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
 166     fields[fields.length - 1] = largeLazyField;
 167     for (int i=0; i<fields.length; i++) {
 168       Fieldable f = fields[i];
 169       add(all,f);
 170       if (f.isIndexed()) add(indexed,f);
 171       else add(unindexed,f);
 172       if (f.isTermVectorStored()) add(termvector,f);
 173       if (f.isIndexed() && !f.isTermVectorStored()) add(notermvector,f);
 174       if (f.isStored()) add(stored,f);
 175       else add(unstored,f);
 176       if (f.getOmitNorms()) add(noNorms,f);
 177       if (f.getOmitTermFreqAndPositions()) add(noTf,f);
 178       if (f.isLazy()) add(lazy, f);
 179     }
 180   }
 181
 182
 183   private static void add(Map<String,Fieldable> map, Fieldable field) {
 184     map.put(field.name(), field);
 185   }
 186
 187
 188   static
 189   {
 190     nameValues = new HashMap<String,Object>();
 191     nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
 192     nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
 193     nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
 194     nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
 195     nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
 196     nameValues.put(NO_TF_KEY, NO_TF_TEXT);
 197     nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
 198     nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
 199     nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
 200     nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
 201     nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
 202     nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
 203     nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
 204     nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
 205   }
 206
 207   /**
 208    * Adds the fields above to a document
 209    * @param doc The document to write
 210    */
 211   public static void setupDoc(Document doc) {
 212     for (int i=0; i<fields.length; i++) {
 213       doc.add(fields[i]);
 214     }
 215   }
 216
 217   /**
 218    * Writes the document to the directory using a segment
 219    * named "test"; returns the SegmentInfo describing the new
 220    * segment
 221    * @param dir
 222    * @param doc
 223    * @throws IOException
 224    */
 225   public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
 226   {
 227     return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
 228   }
 229
 230   /**
 231    * Writes the document to the directory using the analyzer
 232    * and the similarity score; returns the SegmentInfo
 233    * describing the new segment
 234    * @param dir
 235    * @param analyzer
 236    * @param similarity
 237    * @param doc
 238    * @throws IOException
 239    */
 240   public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
 241     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
 242         TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
 243     //writer.setUseCompoundFile(false);
 244     writer.addDocument(doc);
 245     writer.commit();
 246     SegmentInfo info = writer.newestSegment();
 247     writer.close();
 248     return info;
 249   }
 250
 251   public static int numFields(Document doc) {
 252     return doc.getFields().size();
 253   }
 254 }