1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
22 import java.util.HashMap;
24 import java.util.Random;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.MockAnalyzer;
28 import org.apache.lucene.analysis.MockTokenizer;
29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
30 import org.apache.lucene.document.Document;
31 import org.apache.lucene.document.Field;
32 import org.apache.lucene.document.Fieldable;
33 import org.apache.lucene.search.Similarity;
34 import org.apache.lucene.store.Directory;
35 import org.apache.lucene.util.LuceneTestCase;
37 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
// Text field 1: stored and analyzed, with term vectors disabled.
40 public static final String FIELD_1_TEXT = "field one text";
41 public static final String TEXT_FIELD_1_KEY = "textField1";
42 public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT,
43 Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
// Text field 2: stored and analyzed, with term vectors that carry positions and offsets.
45 public static final String FIELD_2_TEXT = "field field field two text";
46 //Terms (not fields) will be lexicographically sorted. So, the order is: field, text, two
// Occurrence counts of those terms in FIELD_2_TEXT, listed in the sorted order given above.
47 public static final int [] FIELD_2_FREQS = {3, 1, 1};
48 public static final String TEXT_FIELD_2_KEY = "textField2";
49 public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// Text field 3: stored and analyzed; norms are switched off right after construction.
51 public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
52 public static final String TEXT_FIELD_3_KEY = "textField3";
53 public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
54 static { textField3.setOmitNorms(true); }
// Keyword field: stored, indexed as a single un-analyzed value.
56 public static final String KEYWORD_TEXT = "Keyword";
57 public static final String KEYWORD_FIELD_KEY = "keyField";
58 public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT,
59 Field.Store.YES, Field.Index.NOT_ANALYZED);
// No-norms field: stored, indexed with the NOT_ANALYZED_NO_NORMS option.
61 public static final String NO_NORMS_TEXT = "omitNormsText";
62 public static final String NO_NORMS_KEY = "omitNorms";
63 public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
64 Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
// No-TF field: stored and analyzed; term frequencies and positions are omitted by the call below.
66 public static final String NO_TF_TEXT = "analyzed with no tf and positions";
67 public static final String NO_TF_KEY = "omitTermFreqAndPositions";
68 public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
69 Field.Store.YES, Field.Index.ANALYZED);
// NOTE(review): this statement presumably sits inside a static initializer whose braces are not visible here - confirm.
71 noTFField.setOmitTermFreqAndPositions(true);
// Unindexed field: stored only (Field.Index.NO).
74 public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
75 public static final String UNINDEXED_FIELD_KEY = "unIndField";
76 public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
77 Field.Store.YES, Field.Index.NO);
// Unstored field 1: analyzed but not stored, without term vectors.
80 public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
81 public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
82 public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT,
83 Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
// Unstored field 2: same text as unstored field 1, but with term vectors enabled.
85 public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
86 public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
87 public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
88 Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
// Binary lazy field: declared without a value here; the byte array and the Field are
// assigned later by the initialization code that calls getBytes("UTF8").
90 public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
91 public static byte [] LAZY_FIELD_BINARY_BYTES;
92 public static Field lazyFieldBinary;
// Lazy text field: stored and analyzed.
94 public static final String LAZY_FIELD_KEY = "lazyField";
95 public static final String LAZY_FIELD_TEXT = "These are some field bytes";
96 public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
// Large lazy field: text and Field are declared without a value here and are assigned
// later from the StringBuilder that the initialization code fills.
98 public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
99 public static String LARGE_LAZY_FIELD_TEXT;
100 public static Field largeLazyField;
// UTF text field 1: like text field 1, but the text contains the non-ASCII character U+4E00.
103 public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
104 public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
105 public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
106 Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
// UTF text field 2: like text field 2 (term vectors with positions and offsets), with U+4E00 in the text.
108 public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
109 //Terms (not fields) will be lexicographically sorted. So, the order is: field, text, two (written with a leading U+4E00 in this text)
// Occurrence counts of those terms in FIELD_UTF2_TEXT, listed in the sorted order given above.
110 public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
111 public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
112 public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
113 Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// Map from field key to the expected field value; starts as null and is replaced by a
// HashMap in the initialization code that issues the nameValues.put(...) calls.
118 public static Map<String,Object> nameValues = null;
120 // ordered list of all the fields...
121 // could use LinkedHashMap for this purpose if Java 1.4 is OK
// NOTE(review): the leading entries of this array are not visible in this view.
122 public static Field[] fields = new Field[] {
135 lazyFieldBinary,//placeholder for the binary field, which is still null at this point. It must be second to last because fields[fields.length - 2] is overwritten later.
136 largeLazyField//placeholder for the large field, which is still null at this point. It must always be last because fields[fields.length - 1] is overwritten later.
// Lookup tables keyed by field name. The loop over the fields array places each field
// into the tables that match its properties (indexed/unindexed, stored/unstored,
// term vector/no term vector, lazy, norms omitted, term frequencies omitted).
// NOTE(review): no statement that fills "all" is visible in this view - confirm where it is populated.
139 public static Map<String,Fieldable> all =new HashMap<String,Fieldable>();
140 public static Map<String,Fieldable> indexed =new HashMap<String,Fieldable>();
141 public static Map<String,Fieldable> stored =new HashMap<String,Fieldable>();
142 public static Map<String,Fieldable> unstored=new HashMap<String,Fieldable>();
143 public static Map<String,Fieldable> unindexed=new HashMap<String,Fieldable>();
144 public static Map<String,Fieldable> termvector=new HashMap<String,Fieldable>();
// Holds fields that are indexed but keep no term vector (see the combined condition in the loop).
145 public static Map<String,Fieldable> notermvector=new HashMap<String,Fieldable>();
146 public static Map<String,Fieldable> lazy= new HashMap<String,Fieldable>();
147 public static Map<String,Fieldable> noNorms=new HashMap<String,Fieldable>();
148 public static Map<String,Fieldable> noTf=new HashMap<String,Fieldable>();
// Fills in the two late-bound fields (binary and large lazy) and then sorts every
// field into the per-property lookup maps.
151 //Initialize the large Lazy Field
152 StringBuilder buffer = new StringBuilder();
// Repeat the sentence 10000 times to obtain a large field value.
153 for (int i = 0; i < 10000; i++)
155 buffer.append("Lazily loading lengths of language in lieu of laughing ");
// Encode the binary payload; getBytes(String) declares UnsupportedEncodingException, caught below.
159 LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
// NOTE(review): the catch body is not visible in this view - confirm the exception is not silently swallowed.
160 } catch (UnsupportedEncodingException e) {
// Replace the two null placeholders at the end of the fields array with the real fields.
162 lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
163 fields[fields.length - 2] = lazyFieldBinary;
164 LARGE_LAZY_FIELD_TEXT = buffer.toString();
165 largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
166 fields[fields.length - 1] = largeLazyField;
// Classify each field by its properties; a field can land in several maps.
167 for (int i=0; i<fields.length; i++) {
168 Fieldable f = fields[i];
// Every field is either indexed or unindexed.
170 if (f.isIndexed()) add(indexed,f);
171 else add(unindexed,f);
172 if (f.isTermVectorStored()) add(termvector,f);
// Unindexed fields are deliberately excluded from notermvector.
173 if (f.isIndexed() && !f.isTermVectorStored()) add(notermvector,f);
// Every field is either stored or unstored.
174 if (f.isStored()) add(stored,f);
175 else add(unstored,f);
176 if (f.getOmitNorms()) add(noNorms,f);
177 if (f.getOmitTermFreqAndPositions()) add(noTf,f);
178 if (f.isLazy()) add(lazy, f);
// Registers the field in the given map under the field's own name; an existing entry
// with the same name is replaced (Map.put semantics).
183 private static void add(Map<String,Fieldable> map, Fieldable field) {
184 map.put(field.name(), field);
// Builds the key -> expected value table. Every entry maps a field key to its text
// constant, except the binary lazy field, whose value is the raw byte array.
190 nameValues = new HashMap<String,Object>();
191 nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
192 nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
193 nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
194 nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
195 nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
196 nameValues.put(NO_TF_KEY, NO_TF_TEXT);
197 nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
198 nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
199 nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
200 nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
// Value is a byte[] here, which is why the map is declared as Map<String,Object>.
201 nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
202 nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
203 nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
204 nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
208 * Adds the fields above to a document
209 * @param doc The document to write
211 public static void setupDoc(Document doc) {
// Visits every entry of the fields array in order.
// NOTE(review): the loop body is not visible in this view - presumably it adds fields[i] to doc; confirm.
212 for (int i=0; i<fields.length; i++) {
218 * Writes the document to the directory using a whitespace-tokenizing
219 * MockAnalyzer and no similarity; returns the SegmentInfo describing the new
223 * @throws IOException
225 public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
// Convenience overload: delegates to the five-argument writeDoc with a MockAnalyzer built
// from the given random and MockTokenizer.WHITESPACE, and with a null similarity.
227 return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
231 * Writes the document to the directory using the given analyzer
232 * and similarity; returns the SegmentInfo
233 * describing the new segment
238 * @throws IOException
240 public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
// Open a writer on dir, configured with the given analyzer and similarity. The
// commented-out fragment on the next line records an alternative, randomized config factory.
// NOTE(review): random is not used by any statement visible in this view.
241 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
242 TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
243 //writer.setUseCompoundFile(false);
244 writer.addDocument(doc);
// Fetch the descriptor of the most recently written segment.
// NOTE(review): no commit/close of the writer and no return statement are visible in this view - confirm the writer is released.
246 SegmentInfo info = writer.newestSegment();
// Returns the number of fields currently held by the document (size of doc.getFields()).
251 public static int numFields(Document doc) {
252 return doc.getFields().size();