1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
22 import java.util.HashMap;
24 import java.util.Random;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.MockAnalyzer;
28 import org.apache.lucene.analysis.MockTokenizer;
29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
30 import org.apache.lucene.document.Document;
31 import org.apache.lucene.document.Field;
32 import org.apache.lucene.document.Fieldable;
33 import org.apache.lucene.search.Similarity;
34 import org.apache.lucene.document.Field.Index;
35 import org.apache.lucene.document.Field.Store;
36 import org.apache.lucene.document.Field.TermVector;
37 import org.apache.lucene.index.FieldInfo.IndexOptions;
38 import org.apache.lucene.store.Directory;
39 import org.apache.lucene.util.LuceneTestCase;
41 import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
// ---- Plain analyzed/stored text fields shared by index-format tests ----
// Stored + analyzed, term vectors explicitly disabled.
44 public static final String FIELD_1_TEXT = "field one text";
45 public static final String TEXT_FIELD_1_KEY = "textField1";
46 public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT,
47 Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
// Stored + analyzed with full term vectors (positions + offsets);
// FIELD_2_FREQS gives the per-term frequencies for the sorted terms.
49 public static final String FIELD_2_TEXT = "field field field two text";
50 //Fields will be lexicographically sorted. So, the order is: field, text, two
51 public static final int [] FIELD_2_FREQS = {3, 1, 1};
52 public static final String TEXT_FIELD_2_KEY = "textField2";
53 public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// Stored + analyzed, but norms are omitted via the static initializer below.
55 public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
56 public static final String TEXT_FIELD_3_KEY = "textField3";
57 public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
58 static { textField3.setOmitNorms(true); }
// Stored, not analyzed: the whole value is indexed as a single term.
60 public static final String KEYWORD_TEXT = "Keyword";
61 public static final String KEYWORD_FIELD_KEY = "keyField";
62 public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT,
63 Field.Store.YES, Field.Index.NOT_ANALYZED);
// Not analyzed AND no norms, expressed directly through the Index enum.
65 public static final String NO_NORMS_TEXT = "omitNormsText";
66 public static final String NO_NORMS_KEY = "omitNorms";
67 public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
68 Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
// Analyzed field whose postings keep only docs (no term freqs/positions);
// the setIndexOptions call below presumably sits in a static initializer
// whose braces are not visible in this listing — TODO confirm.
70 public static final String NO_TF_TEXT = "analyzed with no tf and positions";
71 public static final String NO_TF_KEY = "omitTermFreqAndPositions";
72 public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
73 Field.Store.YES, Field.Index.ANALYZED);
75 noTFField.setIndexOptions(IndexOptions.DOCS_ONLY);
// Stored but never indexed: only retrievable from the stored-fields file.
78 public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
79 public static final String UNINDEXED_FIELD_KEY = "unIndField";
80 public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
81 Field.Store.YES, Field.Index.NO);
// Indexed but not stored — without term vectors...
84 public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
85 public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
86 public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT,
87 Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
// ...and with term vectors (same text, different vector setting).
89 public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
90 public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
91 public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
92 Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
// Binary field; bytes and Field instance are filled in by the static
// initializer below (they need a try/catch around getBytes).
94 public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
95 public static byte [] LAZY_FIELD_BINARY_BYTES;
96 public static Field lazyFieldBinary;
// Small stored text field used to exercise lazy field loading.
98 public static final String LAZY_FIELD_KEY = "lazyField";
99 public static final String LAZY_FIELD_TEXT = "These are some field bytes";
100 public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
// Large stored text field; text and Field are built in the static
// initializer (10000 repetitions of a sentence).
102 public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
103 public static String LARGE_LAZY_FIELD_TEXT;
104 public static Field largeLazyField;
// ---- Non-ASCII variants (contain U+4E00) to exercise UTF-8 handling ----
// Mirrors textField1: stored + analyzed, no term vectors.
107 public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
108 public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
109 public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
110 Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
// Mirrors textField2: full term vectors; FIELD_UTF2_FREQS matches the
// sorted-term order noted below.
112 public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
113 //Fields will be lexicographically sorted. So, the order is: field, text, two
114 public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
115 public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
116 public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
117 Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// Field name -> expected value; populated lazily (see the initializer
// further down) so the binary/large lazy values exist first.
122 public static Map<String,Object> nameValues = null;
124 // ordered list of all the fields...
125 // could use LinkedHashMap for this purpose if Java1.4 is OK
// NOTE(review): the middle entries of this array (orig. lines 127-138)
// are not visible in this listing; only the two trailing placeholder
// slots, patched in by the static initializer, are shown.
126 public static Field[] fields = new Field[] {
139 lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last.
140 largeLazyField//placeholder for large field, since this is null. It must always be last
// Category maps filled by the static initializer: each field is bucketed
// by its flags (indexed/stored/term-vector/norms/tf/lazy) for tests to
// look up by name.
143 public static Map<String,Fieldable> all =new HashMap<String,Fieldable>();
144 public static Map<String,Fieldable> indexed =new HashMap<String,Fieldable>();
145 public static Map<String,Fieldable> stored =new HashMap<String,Fieldable>();
146 public static Map<String,Fieldable> unstored=new HashMap<String,Fieldable>();
147 public static Map<String,Fieldable> unindexed=new HashMap<String,Fieldable>();
148 public static Map<String,Fieldable> termvector=new HashMap<String,Fieldable>();
149 public static Map<String,Fieldable> notermvector=new HashMap<String,Fieldable>();
150 public static Map<String,Fieldable> lazy= new HashMap<String,Fieldable>();
151 public static Map<String,Fieldable> noNorms=new HashMap<String,Fieldable>();
152 public static Map<String,Fieldable> noTf=new HashMap<String,Fieldable>();
// Static initializer: builds the lazy/large fields, patches them into the
// trailing slots of `fields`, then buckets every field into the category
// maps. NOTE(review): the opening `static {`, loop braces, and the catch
// body (orig. line 165) are not visible in this listing — confirm against
// the full file.
155 //Initialize the large Lazy Field
156 StringBuilder buffer = new StringBuilder();
157 for (int i = 0; i < 10000; i++)
159 buffer.append("Lazily loading lengths of language in lieu of laughing ");
// "UTF8" is a legal alias for UTF-8, so this catch is effectively dead;
// the catch body (if any) is not visible here.
163 LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
164 } catch (UnsupportedEncodingException e) {
// Replace the null placeholders: binary field second-to-last, large last
// (positions the fields[] comments above rely on).
166 lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
167 fields[fields.length - 2] = lazyFieldBinary;
168 LARGE_LAZY_FIELD_TEXT = buffer.toString();
169 largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
170 fields[fields.length - 1] = largeLazyField;
// Classify each field by its flags; a field may land in several maps
// (e.g. indexed + stored + termvector).
171 for (int i=0; i<fields.length; i++) {
172 Fieldable f = fields[i];
174 if (f.isIndexed()) add(indexed,f);
175 else add(unindexed,f);
176 if (f.isTermVectorStored()) add(termvector,f);
177 if (f.isIndexed() && !f.isTermVectorStored()) add(notermvector,f);
178 if (f.isStored()) add(stored,f);
179 else add(unstored,f);
180 if (f.getOmitNorms()) add(noNorms,f);
181 if (f.getIndexOptions() == IndexOptions.DOCS_ONLY) add(noTf,f);
182 if (f.isLazy()) add(lazy, f);
// Registers a field in one of the category maps, keyed by field name.
// Later fields with the same name would overwrite earlier ones.
187 private static void add(Map<String,Fieldable> map, Fieldable field) {
188 map.put(field.name(), field);
// Populate the name -> expected-value map for every field declared above.
// Values are Strings except LAZY_FIELD_BINARY_KEY, which maps to the raw
// byte[] — callers must special-case it. Presumably runs inside a static
// initializer after the lazy fields are built (the enclosing braces are
// not visible in this listing — TODO confirm).
194 nameValues = new HashMap<String,Object>();
195 nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
196 nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
197 nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
198 nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
199 nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
200 nameValues.put(NO_TF_KEY, NO_TF_TEXT);
201 nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
202 nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
203 nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
204 nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
205 nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
206 nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
207 nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
208 nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
212 * Adds the fields above to a document
213 * @param doc The document to write
// NOTE(review): the loop body (orig. lines 217-218, presumably
// doc.add(fields[i])) is not visible in this listing.
215 public static void setupDoc(Document doc) {
216 for (int i=0; i<fields.length; i++) {
222 * Writes the document to the directory using a segment
223 * named "test"; returns the SegmentInfo describing the new
227 * @throws IOException
// Convenience overload: delegates to the 5-arg writeDoc with a whitespace
// MockAnalyzer and no explicit Similarity.
229 public static SegmentInfo writeDoc(Random random, Directory dir, Document doc) throws IOException
231 return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
235 * Writes the document to the directory using the analyzer
236 * and the similarity score; returns the SegmentInfo
237 * describing the new segment
242 * @throws IOException
// NOTE(review): setSimilarity(null) is passed through when called from the
// 3-arg overload — presumably IndexWriterConfig tolerates/ignores a null
// Similarity here; confirm against the IndexWriterConfig javadoc.
244 public static SegmentInfo writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
245 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
246 TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
247 //writer.setUseCompoundFile(false);
248 writer.addDocument(doc);
// NOTE(review): the commit/close lines (orig. 249, 251-253) are not
// visible in this listing; newestSegment() reflects the just-added doc.
250 SegmentInfo info = writer.newestSegment();
// Returns the number of Fieldable entries on the document (counts every
// added field instance, including duplicates of the same name).
255 public static int numFields(Document doc) {
256 return doc.getFields().size();
// Builds a synthetic document: an "id" field (stringified n), an
// "indexname" field, then numFields analyzed text fields named
// "field1".."fieldN", all stored with full term vectors.
// NOTE(review): the lines that populate `sb` (orig. 264-265, 267-268) and
// the method's tail are not visible in this listing, so the exact field
// text cannot be stated here.
259 public static Document createDocument(int n, String indexName, int numFields) {
260 StringBuilder sb = new StringBuilder();
261 Document doc = new Document();
262 doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
263 doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
266 doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
// Loop starts at 1 so names run field2..field{numFields}, after the
// explicitly-added field1 above.
269 for (int i = 1; i < numFields; i++) {
270 doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES,
271 Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));