1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 import java.io.IOException;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.List;
27 import org.apache.lucene.analysis.MockAnalyzer;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field;
30 import org.apache.lucene.document.NumericField;
31 import org.apache.lucene.document.FieldSelector;
32 import org.apache.lucene.document.FieldSelectorResult;
33 import org.apache.lucene.document.Fieldable;
34 import org.apache.lucene.document.LoadFirstFieldSelector;
35 import org.apache.lucene.document.SetBasedFieldSelector;
36 import org.apache.lucene.index.FieldInfo.IndexOptions;
37 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
38 import org.apache.lucene.search.FieldCache;
39 import org.apache.lucene.store.AlreadyClosedException;
40 import org.apache.lucene.store.BufferedIndexInput;
41 import org.apache.lucene.store.Directory;
42 import org.apache.lucene.store.IndexInput;
43 import org.apache.lucene.store.IndexOutput;
44 import org.apache.lucene.util.LuceneTestCase;
45 import org.apache.lucene.util._TestUtil;
46 import org.junit.AfterClass;
47 import org.junit.BeforeClass;
/**
 * Tests for {@link FieldsReader}, built on {@link LuceneTestCase}.
 * The static members below form a fixture shared by the test methods.
 */
49 public class TestFieldsReader extends LuceneTestCase {
// Directory the test methods open their FieldsReader on.
50 private static Directory dir;
// Document that beforeClass() fills through DocHelper.setupDoc and adds to the index.
51 private static Document testDoc = new Document();
// Field metadata; created and populated from testDoc in beforeClass().
52 private static FieldInfos fieldInfos = null;
// Segment name passed to every FieldsReader constructor in this class.
53 private final static String TEST_SEGMENT_NAME = "_0";
/**
 * One-time fixture setup: fills {@code testDoc} through {@code DocHelper.setupDoc},
 * registers its fields in a fresh {@link FieldInfos}, and adds the document to an
 * {@link IndexWriter} opened on {@code dir}.
 *
 * @throws Exception if any of the setup calls fails
 */
56 public static void beforeClass() throws Exception {
57 fieldInfos = new FieldInfos();
58 DocHelper.setupDoc(testDoc);
59 fieldInfos.add(testDoc);
// NOTE(review): the statement that assigns dir is not visible in this view — confirm it runs before the writer is created.
// Compound files are switched off on the log merge policy; presumably so FieldsReader can open the segment's stored-field files directly — confirm.
61 IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy());
62 ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
63 IndexWriter writer = new IndexWriter(dir, conf);
64 writer.addDocument(testDoc);
/**
 * Per-test setup: switches off the failure injection of {@code FaultyIndexInput}.
 *
 * @throws Exception declared by the signature
 */
69 public void setUp() throws Exception {
// testExceptions() sets this static flag to true; clear it before each test.
71 FaultyIndexInput.doFail = false;
// Class-level teardown hook.
// NOTE(review): the body is not visible in this view; presumably it releases the static fixture (dir, fieldInfos, testDoc) — confirm.
75 public static void afterClass() throws Exception {
/**
 * Loads document 0 with a {@code null} field selector and checks the term-vector,
 * norms and index-option flags reported for three of its fields.
 *
 * @throws IOException declared by the signature
 */
81 public void test() throws IOException {
82 assertTrue(dir != null);
83 assertTrue(fieldInfos != null);
84 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
// The fixture indexed exactly one document.
85 assertTrue(reader.size() == 1);
86 Document doc = reader.doc(0, null);
87 assertTrue(doc != null);
88 assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);
// TEXT_FIELD_2: term vectors with offsets and positions, norms kept, full postings.
90 Fieldable field = doc.getField(DocHelper.TEXT_FIELD_2_KEY);
91 assertTrue(field != null);
92 assertTrue(field.isTermVectorStored() == true);
94 assertTrue(field.isStoreOffsetWithTermVector() == true);
95 assertTrue(field.isStorePositionWithTermVector() == true);
96 assertTrue(field.getOmitNorms() == false);
97 assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
// TEXT_FIELD_3: no term vectors, norms omitted, full postings.
99 field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
100 assertTrue(field != null);
101 assertTrue(field.isTermVectorStored() == false);
102 assertTrue(field.isStoreOffsetWithTermVector() == false);
103 assertTrue(field.isStorePositionWithTermVector() == false);
104 assertTrue(field.getOmitNorms() == true);
105 assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
// NO_TF field: no term vectors, norms kept, index options DOCS_ONLY.
107 field = doc.getField(DocHelper.NO_TF_KEY);
108 assertTrue(field != null);
109 assertTrue(field.isTermVectorStored() == false);
110 assertTrue(field.isStoreOffsetWithTermVector() == false);
111 assertTrue(field.isStorePositionWithTermVector() == false);
112 assertTrue(field.getOmitNorms() == false);
113 assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
/**
 * Loads document 0 through a {@link SetBasedFieldSelector} that names two fields
 * to load and four fields to load lazily, then checks which fields report
 * {@code isLazy()}, that their values match the {@code DocHelper} constants, and
 * that repeated value lookups on a lazy field return the same reference.
 *
 * @throws Exception declared by the signature
 */
118 public void testLazyFields() throws Exception {
119 assertTrue(dir != null);
120 assertTrue(fieldInfos != null);
121 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
122 assertTrue(reader.size() == 1);
// First constructor argument of the selector: fields to load.
123 Set<String> loadFieldNames = new HashSet<String>();
124 loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
125 loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
// Second constructor argument of the selector: fields to load lazily.
126 Set<String> lazyFieldNames = new HashSet<String>();
127 //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
128 lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
129 lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
130 lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
131 lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
132 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
133 Document doc = reader.doc(0, fieldSelector);
134 assertTrue("doc is null and it shouldn't be", doc != null);
// A lazily selected text field: lazy, correct value, and the same String instance on repeated reads.
135 Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
136 assertTrue("field is null and it shouldn't be", field != null);
137 assertTrue("field is not lazy and it should be", field.isLazy());
138 String value = field.stringValue();
139 assertTrue("value is null and it shouldn't be", value != null);
140 assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
141 assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
// Fields from the load set must not be lazy.
143 field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
144 assertTrue("field is null and it shouldn't be", field != null);
145 assertTrue("Field is lazy and it should not be", field.isLazy() == false);
146 field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
147 assertTrue("field is null and it shouldn't be", field != null);
148 assertTrue("Field is lazy and it should not be", field.isLazy() == false);
149 assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
// TEXT_FIELD_UTF2 is in the lazy set, so the assertion below requires it to be lazy.
// NOTE(review): the failure message says "should not be", which contradicts the condition being asserted.
151 field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
152 assertTrue("field is null and it shouldn't be", field != null);
153 assertTrue("Field is lazy and it should not be", field.isLazy() == true);
154 assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
// Lazy binary field: no string value, and the bytes must match the reference array.
156 field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
157 assertTrue("field is null and it shouldn't be", field != null);
158 assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
160 byte [] bytes = field.getBinaryValue();
161 assertTrue("bytes is null and it shouldn't be", bytes != null);
162 assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
163 assertTrue("calling binaryValue() twice should give same reference", field.getBinaryValue() == field.getBinaryValue());
// Byte-by-byte comparison so a failure reports the offending index.
164 for (int i = 0; i < bytes.length; i++) {
165 assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
/**
 * Variant of the lazy-field test in which the selector reports
 * {@code FieldSelectorResult.LATENT} wherever its superclass would report
 * {@code LAZY_LOAD}. The assertions expect latent fields to hand back a different
 * reference on each value lookup, while normally loaded fields hand back the same one.
 *
 * @throws Exception declared by the signature
 */
171 public void testLatentFields() throws Exception {
172 assertTrue(dir != null);
173 assertTrue(fieldInfos != null);
174 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
175 assertTrue(reader.size() == 1);
176 Set<String> loadFieldNames = new HashSet<String>();
177 loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
178 loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
179 Set<String> lazyFieldNames = new HashSet<String>();
180 //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
181 lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
182 lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
183 lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
184 lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
186 // Use LATENT instead of LAZY
// Anonymous subclass: delegates to the set-based decision and only rewrites LAZY_LOAD results.
187 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
189 public FieldSelectorResult accept(String fieldName) {
190 final FieldSelectorResult result = super.accept(fieldName);
191 if (result == FieldSelectorResult.LAZY_LOAD) {
192 return FieldSelectorResult.LATENT;
199 Document doc = reader.doc(0, fieldSelector);
200 assertTrue("doc is null and it shouldn't be", doc != null);
// Latent text field: still reports lazy, value is correct, but each stringValue() call yields a new reference.
201 Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
202 assertTrue("field is null and it shouldn't be", field != null);
203 assertTrue("field is not lazy and it should be", field.isLazy());
204 String value = field.stringValue();
205 assertTrue("value is null and it shouldn't be", value != null);
206 assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
207 assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
// Fields from the load set: not lazy, and repeated reads return the same reference.
209 field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
210 assertTrue("field is null and it shouldn't be", field != null);
211 assertTrue("Field is lazy and it should not be", field.isLazy() == false);
212 assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
214 field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
215 assertTrue("field is null and it shouldn't be", field != null);
216 assertTrue("Field is lazy and it should not be", field.isLazy() == false);
217 assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
218 assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
// TEXT_FIELD_UTF2 is in the lazy set, so the assertion below requires it to be lazy.
// NOTE(review): the failure message says "should not be", which contradicts the condition being asserted.
220 field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
221 assertTrue("field is null and it shouldn't be", field != null);
222 assertTrue("Field is lazy and it should not be", field.isLazy() == true);
223 assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
224 assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
// Latent binary field: no string value, a fresh byte[] per call, contents equal to the reference bytes.
226 field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
227 assertTrue("field is null and it shouldn't be", field != null);
228 assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
229 assertTrue("calling binaryValue() twice should give different references", field.getBinaryValue() != field.getBinaryValue());
231 byte [] bytes = field.getBinaryValue();
232 assertTrue("bytes is null and it shouldn't be", bytes != null);
233 assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
234 for (int i = 0; i < bytes.length; i++) {
235 assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
/**
 * Obtains a lazy field from document 0 and expects an
 * {@link AlreadyClosedException}; the test fails if none is thrown.
 * NOTE(review): the statements that close the reader and access the field are
 * not visible in this view — confirm against the full method.
 *
 * @throws Exception declared by the signature
 */
244 public void testLazyFieldsAfterClose() throws Exception {
245 assertTrue(dir != null);
246 assertTrue(fieldInfos != null);
247 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
248 assertTrue(reader.size() == 1);
249 Set<String> loadFieldNames = new HashSet<String>();
250 loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
251 loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
252 Set<String> lazyFieldNames = new HashSet<String>();
253 lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
254 lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
255 lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
256 lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
257 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
258 Document doc = reader.doc(0, fieldSelector);
259 assertTrue("doc is null and it shouldn't be", doc != null);
260 Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
261 assertTrue("field is null and it shouldn't be", field != null);
262 assertTrue("field is not lazy and it should be", field.isLazy());
// Reaching fail() means the expected exception was not raised.
266 fail("did not hit AlreadyClosedException as expected");
267 } catch (AlreadyClosedException e) {
/**
 * Loads document 0 through a {@link LoadFirstFieldSelector} and checks that each
 * returned field has a non-null string value and that the final count is 1.
 * NOTE(review): the declaration and update of {@code count} are not visible in
 * this view; presumably it counts the fields iterated — confirm.
 *
 * @throws Exception declared by the signature
 */
272 public void testLoadFirst() throws Exception {
273 assertTrue(dir != null);
274 assertTrue(fieldInfos != null);
275 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
276 assertTrue(reader.size() == 1);
277 LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
278 Document doc = reader.doc(0, fieldSelector);
279 assertTrue("doc is null and it shouldn't be", doc != null);
281 List<Fieldable> l = doc.getFields();
282 for (final Fieldable fieldable : l ) {
// Each returned element is cast to the concrete Field class.
283 Field field = (Field) fieldable;
285 assertTrue("field is null and it shouldn't be", field != null);
286 String sv = field.stringValue();
287 assertTrue("sv is null and it shouldn't be", sv != null);
290 assertTrue(count + " does not equal: " + 1, count == 1);
295 * Not really a test per se, but we should have some way of assessing whether this is worthwhile.
297 * Must test using a File based directory
/**
 * Informal timing comparison on a file-system directory: for {@code length}
 * rounds it times {@code stringValue()} on the large field once loaded eagerly
 * (null selector) and once loaded lazily, then prints the two averages.
 * Apart from null and laziness checks, nothing is asserted about the timings.
 * NOTE(review): the declarations of {@code length}, {@code reader}, {@code doc},
 * {@code value}, {@code start}, {@code finish} and {@code lazyTime} are not
 * visible in this view.
 *
 * @throws Exception declared by the signature
 */
301 public void testLazyPerformance() throws Exception {
// Temp directory name includes the user name.
302 String userName = System.getProperty("user.name");
303 File file = _TestUtil.getTempDir("lazyDir" + userName);
304 Directory tmpDir = newFSDirectory(file);
305 assertTrue(tmpDir != null);
// Writer is opened in CREATE mode with compound files disabled on the merge policy.
307 IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy());
308 ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
309 IndexWriter writer = new IndexWriter(tmpDir, conf);
310 writer.addDocument(testDoc);
313 assertTrue(fieldInfos != null);
// Accumulated milliseconds for the eager reads.
316 long regularTime = 0;
// Selector with an empty load set and only the large field in the lazy set.
318 Set<String> lazyFieldNames = new HashSet<String>();
319 lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
320 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections. <String> emptySet(), lazyFieldNames);
322 for (int i = 0; i < length; i++) {
// A new reader is opened for the eager read of each round.
323 reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
324 assertTrue(reader.size() == 1);
327 doc = reader.doc(0, null);//Load all of them
328 assertTrue("doc is null and it shouldn't be", doc != null);
329 Fieldable field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
330 assertTrue("field is null and it shouldn't be", field != null);
331 assertTrue("field is lazy", field.isLazy() == false);
// Time only the stringValue() call.
335 start = System.currentTimeMillis();
336 //On my machine this was always 0ms.
337 value = field.stringValue();
338 finish = System.currentTimeMillis();
339 assertTrue("value is null and it shouldn't be", value != null);
340 regularTime += (finish - start);
344 //Hmmm, are we still in cache???
// Second reader for the lazy read of the same field.
346 reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
347 doc = reader.doc(0, fieldSelector);
348 field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
349 assertTrue("field is not lazy", field.isLazy() == true);
350 start = System.currentTimeMillis();
351 //On my machine this took around 50 - 70ms
352 value = field.stringValue();
353 finish = System.currentTimeMillis();
354 assertTrue("value is null and it shouldn't be", value != null);
355 lazyTime += (finish - start);
// Averages are computed with integer division of the millisecond totals.
361 System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
362 System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
/**
 * Loads document 0 with a selector that returns {@code SIZE} for two fields,
 * {@code LOAD} for one and {@code NO_LOAD} for the rest, then checks that the
 * {@code SIZE} fields come back as binary values encoding the expected size.
 * NOTE(review): the declaration of {@code doc} is not visible in this view.
 *
 * @throws IOException declared by the signature
 */
366 public void testLoadSize() throws IOException {
367 FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
370 doc = reader.doc(0, new FieldSelector(){
371 public FieldSelectorResult accept(String fieldName) {
// Size only for TEXT_FIELD_1 and the lazy binary field.
372 if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
373 fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
374 return FieldSelectorResult.SIZE;
// TEXT_FIELD_3 is loaded normally.
375 else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
376 return FieldSelectorResult.LOAD;
// Any other field is skipped.
378 return FieldSelectorResult.NO_LOAD;
381 Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
382 Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY);
383 Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
// SIZE results are expected to be binary even for the text field.
384 assertTrue(f1.isBinary());
385 assertTrue(!f3.isBinary());
386 assertTrue(fb.isBinary());
// The text field's expected size is twice its character count; presumably two bytes per char — confirm.
387 assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.getBinaryValue());
388 assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
// The binary field's expected size is its byte count.
389 assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.getBinaryValue());
394 private void assertSizeEquals(int size, byte[] sizebytes) {
395 assertEquals((byte) (size>>>24), sizebytes[0]);
396 assertEquals((byte) (size>>>16), sizebytes[1]);
397 assertEquals((byte) (size>>> 8), sizebytes[2]);
398 assertEquals((byte) size , sizebytes[3]);
/**
 * {@link Directory} that forwards its operations to a wrapped file-system
 * directory ({@code fsDir}) and wraps every opened input in a
 * {@code FaultyIndexInput}.
 * NOTE(review): the declaration of {@code fsDir} is not visible in this view.
 */
401 public static class FaultyFSDirectory extends Directory {
// Opens the wrapped directory on the given path and reuses its lock factory.
404 public FaultyFSDirectory(File dir) throws IOException {
405 fsDir = newFSDirectory(dir);
406 lockFactory = fsDir.getLockFactory();
// The only non-trivial override: the returned input is wrapped in FaultyIndexInput.
409 public IndexInput openInput(String name) throws IOException {
410 return new FaultyIndexInput(fsDir.openInput(name));
// The methods below delegate straight to fsDir.
413 public String[] listAll() throws IOException {
414 return fsDir.listAll();
417 public boolean fileExists(String name) throws IOException {
418 return fsDir.fileExists(name);
421 public long fileModified(String name) throws IOException {
422 return fsDir.fileModified(name);
// NOTE(review): the block comment opened on the next line has no visible terminator in this view — confirm it is closed before touchFile.
426 /* @deprecated Lucene never uses this API; it will be
428 public void touchFile(String name) throws IOException {
429 fsDir.touchFile(name);
432 public void deleteFile(String name) throws IOException {
433 fsDir.deleteFile(name);
436 public long fileLength(String name) throws IOException {
437 return fsDir.fileLength(name);
440 public IndexOutput createOutput(String name) throws IOException {
441 return fsDir.createOutput(name);
// NOTE(review): the body of close() is not visible in this view.
444 public void close() throws IOException {
/**
 * {@link BufferedIndexInput} that wraps another {@link IndexInput} and can
 * simulate intermittent I/O failures, controlled by the static {@code doFail} flag.
 * NOTE(review): the declarations of {@code delegate} and {@code count} are not
 * visible in this view.
 */
449 private static class FaultyIndexInput extends BufferedIndexInput {
// Global switch for failure injection; setUp() clears it and testExceptions() sets it.
451 static boolean doFail;
453 private FaultyIndexInput(IndexInput delegate) {
454 this.delegate = delegate;
// Throws on every call where the pre-increment count is odd, but only while doFail is set.
// count advances only when doFail is true, because of && short-circuiting.
456 private void simOutage() throws IOException {
457 if (doFail && count++ % 2 == 1) {
458 throw new IOException("Simulated network outage");
// Reads are served by the wrapped input.
// NOTE(review): presumably simOutage() is invoked before the read — that line is not visible here.
462 public void readInternal(byte[] b, int offset, int length) throws IOException {
464 delegate.readBytes(b, offset, length);
// NOTE(review): the body of seekInternal is not visible in this view.
467 public void seekInternal(long pos) throws IOException {
472 public long length() {
473 return delegate.length();
// NOTE(review): the body of close() is not visible in this view.
476 public void close() throws IOException {
// A clone wraps a clone of the underlying input in a new FaultyIndexInput.
480 public Object clone() {
481 return new FaultyIndexInput((IndexInput) delegate.clone());
/**
 * Indexes {@code testDoc} into a {@code FaultyFSDirectory}, opens a reader on it,
 * turns on failure injection, and then runs two rounds in which
 * {@link IOException}s are caught.
 * NOTE(review): the statements inside the try blocks and any assertions are not
 * visible in this view — confirm what is exercised while failures are active.
 *
 * @throws Throwable declared by the signature
 */
486 public void testExceptions() throws Throwable {
487 File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions");
// This local dir shadows the static dir field of the test class.
490 Directory dir = new FaultyFSDirectory(indexDir);
491 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
492 TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
494 writer.addDocument(testDoc);
// NOTE(review): the second argument is presumably the read-only flag — confirm against IndexReader.open.
498 IndexReader reader = IndexReader.open(dir, true);
// From here on FaultyIndexInput.simOutage() may throw.
500 FaultyIndexInput.doFail = true;
504 for(int i=0;i<2;i++) {
507 } catch (IOException ioe) {
513 } catch (IOException ioe) {
// Remove the temporary index directory.
522 _TestUtil.rmDir(indexDir);
/**
 * Round-trips stored {@link NumericField} values: indexes at least 500 documents
 * that each hold a randomly typed numeric value in field "nf", then reads every
 * document back and checks that the stored value and its data type match what
 * was recorded at index time.
 * NOTE(review): the declaration of {@code answer}, the float/int/long setter
 * calls, the {@code doc.add}/{@code addDocument} calls and the cleanup are not
 * visible in this view.
 *
 * @throws Exception declared by the signature
 */
527 public void testNumericField() throws Exception {
528 Directory dir = newDirectory();
529 RandomIndexWriter w = new RandomIndexWriter(random, dir);
530 final int numDocs = atLeast(500);
// Expected value and expected data type per document, indexed by the document's "id".
531 final Number[] answers = new Number[numDocs];
532 final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs];
533 for(int id=0;id<numDocs;id++) {
534 Document doc = new Document();
// "nf" is created with Field.Store.YES; the trailing false is presumably the index flag — confirm.
535 NumericField nf = new NumericField("nf", Field.Store.YES, false);
538 final NumericField.DataType typeAnswer;
// First coin flip: floating point versus integral; second flip picks the width.
539 if (random.nextBoolean()) {
541 if (random.nextBoolean()) {
542 final float f = random.nextFloat();
544 answer = Float.valueOf(f);
545 typeAnswer = NumericField.DataType.FLOAT;
547 final double d = random.nextDouble();
548 nf.setDoubleValue(d);
549 answer = Double.valueOf(d);
550 typeAnswer = NumericField.DataType.DOUBLE;
554 if (random.nextBoolean()) {
555 final int i = random.nextInt();
557 answer = Integer.valueOf(i);
558 typeAnswer = NumericField.DataType.INT;
560 final long l = random.nextLong();
562 answer = Long.valueOf(l);
563 typeAnswer = NumericField.DataType.LONG;
566 answers[id] = answer;
567 typeAnswers[id] = typeAnswer;
// "id" is created with Field.Store.NO; Integer.MAX_VALUE is presumably the precision step — confirm.
568 doc.add(new NumericField("id", Integer.MAX_VALUE, Field.Store.NO, true).setIntValue(id));
571 final IndexReader r = w.getReader();
574 assertEquals(numDocs, r.numDocs());
576 for(IndexReader sub : r.getSequentialSubReaders()) {
// "id" is not stored, so the per-segment FieldCache maps each docID back to its id.
577 final int[] ids = FieldCache.DEFAULT.getInts(sub, "id");
578 for(int docID=0;docID<sub.numDocs();docID++) {
579 final Document doc = sub.document(docID);
580 final Fieldable f = doc.getFieldable("nf");
581 assertTrue("got f=" + f, f instanceof NumericField);
582 final NumericField nf = (NumericField) f;
// Value is compared with equals, data type by identity.
583 assertEquals(answers[ids[docID]], nf.getNumericValue());
584 assertSame(typeAnswers[ids[docID]], nf.getDataType());