package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
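
/**
 * Tests that a {@link PayloadProcessorProvider} installed on an {@link IndexWriter}
 * can rewrite or drop payloads while segments are copied in via
 * {@code addIndexes(IndexReader...)} and during regular segment merges.
 * The processors below strip the payloads of the term {@code p:p1} only,
 * and they are registered per source {@link Directory}.
 */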
public class TestPayloadProcessorProvider extends LuceneTestCase {

  private static final class PerDirPayloadProcessor extends PayloadProcessorProvider {

    private Map<Directory, DirPayloadProcessor> processors;

    public PerDirPayloadProcessor(Map<Directory, DirPayloadProcessor> processors) {
      this.processors = processors;
    }

    @Override
    public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
      return processors.get(dir);
    }

  }

  private static final class PerTermPayloadProcessor extends DirPayloadProcessor {

    @Override
    public PayloadProcessor getProcessor(Term term) throws IOException {
      // don't process payloads of terms other than "p:p1"
      if (!term.field().equals("p") || !term.text().equals("p1")) {
        return null;
      }

      // all payloads of "p:p1" are processed the same way - they get deleted
      return new DeletePayloadProcessor();
    }

  }

  /** deletes the incoming payload */
  private static final class DeletePayloadProcessor extends PayloadProcessor {

    @Override
    public int payloadLength() throws IOException {
      // report a zero-length payload, effectively deleting it
      return 0;
    }

    @Override
    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
      // the returned bytes are irrelevant since payloadLength() is 0
      return payload;
    }

  }
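
  /**
   * A token stream that emits a single token carrying a one-byte payload
   * ({@code 1}), so the tests can count exactly how many such payloads
   * survive addIndexes and merging.
   */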
  private static final class PayloadTokenStream extends TokenStream {

    private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
    private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

    private boolean called = false;
    private String t;

    public PayloadTokenStream(String t) {
      this.t = t;
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (called) {
        return false;
      }

      called = true;
      byte[] p = new byte[] { 1 };
      payload.setPayload(new Payload(p));
      term.append(t);
      return true;
    }

    @Override
    public void reset() throws IOException {
      // allow the stream to be reused for the next document
      called = false;
      term.setEmpty();
    }

  }

  private static final int NUM_DOCS = 10;

  private IndexWriterConfig getConfig(Random random) {
    return newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(
        TEST_VERSION_CURRENT));
  }

  private void populateDirs(Random random, Directory[] dirs, boolean multipleCommits)
      throws IOException {
    for (int i = 0; i < dirs.length; i++) {
      dirs[i] = newDirectory();
      populateDocs(random, dirs[i], multipleCommits);
      verifyPayloadExists(dirs[i], new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dirs[i], new Term("p", "p2"), NUM_DOCS);
    }
  }

  private void populateDocs(Random random, Directory dir, boolean multipleCommits)
      throws IOException {
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).
            setMergePolicy(newLogMergePolicy(10)));
    TokenStream payloadTS1 = new PayloadTokenStream("p1");
    TokenStream payloadTS2 = new PayloadTokenStream("p2");
    for (int i = 0; i < NUM_DOCS; i++) {
      Document doc = new Document();
      doc.add(newField("id", "doc" + i, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
      doc.add(newField("content", "doc content " + i, Store.NO, Index.ANALYZED));
      doc.add(new Field("p", payloadTS1));
      doc.add(new Field("p", payloadTS2));
      writer.addDocument(doc);
      if (multipleCommits && (i % 4 == 0)) {
        writer.commit();
      }
    }
    writer.close();
  }

  private void verifyPayloadExists(Directory dir, Term term, int numExpected)
      throws IOException {
    IndexReader reader = IndexReader.open(dir);
    try {
      int numPayloads = 0;
      TermPositions tp = reader.termPositions(term);
      while (tp.next()) {
        tp.nextPosition();
        if (tp.isPayloadAvailable()) {
          assertEquals(1, tp.getPayloadLength());
          byte[] p = new byte[tp.getPayloadLength()];
          tp.getPayload(p, 0);
          assertEquals(1, p[0]);
          ++numPayloads;
        }
      }
      assertEquals(numExpected, numPayloads);
    } finally {
      reader.close();
    }
  }
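
  /**
   * Populates two source directories (and, unless addToEmptyIndex is true, the
   * destination as well), registers a payload processor for the source
   * directories only, and runs addIndexes. Payloads of "p:p1" copied from the
   * sources should be stripped, while "p:p2" payloads must all survive.
   */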
  private void doTest(Random random, boolean addToEmptyIndex,
      int numExpectedPayloads, boolean multipleCommits) throws IOException {
    Directory[] dirs = new Directory[2];
    populateDirs(random, dirs, multipleCommits);

    Directory dir = newDirectory();
    if (!addToEmptyIndex) {
      populateDocs(random, dir, multipleCommits);
      verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
      verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);
    }

    // Add two source dirs. By not adding the dest dir, we ensure its payloads
    // won't get processed.
    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
    for (Directory d : dirs) {
      processors.put(d, new PerTermPayloadProcessor());
    }
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));

    IndexReader[] readers = new IndexReader[dirs.length];
    for (int i = 0; i < readers.length; i++) {
      readers[i] = IndexReader.open(dirs[i]);
    }
    try {
      writer.addIndexes(readers);
    } finally {
      for (IndexReader r : readers) {
        r.close();
      }
    }
    writer.close();

    verifyPayloadExists(dir, new Term("p", "p1"), numExpectedPayloads);

    // the second term should always have all payloads
    numExpectedPayloads = NUM_DOCS * dirs.length
        + (addToEmptyIndex ? 0 : NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), numExpectedPayloads);

    for (Directory d : dirs) {
      d.close();
    }
    dir.close();
  }
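
  /** addIndexes into an empty destination - no "p:p1" payloads should remain. */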
  @Test
  public void testAddIndexes() throws Exception {
    // addIndexes - single commit in each
    doTest(random, true, 0, false);

    // addIndexes - multiple commits in each
    doTest(random, true, 0, true);
  }
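
  /**
   * addIndexes into a destination that already has documents - the
   * destination's own "p:p1" payloads are kept because its Directory is not
   * registered with the provider.
   */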
  @Test
  public void testAddIndexesIntoExisting() throws Exception {
    // addIndexes - single commit in each
    doTest(random, false, NUM_DOCS, false);

    // addIndexes - multiple commits in each
    doTest(random, false, NUM_DOCS, true);
  }
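
  /** Regular segment merges should run the registered payload processors too. */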
  @Test
  public void testRegularMerges() throws Exception {
    Directory dir = newDirectory();
    populateDocs(random, dir, true);
    verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);

    // Register a processor for the index's own directory - unlike in doTest,
    // its payloads should get processed during regular merges.
    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
    processors.put(dir, new PerTermPayloadProcessor());
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));
    // merge all segments so the processor is actually invoked
    writer.optimize();
    writer.close();

    verifyPayloadExists(dir, new Term("p", "p1"), 0);
    verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS);

    dir.close();
  }

}