1 package org.apache.lucene.search.highlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.ByteArrayInputStream;
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.io.StringReader;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
31 import java.util.StringTokenizer;
33 import javax.xml.parsers.DocumentBuilder;
34 import javax.xml.parsers.DocumentBuilderFactory;
36 import org.apache.lucene.analysis.Analyzer;
37 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
38 import org.apache.lucene.analysis.CharArraySet;
39 import org.apache.lucene.analysis.LowerCaseTokenizer;
40 import org.apache.lucene.analysis.MockAnalyzer;
41 import org.apache.lucene.analysis.MockTokenizer;
42 import org.apache.lucene.analysis.StopAnalyzer;
43 import org.apache.lucene.analysis.Token;
44 import org.apache.lucene.analysis.TokenStream;
45 import org.apache.lucene.analysis.standard.StandardAnalyzer;
46 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
47 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
48 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
49 import org.apache.lucene.document.Document;
50 import org.apache.lucene.document.Field;
51 import org.apache.lucene.document.NumericField;
52 import org.apache.lucene.document.Field.Index;
53 import org.apache.lucene.document.Field.Store;
54 import org.apache.lucene.index.IndexReader;
55 import org.apache.lucene.index.IndexWriter;
56 import org.apache.lucene.index.IndexWriterConfig;
57 import org.apache.lucene.index.Term;
58 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
59 import org.apache.lucene.queryParser.ParseException;
60 import org.apache.lucene.queryParser.QueryParser;
61 import org.apache.lucene.search.BooleanQuery;
62 import org.apache.lucene.search.FilteredQuery;
63 import org.apache.lucene.search.IndexSearcher;
64 import org.apache.lucene.search.MultiPhraseQuery;
65 import org.apache.lucene.search.MultiSearcher;
66 import org.apache.lucene.search.MultiTermQuery;
67 import org.apache.lucene.search.NumericRangeQuery;
68 import org.apache.lucene.search.PhraseQuery;
69 import org.apache.lucene.search.Query;
70 import org.apache.lucene.search.TermQuery;
71 import org.apache.lucene.search.TermRangeFilter;
72 import org.apache.lucene.search.TopDocs;
73 import org.apache.lucene.search.WildcardQuery;
74 import org.apache.lucene.search.BooleanClause.Occur;
75 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
76 import org.apache.lucene.search.regex.RegexQuery;
77 import org.apache.lucene.search.regex.SpanRegexQuery;
78 import org.apache.lucene.search.spans.SpanNearQuery;
79 import org.apache.lucene.search.spans.SpanNotQuery;
80 import org.apache.lucene.search.spans.SpanOrQuery;
81 import org.apache.lucene.search.spans.SpanQuery;
82 import org.apache.lucene.search.spans.SpanTermQuery;
83 import org.apache.lucene.store.Directory;
84 import org.apache.lucene.util.LuceneTestCase;
85 import org.w3c.dom.Element;
86 import org.w3c.dom.NodeList;
89 * JUnit Test for Highlighter class.
// Test class for the Lucene Highlighter. It implements Formatter so that each
// highlighted term triggers a callback (presumably incrementing numHighlights,
// which the tests assert against — the callback body is not visible in this view).
// NOTE(review): this listing is a line-sampled dump — the embedded original line
// numbers at the start of each line have gaps, so statements and closing braces
// are missing throughout. Do not treat this view as compilable.
92 public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
94 private IndexReader reader;
95 static final String FIELD_NAME = "contents";
96 private static final String NUMERIC_FIELD_NAME = "nfield";
99 public IndexSearcher searcher = null;
100 int numHighlights = 0;
101 final Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
// Fixture documents indexed for the tests; the array declaration line itself
// (presumably something like `String[] texts = {`) is missing from this view.
105 "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
106 "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
107 "JFK has been shot", "John Kennedy has been shot",
108 "This text has a typo in referring to Keneddy",
109 "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
// Searches for the phrase "very long" and highlights each hit's stored field
// using QueryScorer + SimpleSpanFragmenter; only prints results under VERBOSE.
// NOTE(review): closing braces of the loop/method are missing from this view.
111 public void testQueryScorerHits() throws Exception {
112 Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
113 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
114 query = qp.parse("\"very long\"");
115 searcher = new IndexSearcher(reader);
116 TopDocs hits = searcher.search(query, 10);
118 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
119 Highlighter highlighter = new Highlighter(scorer);
122 for (int i = 0; i < hits.scoreDocs.length; i++) {
123 Document doc = searcher.doc(hits.scoreDocs[i].doc);
124 String storedField = doc.get(FIELD_NAME);
// Re-derive a token stream for the stored document so offsets line up with it.
126 TokenStream stream = TokenSources.getAnyTokenStream(searcher
127 .getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
129 Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
131 highlighter.setTextFragmenter(fragmenter);
133 String fragment = highlighter.getBestFragment(stream, storedField);
135 if (VERBOSE) System.out.println(fragment);
// Checks default-field semantics: a phrase query on the default field highlights
// text regardless of field name, while a query on a different named field does not.
// NOTE(review): the assertEquals at line 153 is truncated before its expected/actual
// arguments in this view.
140 public void testHighlightingWithDefaultField() throws Exception {
142 String s1 = "I call our world Flatland, not because we call it so,";
144 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
146 // Verify that a query against the default field results in text being
148 // regardless of the field name.
149 Query q = parser.parse("\"world Flatland\"~3");
150 String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
151 String observed = highlightField(q, "SOME_FIELD_NAME", s1);
152 if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
153 assertEquals("Query in the default field results in text for *ANY* field being highlighted",
156 // Verify that a query against a named field does not result in any
158 // when the query field name differs from the name of the field being
160 // which in this example happens to be the default field name.
161 q = parser.parse("text:\"world Flatland\"~3");
163 observed = highlightField(q, FIELD_NAME, s1);
164 if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
166 "Query in a named field does not result in highlighting when that field isn't in the query",
167 s1, highlightField(q, FIELD_NAME, s1));
// Helper: highlights `text` for `query` in the given field with <B>/</B> markup;
// returns the original text unchanged when nothing was highlighted.
171 * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
172 * @throws InvalidTokenOffsetsException
174 private static String highlightField(Query query, String fieldName, String text)
175 throws IOException, InvalidTokenOffsetsException {
176 TokenStream tokenStream = new StandardAnalyzer(TEST_VERSION_CURRENT).tokenStream(fieldName, new StringReader(text));
177 // Assuming "<B>", "</B>" used to highlight
178 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
// FIELD_NAME acts as the default field for field-sensitive scoring.
179 QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
180 Highlighter highlighter = new Highlighter(formatter, scorer);
// MAX_VALUE fragment size => the whole text is one fragment.
181 highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
183 String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
184 return rv.length() == 0 ? text : rv;
// Smoke test: highlights "Kennedy" hits with the span-based QueryScorer;
// asserts nothing — only checks no exception is thrown.
187 public void testSimpleSpanHighlighter() throws Exception {
188 doSearching("Kennedy");
190 int maxNumFragmentsRequired = 2;
192 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
193 Highlighter highlighter = new Highlighter(scorer);
195 for (int i = 0; i < hits.totalHits; i++) {
196 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
197 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
198 new StringReader(text));
199 highlighter.setTextFragmenter(new SimpleFragmenter(40));
// NOTE(review): the separator argument line is missing from this view.
201 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
203 if (VERBOSE) System.out.println("\t" + result);
206 // Not sure we can assert anything here - just running to check we dont
207 // throw any exceptions
// Builds a boolean combination of two phrases over two fields and verifies the
// highlighter counts the expected highlights when terms repeat across clauses.
// NOTE(review): the declarations of f1/f2 (original lines 215-216) and the
// assertTrue condition argument are missing from this view.
211 public void testRepeatingTermsInMultBooleans() throws Exception {
212 String content = "x y z a b c d e f g b c g";
213 String ph1 = "\"a b c d\"";
214 String ph2 = "\"b c g\"";
217 String f1c = f1 + ":";
218 String f2c = f2 + ":";
219 String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
220 + " OR " + f2c + ph2 + ")";
221 Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
222 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
223 Query query = qp.parse(q);
225 QueryScorer scorer = new QueryScorer(query, f1);
226 scorer.setExpandMultiTermQuery(false);
// `this` is the Formatter — each highlighted term bumps numHighlights.
228 Highlighter h = new Highlighter(this, scorer);
230 h.getBestFragment(analyzer, f1, content);
232 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Highlights three different phrase queries in turn and checks the highlight
// count after each pass (the expected counts and loop/method closers are
// missing from this sampled view — the assertTrue conditions are truncated).
236 public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
237 doSearching("\"very long and contains\"");
239 int maxNumFragmentsRequired = 2;
241 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
242 Highlighter highlighter = new Highlighter(this, scorer);
244 for (int i = 0; i < hits.totalHits; i++) {
245 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
246 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
248 highlighter.setTextFragmenter(new SimpleFragmenter(40));
250 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
252 if (VERBOSE) System.out.println("\t" + result);
255 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Second pass: a longer exact phrase.
259 doSearching("\"This piece of text refers to Kennedy\"");
261 maxNumFragmentsRequired = 2;
263 scorer = new QueryScorer(query, FIELD_NAME);
264 highlighter = new Highlighter(this, scorer);
266 for (int i = 0; i < hits.totalHits; i++) {
267 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
268 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
270 highlighter.setTextFragmenter(new SimpleFragmenter(40));
272 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
274 if (VERBOSE) System.out.println("\t" + result);
277 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Third pass: a phrase with heavy term repetition.
281 doSearching("\"lets is a the lets is a the lets is a the lets\"");
283 maxNumFragmentsRequired = 2;
285 scorer = new QueryScorer(query, FIELD_NAME);
286 highlighter = new Highlighter(this, scorer);
288 for (int i = 0; i < hits.totalHits; i++) {
289 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
290 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
292 highlighter.setTextFragmenter(new SimpleFragmenter(40));
294 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
296 if (VERBOSE) System.out.println("\t" + result);
299 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Highlights hits of a SpanRegexQuery ("ken.*") wrapped in a SpanOrQuery,
// then checks the highlight count (assert condition truncated in this view).
304 public void testSpanRegexQuery() throws Exception {
305 query = new SpanOrQuery(new SpanQuery [] {
306 new SpanRegexQuery(new Term(FIELD_NAME, "ken.*")) });
307 searcher = new IndexSearcher(reader);
308 hits = searcher.search(query, 100);
309 int maxNumFragmentsRequired = 2;
311 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
312 Highlighter highlighter = new Highlighter(this, scorer);
314 for (int i = 0; i < hits.totalHits; i++) {
315 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
316 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
318 highlighter.setTextFragmenter(new SimpleFragmenter(40));
320 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
322 if (VERBOSE) System.out.println("\t" + result);
325 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Same as testSpanRegexQuery but with a plain (non-span) RegexQuery.
329 public void testRegexQuery() throws Exception {
330 query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
331 searcher = new IndexSearcher(reader);
332 hits = searcher.search(query, 100);
333 int maxNumFragmentsRequired = 2;
335 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
336 Highlighter highlighter = new Highlighter(this, scorer);
338 for (int i = 0; i < hits.totalHits; i++) {
339 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
340 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
342 highlighter.setTextFragmenter(new SimpleFragmenter(40));
344 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
346 if (VERBOSE) System.out.println("\t" + result);
349 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Smoke test for NumericRangeQuery: highlighting numeric fields is not
// supported, so this only verifies no exception is thrown.
353 public void testNumericRangeQuery() throws Exception {
354 // doesn't currently highlight, but make sure it doesn't cause exception either
355 query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
356 searcher = new IndexSearcher(reader);
357 hits = searcher.search(query, 100);
358 int maxNumFragmentsRequired = 2;
360 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
361 Highlighter highlighter = new Highlighter(this, scorer);
363 for (int i = 0; i < hits.totalHits; i++) {
// Note: text comes from the numeric field but is tokenized as FIELD_NAME.
364 String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME);
365 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
367 highlighter.setTextFragmenter(new SimpleFragmenter(40));
370 highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,"...");
371 //if (VERBOSE) System.out.println("\t" + result);
// Highlights a sloppy phrase query ("text piece long"~5) and checks the
// highlight count (assert condition truncated in this sampled view).
377 public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
378 doSearching("\"text piece long\"~5");
380 int maxNumFragmentsRequired = 2;
382 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
383 Highlighter highlighter = new Highlighter(this,scorer);
384 highlighter.setTextFragmenter(new SimpleFragmenter(40));
386 for (int i = 0; i < hits.totalHits; i++) {
387 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
388 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
390 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
392 if (VERBOSE) System.out.println("\t" + result);
395 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Highlights the exact phrase "x y z"; scorer/highlighter are rebuilt per hit
// and the count is asserted inside the loop (condition truncated in this view).
399 public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
400 doSearching("\"x y z\"");
402 int maxNumFragmentsRequired = 2;
404 for (int i = 0; i < hits.totalHits; i++) {
405 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
406 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
407 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
408 Highlighter highlighter = new Highlighter(this, scorer);
410 highlighter.setTextFragmenter(new SimpleFragmenter(40));
412 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
414 if (VERBOSE) System.out.println("\t" + result);
416 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Exercises SimpleSpanFragmenter with two different fragment sizes (5 and 20)
// against two phrase searches; output is only printed under VERBOSE.
421 public void testSimpleSpanFragmenter() throws Exception {
422 doSearching("\"piece of text that is very long\"");
424 int maxNumFragmentsRequired = 2;
426 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
427 Highlighter highlighter = new Highlighter(this, scorer);
429 for (int i = 0; i < hits.totalHits; i++) {
430 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
431 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
433 highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
435 String result = highlighter.getBestFragments(tokenStream, text,
436 maxNumFragmentsRequired, "...");
437 if (VERBOSE) System.out.println("\t" + result);
// Second pass with a larger fragment size.
441 doSearching("\"been shot\"");
443 maxNumFragmentsRequired = 2;
445 scorer = new QueryScorer(query, FIELD_NAME);
446 highlighter = new Highlighter(this, scorer);
448 for (int i = 0; i < hits.totalHits; i++) {
449 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
450 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
452 highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
454 String result = highlighter.getBestFragments(tokenStream, text,
455 maxNumFragmentsRequired, "...");
456 if (VERBOSE) System.out.println("\t" + result);
461 // position sensitive query added after position insensitive query
// Mixes a plain term ("y") with a phrase ("x y z") in one query and checks
// the combined highlight count (assert condition truncated in this view).
462 public void testPosTermStdTerm() throws Exception {
463 doSearching("y \"x y z\"");
465 int maxNumFragmentsRequired = 2;
467 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
468 Highlighter highlighter = new Highlighter(this,scorer);
470 for (int i = 0; i < hits.totalHits; i++) {
471 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
472 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,new StringReader(text));
474 highlighter.setTextFragmenter(new SimpleFragmenter(40));
476 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
478 if (VERBOSE) System.out.println("\t" + result);
480 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// MultiPhraseQuery with an OR-position ({wordx|wordb} followed by wordy);
// expects 6 highlights via the shared assertExpectedHighlightCount helper.
485 public void testQueryScorerMultiPhraseQueryHighlighting() throws Exception {
486 MultiPhraseQuery mpq = new MultiPhraseQuery();
488 mpq.add(new Term[] { new Term(FIELD_NAME, "wordx"), new Term(FIELD_NAME, "wordb") });
489 mpq.add(new Term(FIELD_NAME, "wordy"));
493 final int maxNumFragmentsRequired = 2;
494 assertExpectedHighlightCount(maxNumFragmentsRequired, 6);
// MultiPhraseQuery with an explicit position gap (wordx at 0, wordz at 2),
// added out of order; expects 2 highlights.
497 public void testQueryScorerMultiPhraseQueryHighlightingWithGap() throws Exception {
498 MultiPhraseQuery mpq = new MultiPhraseQuery();
501 * The toString of MultiPhraseQuery doesn't work so well with these
502 * out-of-order additions, but the Query itself seems to match accurately.
505 mpq.add(new Term[] { new Term(FIELD_NAME, "wordz") }, 2);
506 mpq.add(new Term[] { new Term(FIELD_NAME, "wordx") }, 0);
510 final int maxNumFragmentsRequired = 1;
511 final int expectedHighlights = 2;
513 assertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
// SpanNearQuery (beginning NEAR/3 kennedy, unordered) run through the shared
// TestHighlightRunner; asserts the highlight count (condition truncated here).
516 public void testNearSpanSimpleQuery() throws Exception {
517 doSearching(new SpanNearQuery(new SpanQuery[] {
518 new SpanTermQuery(new Term(FIELD_NAME, "beginning")),
519 new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
521 TestHighlightRunner helper = new TestHighlightRunner() {
524 public void run() throws Exception {
526 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
532 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Smoke test using the term-based QueryTermScorer (rather than QueryScorer);
// only checks that highlighting "Kennedy" hits throws no exception.
536 public void testSimpleQueryTermScorerHighlighter() throws Exception {
537 doSearching("Kennedy");
538 Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
539 highlighter.setTextFragmenter(new SimpleFragmenter(40));
540 int maxNumFragmentsRequired = 2;
541 for (int i = 0; i < hits.totalHits; i++) {
542 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
543 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
545 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
547 if (VERBOSE) System.out.println("\t" + result);
549 // Not sure we can assert anything here - just running to check we dont
550 // throw any exceptions
// Two overlapping SpanNearQueries OR'ed in a BooleanQuery, highlighted via the
// shared runner; asserts the highlight count (condition truncated in this view).
553 public void testSpanHighlighting() throws Exception {
554 Query query1 = new SpanNearQuery(new SpanQuery[] {
555 new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
556 new SpanTermQuery(new Term(FIELD_NAME, "wordy")) }, 1, false);
557 Query query2 = new SpanNearQuery(new SpanQuery[] {
558 new SpanTermQuery(new Term(FIELD_NAME, "wordy")),
559 new SpanTermQuery(new Term(FIELD_NAME, "wordc")) }, 1, false);
560 BooleanQuery bquery = new BooleanQuery();
561 bquery.add(query1, Occur.SHOULD);
562 bquery.add(query2, Occur.SHOULD);
564 TestHighlightRunner helper = new TestHighlightRunner() {
567 public void run() throws Exception {
569 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
574 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// SpanNotQuery: (shot NEAR/3 kennedy) NOT john; highlighted via the shared
// runner, then the highlight count is asserted (condition truncated here).
578 public void testNotSpanSimpleQuery() throws Exception {
579 doSearching(new SpanNotQuery(new SpanNearQuery(new SpanQuery[] {
580 new SpanTermQuery(new Term(FIELD_NAME, "shot")),
581 new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false), new SpanTermQuery(
582 new Term(FIELD_NAME, "john"))));
583 TestHighlightRunner helper = new TestHighlightRunner() {
586 public void run() throws Exception {
588 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
593 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Plain term query "Kennedy" through the standard highlight runner; asserts
// highlight count inside run() (condition truncated in this sampled view).
597 public void testGetBestFragmentsSimpleQuery() throws Exception {
598 TestHighlightRunner helper = new TestHighlightRunner() {
601 public void run() throws Exception {
603 doSearching("Kennedy");
604 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
605 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Fuzzy query "Kinnedy~" (should match kennedy/keneddy variants); the extra
// boolean flag to doStandardHighlights presumably expands multi-term queries.
613 public void testGetFuzzyFragments() throws Exception {
614 TestHighlightRunner helper = new TestHighlightRunner() {
617 public void run() throws Exception {
619 doSearching("Kinnedy~");
620 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
621 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Single-character wildcard query "K?nnedy" through the standard runner.
629 public void testGetWildCardFragments() throws Exception {
630 TestHighlightRunner helper = new TestHighlightRunner() {
633 public void run() throws Exception {
635 doSearching("K?nnedy");
636 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
637 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Mid-word wildcard test. NOTE(review): the doSearching(...) call with the
// actual wildcard query string (original line ~650) is missing from this view.
645 public void testGetMidWildCardFragments() throws Exception {
646 TestHighlightRunner helper = new TestHighlightRunner() {
649 public void run() throws Exception {
652 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
653 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Term-range query [kannedy TO kznnedy]; forces scoring-boolean rewrite so the
// range expands into highlightable terms, then runs the standard highlights.
661 public void testGetRangeFragments() throws Exception {
662 TestHighlightRunner helper = new TestHighlightRunner() {
665 public void run() throws Exception {
667 String queryString = FIELD_NAME + ":[kannedy TO kznnedy]";
669 // Need to explicitly set the QueryParser property to use TermRangeQuery
672 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
673 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
674 query = parser.parse(queryString);
// NOTE(review): the searcher/hits setup lines between parse and highlighting
// are missing from this sampled view.
677 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
678 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Verifies that a CONSTANT_SCORE_FILTER_REWRITE wildcard query can still be
// highlighted without rewriting it first. Three passes: (1) scorer bound to
// FIELD_NAME, (2) scorer with null field (field-insensitive), (3) scorer with a
// non-matching field plus FIELD_NAME as default. NOTE(review): the query
// re-creation between passes and the assertTrue conditions are missing from
// this sampled view.
686 public void testConstantScoreMultiTermQuery() throws Exception {
690 query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
691 ((WildcardQuery)query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
692 searcher = new IndexSearcher(reader);
693 // can't rewrite ConstantScore if you want to highlight it -
694 // it rewrites to ConstantScoreQuery which cannot be highlighted
695 // query = unReWrittenQuery.rewrite(reader);
696 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
697 hits = searcher.search(query, null, 1000);
699 for (int i = 0; i < hits.totalHits; i++) {
700 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
701 int maxNumFragmentsRequired = 2;
702 String fragmentSeparator = "...";
703 QueryScorer scorer = null;
704 TokenStream tokenStream = null;
706 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
708 scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
710 Highlighter highlighter = new Highlighter(this, scorer);
712 highlighter.setTextFragmenter(new SimpleFragmenter(20));
714 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
716 if (VERBOSE) System.out.println("\t" + result);
718 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 2: scorer built with a null field name, so any field matches.
723 hits = searcher.search(query, null, 1000);
727 for (int i = 0; i < hits.totalHits; i++) {
728 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
729 int maxNumFragmentsRequired = 2;
730 String fragmentSeparator = "...";
731 QueryScorer scorer = null;
732 TokenStream tokenStream = null;
734 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
736 scorer = new QueryScorer(query, null);
738 Highlighter highlighter = new Highlighter(this, scorer);
740 highlighter.setTextFragmenter(new SimpleFragmenter(20));
742 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
744 if (VERBOSE) System.out.println("\t" + result);
746 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 3: scorer bound to an unrelated field, FIELD_NAME as the default field.
751 hits = searcher.search(query, null, 1000);
755 for (int i = 0; i < hits.totalHits; i++) {
756 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
757 int maxNumFragmentsRequired = 2;
758 String fragmentSeparator = "...";
759 QueryScorer scorer = null;
760 TokenStream tokenStream = null;
762 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
764 scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
766 Highlighter highlighter = new Highlighter(this, scorer);
768 highlighter.setTextFragmenter(new SimpleFragmenter(20));
770 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
772 if (VERBOSE) System.out.println("\t" + result);
774 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Phrase query "John Kennedy" through the standard runner; the comment below
// records that each term is currently highlighted separately.
778 public void testGetBestFragmentsPhrase() throws Exception {
779 TestHighlightRunner helper = new TestHighlightRunner() {
782 public void run() throws Exception {
784 doSearching("\"John Kennedy\"");
785 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
786 // Currently highlights "John" and "Kennedy" separately
787 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Ordered SpanNearQuery (john then kennedy, slop 1) highlighted via the shared
// runner. NOTE(review): the doSearching(snq) call between construction and
// highlighting is missing from this sampled view.
795 public void testGetBestFragmentsQueryScorer() throws Exception {
796 TestHighlightRunner helper = new TestHighlightRunner() {
799 public void run() throws Exception {
801 SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
802 new SpanTermQuery(new Term("contents", "kennedy")), };
804 SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
806 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
807 // Currently highlights "John" and "Kennedy" separately
808 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Regression test for an off-by-one in highlight offsets: with a
// NullFragmenter the whole input must come back with only "help" wrapped.
816 public void testOffByOne() throws Exception {
817 TestHighlightRunner helper = new TestHighlightRunner() {
820 public void run() throws Exception {
821 TermQuery query = new TermQuery(new Term("data", "help"));
822 Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
823 hg.setTextFragmenter(new NullFragmenter());
// NOTE(review): the declaration of `match` (original lines 824-825) is
// missing from this sampled view.
826 match = hg.getBestFragment(analyzer, "data", "help me [54-65]");
827 assertEquals("<B>help</B> me [54-65]", match);
// FilteredQuery (span-near john/kennedy filtered by a TermRangeFilter on
// "john") through the standard runner. NOTE(review): the doSearching(fq) call
// is missing from this sampled view.
835 public void testGetBestFragmentsFilteredQuery() throws Exception {
836 TestHighlightRunner helper = new TestHighlightRunner() {
839 public void run() throws Exception {
841 TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
842 SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
843 new SpanTermQuery(new Term("contents", "kennedy")), };
844 SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
845 FilteredQuery fq = new FilteredQuery(snq, rf);
848 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
849 // Currently highlights "John" and "Kennedy" separately
850 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Same as testGetBestFragmentsFilteredQuery but wrapping a PhraseQuery
// instead of a SpanNearQuery. NOTE(review): the doSearching(fq) call is
// missing from this sampled view.
858 public void testGetBestFragmentsFilteredPhraseQuery() throws Exception {
859 TestHighlightRunner helper = new TestHighlightRunner() {
862 public void run() throws Exception {
864 TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
865 PhraseQuery pq = new PhraseQuery();
866 pq.add(new Term("contents", "john"));
867 pq.add(new Term("contents", "kennedy"));
868 FilteredQuery fq = new FilteredQuery(pq, rf);
871 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
872 // Currently highlights "John" and "Kennedy" separately
873 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Mixed term + prefix wildcard query ("John Kenn*") through the standard runner.
881 public void testGetBestFragmentsMultiTerm() throws Exception {
882 TestHighlightRunner helper = new TestHighlightRunner() {
885 public void run() throws Exception {
887 doSearching("John Kenn*");
888 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
889 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Disjunction query "JFK OR Kennedy" through the standard runner.
897 public void testGetBestFragmentsWithOr() throws Exception {
898 TestHighlightRunner helper = new TestHighlightRunner() {
901 public void run() throws Exception {
903 doSearching("JFK OR Kennedy");
904 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
905 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Exercises the three getBestFragment(s) overloads for a "Kennedy" search:
// (1) token-stream + text, (2) analyzer + field + text, (3) multi-fragment
// analyzer variant; the highlight count is asserted after each pass
// (conditions truncated in this sampled view).
912 public void testGetBestSingleFragment() throws Exception {
914 TestHighlightRunner helper = new TestHighlightRunner() {
917 public void run() throws Exception {
918 doSearching("Kennedy");
920 for (int i = 0; i < hits.totalHits; i++) {
921 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
922 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
924 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
925 HighlighterTest.this);
926 highlighter.setTextFragmenter(new SimpleFragmenter(40));
927 String result = highlighter.getBestFragment(tokenStream, text);
928 if (VERBOSE) System.out.println("\t" + result);
930 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 2: convenience overload taking the analyzer directly.
934 for (int i = 0; i < hits.totalHits; i++) {
935 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
936 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
937 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
938 HighlighterTest.this);
939 highlighter.getBestFragment(analyzer, FIELD_NAME, text);
941 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 3: multi-fragment overload requesting up to 10 fragments.
945 for (int i = 0; i < hits.totalHits; i++) {
946 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
948 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
949 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
950 HighlighterTest.this);
951 highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
953 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Verifies that term weights drive fragment selection: with a tiny fragment size
// (2 chars) only one term's region can win. First "hello" (weight 10) beats
// "kennedy" (weight 1); after boosting "kennedy" to 50 the selection flips.
964 public void testGetBestSingleFragmentWithWeights() throws Exception {
966 TestHighlightRunner helper = new TestHighlightRunner() {
969 public void run() throws Exception {
970 WeightedSpanTerm[] wTerms = new WeightedSpanTerm[2];
971 wTerms[0] = new WeightedSpanTerm(10f, "hello");
// Position spans pin each weighted term to an explicit token position.
973 List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
974 positionSpans.add(new PositionSpan(0, 0));
975 wTerms[0].addPositionSpans(positionSpans);
977 wTerms[1] = new WeightedSpanTerm(1f, "kennedy");
978 positionSpans = new ArrayList<PositionSpan>();
979 positionSpans.add(new PositionSpan(14, 14));
980 wTerms[1].addPositionSpans(positionSpans);
982 Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
984 // QueryTermScorer(wTerms));
985 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
// Fragment size of 2 forces the highlighter to pick a single-term fragment.
986 highlighter.setTextFragmenter(new SimpleFragmenter(2));
988 String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
989 assertTrue("Failed to find best section using weighted terms. Found: [" + result + "]",
990 "<B>Hello</B>".equals(result));
// Re-weight so "kennedy" dominates and confirm the best fragment changes.
993 wTerms[1].setWeight(50f);
994 tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
995 highlighter = getHighlighter(wTerms, HighlighterTest.this);
996 highlighter.setTextFragmenter(new SimpleFragmenter(2));
998 result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
999 assertTrue("Failed to find best section using weighted terms. Found: " + result,
1000 "<B>kennedy</B>".equals(result));
1009 // tests a "complex" analyzer that produces multiple
1010 // overlapping tokens
// Uses SynonymAnalyzer (football -> soccer, footie) so the token stream contains
// stacked synonym tokens at the same positions; all synonym hits must be marked.
1011 public void testOverlapAnalyzer() throws Exception {
1012 TestHighlightRunner helper = new TestHighlightRunner() {
1015 public void run() throws Exception {
1016 HashMap<String,String> synonyms = new HashMap<String,String>();
1017 synonyms.put("football", "soccer,footie");
1018 Analyzer analyzer = new SynonymAnalyzer(synonyms);
1019 String srchkey = "football";
1021 String s = "football-soccer in the euro 2004 footie competition";
1022 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
1023 Query query = parser.parse(srchkey);
1025 TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
1027 Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);
1029 // Get 3 best fragments and separate with a "..."
// A fresh stream is needed: the one passed to getHighlighter was consumed.
1030 tokenStream = analyzer.tokenStream(null, new StringReader(s));
1032 String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
1033 String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
1034 assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
1035 + " actual:" + result, expectedResult.equals(result));
// Baseline single-term highlight test: searches "Kennedy", highlights each hit,
// and expects exactly 4 highlighted terms across all matched documents.
1044 public void testGetSimpleHighlight() throws Exception {
1045 TestHighlightRunner helper = new TestHighlightRunner() {
1048 public void run() throws Exception {
1050 doSearching("Kennedy");
1051 // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
1053 for (int i = 0; i < hits.totalHits; i++) {
1054 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1055 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1056 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1057 HighlighterTest.this);
1058 String result = highlighter.getBestFragment(tokenStream, text);
1059 if (VERBOSE) System.out.println("\t" + result);
1061 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1062 numHighlights == 4);
// Checks that getBestTextFragments (TextFragment objects) and getBestFragments
// (plain Strings) agree in both count and content for the same input.
1068 public void testGetTextFragments() throws Exception {
1069 TestHighlightRunner helper = new TestHighlightRunner() {
1072 public void run() throws Exception {
1074 doSearching("Kennedy");
1076 for (int i = 0; i < hits.totalHits; i++) {
1077 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1078 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1080 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1081 HighlighterTest.this);// new Highlighter(this, new
1082 // QueryTermScorer(query));
1083 highlighter.setTextFragmenter(new SimpleFragmenter(20));
1084 String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
// Re-tokenize: the previous call consumed the stream.
1086 tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1087 TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
1090 assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
1091 + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
1092 for (int j = 0; j < stringResults.length; j++) {
1093 if (VERBOSE) System.out.println(fragmentResults[j]);
1094 assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
1095 fragmentResults[j].toString().equals(stringResults[j]));
// Verifies setMaxDocCharsToAnalyze: with only the first 30 chars analyzed, the
// query term "meat" (which occurs later in texts[0]) must produce no highlights.
1105 public void testMaxSizeHighlight() throws Exception {
1106 final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET, true);
1107 // we disable MockTokenizer checks because we will forcefully limit the
1108 // tokenstream and call end() before incrementToken() returns false.
1109 analyzer.setEnableChecks(false);
1110 TestHighlightRunner helper = new TestHighlightRunner() {
1113 public void run() throws Exception {
1115 doSearching("meat");
1116 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
1117 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1118 HighlighterTest.this);// new Highlighter(this, new
1119 // QueryTermScorer(query));
1120 highlighter.setMaxDocCharsToAnalyze(30);
1122 highlighter.getBestFragment(tokenStream, texts[0]);
1123 assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
1124 + "us from finding matches for this record: " + numHighlights + " found",
1125 numHighlights == 0);
// Verifies that with a NullFragmenter the returned "fragment" is still truncated
// at maxDocCharsToAnalyze (100), even when the analyzed text is a goodtoken
// followed by thousands of stop-word repetitions.
1132 public void testMaxSizeHighlightTruncates() throws Exception {
1133 TestHighlightRunner helper = new TestHighlightRunner() {
1136 public void run() throws Exception {
1137 String goodWord = "goodtoken";
1138 Set<String> stopWords = new HashSet<String>(1);
1139 stopWords.add("stoppedtoken");
1141 TermQuery query = new TermQuery(new Term("data", goodWord));
1143 String match = null;
1144 StringBuilder sb = new StringBuilder();
1145 sb.append(goodWord);
// Pad the document with 10000 copies of the single stop word.
1146 for (int i = 0; i < 10000; i++) {
1148 // only one stopword
1149 sb.append(stopWords.iterator().next());
1151 SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
1152 Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords).tokenStream(
1153 "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
1155 // QueryTermScorer(query));
1156 hg.setTextFragmenter(new NullFragmenter());
1157 hg.setMaxDocCharsToAnalyze(100);
1158 match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
1159 assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
1160 .getMaxDocCharsToAnalyze());
1162 // add another tokenized word to the overall length - but set way
1164 // the length of text under consideration (after a large slug of stop
1168 sb.append(goodWord);
1169 match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
1170 assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
1171 .getMaxDocCharsToAnalyze());
// Verifies that when maxDocCharsToAnalyze covers the whole text (36 chars here),
// the trailing, unanalyzed remainder ("in it") is still appended to the fragment.
1179 public void testMaxSizeEndHighlight() throws Exception {
1180 TestHighlightRunner helper = new TestHighlightRunner() {
1182 public void run() throws Exception {
1183 Set<String> stopWords = new HashSet<String>();
1184 stopWords.add("in");
1185 stopWords.add("it");
1186 TermQuery query = new TermQuery(new Term("text", "searchterm"));
1188 String text = "this is a text with searchterm in it";
1189 SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
1190 Highlighter hg = getHighlighter(query, "text", new StandardAnalyzer(TEST_VERSION_CURRENT,
1191 stopWords).tokenStream("text", new StringReader(text)), fm);
1192 hg.setTextFragmenter(new NullFragmenter());
1193 hg.setMaxDocCharsToAnalyze(36);
1194 String match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "text", text);
1196 "Matched text should contain remainder of text after highlighted query ",
1197 match.endsWith("in it"));
// Demonstrates that multi-term queries (wildcard/prefix) produce ZERO highlights
// when the query is NOT rewritten first — the rewrite() call is deliberately
// commented out below, so the scorer never sees the expanded primitive terms.
1203 public void testUnRewrittenQuery() throws Exception {
1204 final TestHighlightRunner helper = new TestHighlightRunner() {
1207 public void run() throws Exception {
1209 // test to show how rewritten query can still be used
1210 if (searcher != null) searcher.close();
1211 searcher = new IndexSearcher(reader);
1212 Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
1214 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1215 Query query = parser.parse("JF? or Kenned*");
1216 if (VERBOSE) System.out.println("Searching with primitive query");
1217 // forget to set this and...
1218 // query=query.rewrite(reader);
1219 TopDocs hits = searcher.search(query, null, 1000);
1221 // create an instance of the highlighter with the tags used to surround
1223 // QueryHighlightExtractor highlighter = new
1224 // QueryHighlightExtractor(this,
1225 // query, new StandardAnalyzer(TEST_VERSION));
1227 int maxNumFragmentsRequired = 3;
1229 for (int i = 0; i < hits.totalHits; i++) {
1230 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1231 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
// Last arg 'false' — presumably disables query expansion in the helper; confirm
// against getHighlighter's signature elsewhere in this file.
1232 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
1234 highlighter.setTextFragmenter(new SimpleFragmenter(40));
1236 String highlightedText = highlighter.getBestFragments(tokenStream, text,
1237 maxNumFragmentsRequired, "...");
1239 if (VERBOSE) System.out.println(highlightedText);
1241 // We expect to have zero highlights if the query is multi-terms and is
1244 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1245 numHighlights == 0);
// Verifies getBestFragment returns null (not empty string) when the query
// matches nothing in the text.
1252 public void testNoFragments() throws Exception {
1253 TestHighlightRunner helper = new TestHighlightRunner() {
1256 public void run() throws Exception {
1257 doSearching("AnInvalidQueryWhichShouldYieldNoResults");
1259 for (int i = 0; i < texts.length; i++) {
1260 String text = texts[i];
1261 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1262 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1263 HighlighterTest.this);
1264 String result = highlighter.getBestFragment(tokenStream, text);
1265 assertNull("The highlight result should be null for text with no query terms", result);
1274 * Demonstrates creation of an XHTML compliant doc using new encoding facilities.
// Round-trip test: SimpleHTMLEncoder escapes &, <, > and quotes in the snippet;
// embedding it in XHTML and re-parsing with a DOM parser must recover the raw text.
1278 public void testEncoding() throws Exception {
1280 String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
1281 // run the highlighter on the raw content (scorer does not score any tokens
1283 // highlighting but scores a single fragment for selection
// Dummy Scorer: scores nothing, so the encoder's output is tested in isolation.
1284 Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new Scorer() {
1285 public void startFragment(TextFragment newFragment) {
1288 public float getTokenScore() {
1292 public float getFragmentScore() {
1296 public TokenStream init(TokenStream tokenStream) {
1300 highlighter.setTextFragmenter(new SimpleFragmenter(2000));
1301 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
1303 String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
1304 // An ugly bit of XML creation:
1305 String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
1306 + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"
1307 + "<head>\n" + "<title>My Test HTML Document</title>\n" + "</head>\n" + "<body>\n" + "<h2>"
1308 + encodedSnippet + "</h2>\n" + "</body>\n" + "</html>";
1309 // now an ugly bit of XML parsing to test the snippet is encoded OK
1310 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
1311 DocumentBuilder db = dbf.newDocumentBuilder();
// NOTE(review): getBytes() uses the platform default charset although the XML
// header declares UTF-8 — works here because the content is ASCII-only.
1312 org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
1313 Element root = doc.getDocumentElement();
1314 NodeList nodes = root.getElementsByTagName("body");
1315 Element body = (Element) nodes.item(0);
1316 nodes = body.getElementsByTagName("h2");
1317 Element h2 = (Element) nodes.item(0);
1318 String decodedSnippet = h2.getFirstChild().getNodeValue();
1319 assertEquals("XHTML Encoding should have worked:", rawDocContent, decodedSnippet);
// Highlights results from a MultiSearcher over two single-doc indexes. Because
// MultiSearcher combines sub-queries, the wildcard query is rewritten against
// each reader and re-combined before being handed to QueryTermScorer.
1322 public void testMultiSearcher() throws Exception {
// Index 1: one document containing "multiOne".
1324 Directory ramDir1 = newDirectory();
1325 IndexWriter writer1 = new IndexWriter(ramDir1, newIndexWriterConfig(
1326 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1327 Document d = new Document();
1328 Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
1330 writer1.addDocument(d);
1333 IndexReader reader1 = IndexReader.open(ramDir1, true);
// Index 2: one document containing "multiTwo".
1336 Directory ramDir2 = newDirectory();
1337 IndexWriter writer2 = new IndexWriter(ramDir2, newIndexWriterConfig(
1338 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1340 f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
1342 writer2.addDocument(d);
1345 IndexReader reader2 = IndexReader.open(ramDir2, true);
1347 IndexSearcher searchers[] = new IndexSearcher[2];
1348 searchers[0] = new IndexSearcher(ramDir1, true);
1349 searchers[1] = new IndexSearcher(ramDir2, true);
1350 MultiSearcher multiSearcher = new MultiSearcher(searchers);
1351 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
1352 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
1353 query = parser.parse("multi*");
1354 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
1355 // at this point the multisearcher calls combine(query[])
1356 hits = multiSearcher.search(query, null, 1000);
1358 // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
// Rewrite against each sub-reader and combine so the scorer sees concrete terms.
1359 Query expandedQueries[] = new Query[2];
1360 expandedQueries[0] = query.rewrite(reader1);
1361 expandedQueries[1] = query.rewrite(reader2);
1362 query = query.combine(expandedQueries);
1364 // create an instance of the highlighter with the tags used to surround
1366 Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
1368 for (int i = 0; i < hits.totalHits; i++) {
1369 String text = multiSearcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1370 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1371 String highlightedText = highlighter.getBestFragment(tokenStream, text);
1372 if (VERBOSE) System.out.println(highlightedText);
1374 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1375 numHighlights == 2);
1378 searchers[0].close();
1379 searchers[1].close();
// Verifies field awareness: a scorer bound to FIELD_NAME must ignore the
// "category:people" clause, while a field-agnostic scorer highlights both terms.
1384 public void testFieldSpecificHighlighting() throws Exception {
1385 TestHighlightRunner helper = new TestHighlightRunner() {
1388 public void run() throws Exception {
1389 String docMainText = "fred is one of the people";
1390 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1391 Query query = parser.parse("fred category:people");
1393 // highlighting respects fieldnames used in query
1395 Scorer fieldSpecificScorer = null;
1396 if (mode == TestHighlightRunner.QUERY) {
1397 fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
1398 } else if (mode == TestHighlightRunner.QUERY_TERM) {
1399 fieldSpecificScorer = new QueryTermScorer(query, "contents");
1401 Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
1402 fieldSpecificScorer);
1403 fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
1404 String result = fieldSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
1405 assertEquals("Should match", result, "<B>fred</B> is one of the people");
1407 // highlighting does not respect fieldnames used in query
// A null/absent field makes the scorer consider terms from every field.
1408 Scorer fieldInSpecificScorer = null;
1409 if (mode == TestHighlightRunner.QUERY) {
1410 fieldInSpecificScorer = new QueryScorer(query, null);
1411 } else if (mode == TestHighlightRunner.QUERY_TERM) {
1412 fieldInSpecificScorer = new QueryTermScorer(query);
1415 Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
1416 fieldInSpecificScorer);
1417 fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
1418 result = fieldInSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
1419 assertEquals("Should match", result, "<B>fred</B> is one of the <B>people</B>");
// Hand-built token stream for "Hi-Speed10 foo" with overlapping tokens:
// "speed" (posInc 0) stacks on top of "hispeed" alternatives. The smaller
// token "hi" comes first; getTS2a() below emits the bigger token first.
1429 protected TokenStream getTS2() {
1430 // String s = "Hi-Speed10 foo";
1431 return new TokenStream() {
1432 Iterator<Token> iter;
1434 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
1435 private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1436 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
1438 lst = new ArrayList<Token>();
// Tokens: term, startOffset, endOffset; posInc 0 means "same position as previous".
1440 t = createToken("hi", 0, 2);
1441 t.setPositionIncrement(1);
1443 t = createToken("hispeed", 0, 8);
1444 t.setPositionIncrement(1);
1446 t = createToken("speed", 3, 8);
1447 t.setPositionIncrement(0);
1449 t = createToken("10", 8, 10);
1450 t.setPositionIncrement(1);
1452 t = createToken("foo", 11, 14);
1453 t.setPositionIncrement(1);
1455 iter = lst.iterator();
1459 public boolean incrementToken() throws IOException {
1460 if(iter.hasNext()) {
1461 Token token = iter.next();
// Copy the canned token's term/posInc/offsets into this stream's attributes.
1463 termAtt.setEmpty().append(token);
1464 posIncrAtt.setPositionIncrement(token.getPositionIncrement());
1465 offsetAtt.setOffset(token.startOffset(), token.endOffset());
1474 // same token-stream as above, but the bigger token comes first this time
1475 protected TokenStream getTS2a() {
1476 // String s = "Hi-Speed10 foo";
1477 return new TokenStream() {
1478 Iterator<Token> iter;
1480 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
1481 private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1482 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
1484 lst = new ArrayList<Token>();
// "hispeed" leads; "hi" stacks on it with posInc 0 (reverse order of getTS2()).
1486 t = createToken("hispeed", 0, 8);
1487 t.setPositionIncrement(1);
1489 t = createToken("hi", 0, 2);
1490 t.setPositionIncrement(0);
1492 t = createToken("speed", 3, 8);
1493 t.setPositionIncrement(1);
1495 t = createToken("10", 8, 10);
1496 t.setPositionIncrement(1);
1498 t = createToken("foo", 11, 14);
1499 t.setPositionIncrement(1);
1501 iter = lst.iterator();
1505 public boolean incrementToken() throws IOException {
1506 if(iter.hasNext()) {
1507 Token token = iter.next();
1509 termAtt.setEmpty().append(token);
1510 posIncrAtt.setPositionIncrement(token.getPositionIncrement());
1511 offsetAtt.setOffset(token.startOffset(), token.endOffset());
// Runs the same six queries (foo, 10, hi, speed, hispeed, "hi speed") against
// both overlapping-token orderings (getTS2 and getTS2a) and expects identical
// highlighted output, proving highlight merging is order-insensitive.
1519 public void testOverlapAnalyzer2() throws Exception {
1520 TestHighlightRunner helper = new TestHighlightRunner() {
1523 public void run() throws Exception {
1524 String s = "Hi-Speed10 foo";
1527 Highlighter highlighter;
// --- smaller overlapping token first (getTS2) ---
1530 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
1531 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1532 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1533 assertEquals("Hi-Speed10 <B>foo</B>", result);
1535 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
1536 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1537 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1538 assertEquals("Hi-Speed<B>10</B> foo", result);
1540 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
1541 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1542 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1543 assertEquals("<B>Hi</B>-Speed10 foo", result);
1545 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
1546 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1547 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1548 assertEquals("Hi-<B>Speed</B>10 foo", result);
1550 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
1551 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1552 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1553 assertEquals("<B>Hi-Speed</B>10 foo", result);
1555 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
1556 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1557 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1558 assertEquals("<B>Hi-Speed</B>10 foo", result);
1560 // ///////////////// same tests, just put the bigger overlapping token
1562 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
1563 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1564 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1565 assertEquals("Hi-Speed10 <B>foo</B>", result);
1567 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
1568 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1569 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1570 assertEquals("Hi-Speed<B>10</B> foo", result);
1572 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
1573 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1574 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1575 assertEquals("<B>Hi</B>-Speed10 foo", result);
1577 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
1578 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1579 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1580 assertEquals("Hi-<B>Speed</B>10 foo", result);
1582 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
1583 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1584 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1585 assertEquals("<B>Hi-Speed</B>10 foo", result);
1587 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
1588 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1589 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1590 assertEquals("<B>Hi-Speed</B>10 foo", result);
// Fields of a nested test class (class header not visible in this extraction).
// dir: index directory built by makeIndex(); a: whitespace analyzer shared by
// indexing and query parsing below.
1597 private Directory dir;
1598 private Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
// Body missing from this extraction — presumably calls makeIndex(),
// deleteDocument() and searchIndex() in sequence; confirm against full source.
1600 public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException {
// Builds a single-field, stored+analyzed Document (return statement is on a
// line missing from this extraction).
1606 private Document doc( String f, String v ){
1607 Document doc = new Document();
1608 doc.add( new Field( f, v, Store.YES, Index.ANALYZED ) );
// Creates the test index: four t_text1 docs, three of them tagged with the
// token "del" so deleteDocument() can remove them by term. Merged to one
// segment so deletes later produce the interesting (negative-idf) case.
1612 private void makeIndex() throws IOException {
1613 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
1614 writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
1615 writer.addDocument( doc( "t_text1", "more random words for second field del" ) );
1616 writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
1617 writer.addDocument( doc( "t_text1", "more random words for second field" ) );
1618 writer.forceMerge(1);
// Deletes the three docs containing "del" (APPEND mode reopens the existing
// index). Deliberately does NOT merge afterwards — keeping the deletes
// unexpunged is what provokes the negative-idf scenario tested below.
1622 private void deleteDocument() throws IOException {
1623 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
1624 writer.deleteDocuments( new Term( "t_text1", "del" ) );
1625 // To see negative idf, keep comment the following line
1626 //writer.forceMerge(1);
// Searches t_text1:random after the deletes and highlights each surviving hit.
// QueryTermScorer built with the IndexReader uses idf, which can go negative
// on an index with pending deletes — the assertion checks the fragment is
// still produced correctly for the remaining document.
1630 private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
1631 String q = "t_text1:random";
1632 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
1633 Query query = parser.parse( q );
1634 IndexReader reader = IndexReader.open(dir);
1635 IndexSearcher searcher = new IndexSearcher(reader);
1636 // This scorer can return negative idf -> null fragment
1637 Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
1638 // This scorer doesn't use idf (patch version)
1639 //Scorer scorer = new QueryTermScorer( query, "t_text1" );
1640 Highlighter h = new Highlighter( scorer );
1642 TopDocs hits = searcher.search(query, null, 10);
1643 for( int i = 0; i < hits.totalHits; i++ ){
1644 Document doc = searcher.doc( hits.scoreDocs[i].doc );
1645 String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
1646 if (VERBOSE) System.out.println("result:" + result);
1647 assertEquals("more <B>random</B> words for second field", result);
1655 * public void testBigramAnalyzer() throws IOException, ParseException {
1656 * //test to ensure analyzers with none-consecutive start/end offsets //dont
1657 * double-highlight text //setup index 1 RAMDirectory ramDir = new
1658 * RAMDirectory(); Analyzer bigramAnalyzer=new CJKAnalyzer(); IndexWriter
1659 * writer = new IndexWriter(ramDir,bigramAnalyzer , true); Document d = new
1660 * Document(); Field f = new Field(FIELD_NAME, "java abc def", true, true,
1661 * true); d.add(f); writer.addDocument(d); writer.close(); IndexReader reader =
1662 * IndexReader.open(ramDir, true);
1664 * IndexSearcher searcher=new IndexSearcher(reader); query =
1665 * QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer);
1666 * System.out.println("Searching for: " + query.toString(FIELD_NAME)); hits =
1667 * searcher.search(query);
1669 * Highlighter highlighter = new Highlighter(this,new
1670 * QueryFragmentScorer(query));
1672 * for (int i = 0; i < hits.totalHits; i++) { String text =
1673 * searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
1674 * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
1675 * String highlightedText = highlighter.getBestFragment(tokenStream,text);
1676 * System.out.println(highlightedText); } }
// Formatter callback (HighlighterTest acts as its own Formatter): wraps scored
// tokens in <B>..</B> and counts them so tests can assert on numHighlights.
1679 public String highlightTerm(String originalText, TokenGroup group) {
1680 if (group.getTotalScore() <= 0) {
1681 return originalText;
1683 numHighlights++; // update stats used in assertions
1684 return "<B>" + originalText + "</B>";
// Parses the query string with position increments enabled and multi-term
// queries rewritten to scoring boolean queries (so wildcard/prefix terms are
// visible to the highlighter), then stores it in the shared 'query' field.
1687 public void doSearching(String queryString) throws Exception {
1688 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1689 parser.setEnablePositionIncrements(true);
1690 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
1691 query = parser.parse(queryString);
// Rewrites the query against the shared reader (mandatory for multi-term
// queries to highlight), runs the search, and stores results in 'hits'.
1695 public void doSearching(Query unReWrittenQuery) throws Exception {
1696 if (searcher != null) searcher.close();
1697 searcher = new IndexSearcher(reader);
1698 // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc)
1699 // you must use a rewritten query!
1700 query = unReWrittenQuery.rewrite(reader);
1701 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
1702 hits = searcher.search(query, null, 1000);
// Shared assertion helper: highlights every current hit with a QueryScorer and
// checks numHighlights equals the expected count.
1705 public void assertExpectedHighlightCount(final int maxNumFragmentsRequired,
1706 final int expectedHighlights) throws Exception {
1707 for (int i = 0; i < hits.totalHits; i++) {
1708 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1709 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1710 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
1711 Highlighter highlighter = new Highlighter(this, scorer);
1713 highlighter.setTextFragmenter(new SimpleFragmenter(40));
1715 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
1717 if (VERBOSE) System.out.println("\t" + result);
1719 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1720 numHighlights == expectedHighlights);
// Test fixture: builds ramDir with one text doc per entry in 'texts' plus four
// NumericField docs (values 1, 3, 5, 7) for numeric-range highlight tests,
// merged to a single segment, then opens the shared reader.
1725 public void setUp() throws Exception {
1727 dir = newDirectory();
1728 ramDir = newDirectory();
1729 IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(
1730 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1731 for (int i = 0; i < texts.length; i++) {
1732 addDoc(writer, texts[i]);
// Four numeric docs; the doc.add(nfield) lines are missing from this extraction.
1734 Document doc = new Document();
1735 NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1736 nfield.setIntValue(1);
1738 writer.addDocument(doc, analyzer);
1739 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1740 nfield.setIntValue(3);
1741 doc = new Document();
1743 writer.addDocument(doc, analyzer);
1744 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1745 nfield.setIntValue(5);
1746 doc = new Document();
1748 writer.addDocument(doc, analyzer);
1749 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1750 nfield.setIntValue(7);
1751 doc = new Document();
1753 writer.addDocument(doc, analyzer);
1754 writer.forceMerge(1);
1756 reader = IndexReader.open(ramDir, true);
// Fixture teardown: closes the searcher if a test opened one (remaining
// cleanup lines are missing from this extraction).
1761 public void tearDown() throws Exception {
1762 if (searcher != null) searcher.close();
// Adds one stored+analyzed FIELD_NAME document with the given text (the
// d.add(f) line is missing from this extraction).
1768 private void addDoc(IndexWriter writer, String text) throws IOException {
1769 Document d = new Document();
1770 Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
1772 writer.addDocument(d);
// Token factory used by getTS2()/getTS2a(); 'offset' is the END offset
// (Token(term, startOffset, endOffset)) — the parameter name is misleading.
1776 private static Token createToken(String term, int start, int offset)
1778 return new Token(term, start, offset);
1783 // ===================================================================
1784 // ========== BEGIN TEST SUPPORTING CLASSES
1785 // ========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
1786 // ========== MADE MORE GENERALLY USEFUL.
1787 // TODO - make synonyms all interchangeable with each other and produce
1788 // a version that does hyponyms - the "is a specialised type of ...."
1789 // so that car = audi, bmw and volkswagen but bmw != audi so different
1790 // behaviour to synonyms
1791 // ===================================================================
// Analyzer that lower-cases input and stacks synonym tokens (via
// SynonymTokenizer) at the position of each matched term, using the supplied
// term -> "syn1,syn2" map.
1793 final class SynonymAnalyzer extends Analyzer {
1794 private Map<String,String> synonyms;
1796 public SynonymAnalyzer(Map<String,String> synonyms) {
1797 this.synonyms = synonyms;
1803 * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
1807 public TokenStream tokenStream(String arg0, Reader arg1) {
1808 LowerCaseTokenizer stream = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, arg1);
// Pre-register the attributes the wrapping SynonymTokenizer will read.
1809 stream.addAttribute(CharTermAttribute.class);
1810 stream.addAttribute(PositionIncrementAttribute.class);
1811 stream.addAttribute(OffsetAttribute.class);
1814 } catch (IOException e) {
1815 throw new RuntimeException(e);
1817 return new SynonymTokenizer(stream, synonyms);
1822 * Expands a token stream with synonyms (TODO - make the synonyms analyzed by choice of analyzer)
// Wraps a real TokenStream and, for any term with an entry in the synonym
// map, emits the synonyms as extra tokens at the same position (position
// increment 0) and with the original token's offsets.
1825 final class SynonymTokenizer extends TokenStream {
1826 private final TokenStream realStream;
// Non-null while synonyms for the current real token are still pending.
1827 private Token currentRealToken = null;
1828 private final Map<String, String> synonyms;
// Iterates the comma-separated synonym list for the current token.
1829 private StringTokenizer st = null;
// Attribute views on the wrapped stream ("real*") ...
1830 private final CharTermAttribute realTermAtt;
1831 private final PositionIncrementAttribute realPosIncrAtt;
1832 private final OffsetAttribute realOffsetAtt;
// ... and the attributes this stream exposes to its own consumers.
1833 private final CharTermAttribute termAtt;
1834 private final PositionIncrementAttribute posIncrAtt;
1835 private final OffsetAttribute offsetAtt;
1837 public SynonymTokenizer(TokenStream realStream, Map<String, String> synonyms) {
1838 this.realStream = realStream;
1839 this.synonyms = synonyms;
1840 realTermAtt = realStream.addAttribute(CharTermAttribute.class);
1841 realPosIncrAtt = realStream.addAttribute(PositionIncrementAttribute.class);
1842 realOffsetAtt = realStream.addAttribute(OffsetAttribute.class);
1844 termAtt = addAttribute(CharTermAttribute.class);
1845 posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1846 offsetAtt = addAttribute(OffsetAttribute.class);
1850 public boolean incrementToken() throws IOException {
// No synonyms pending: advance the wrapped stream and copy its token out.
1852 if (currentRealToken == null) {
1853 boolean next = realStream.incrementToken();
1857 //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
1859 termAtt.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
1860 offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
1861 posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement());
// If this term has synonyms, remember the real token so the following
// calls can emit them at the same position.
1863 String expansions = synonyms.get(realTermAtt.toString());
1864 if (expansions == null) {
1867 st = new StringTokenizer(expansions, ",");
1868 if (st.hasMoreTokens()) {
1869 currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
1870 currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
// Synonym-emitting branch: one synonym per call, sharing the real
// token's offsets and stacked at the same position (increment 0).
1875 String tok = st.nextToken();
1877 termAtt.setEmpty().append(tok);
1878 offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset());
1879 posIncrAtt.setPositionIncrement(0);
// Last synonym consumed: fall back to the wrapped stream next call.
1880 if (!st.hasMoreTokens()) {
1881 currentRealToken = null;
// Clears the pending-synonym state so the stream can be consumed again.
// NOTE(review): the realStream.reset()/st reset lines are elided here.
1890 public void reset() throws IOException {
1892 this.currentRealToken = null;
// (fragment — the class continues past this view) Harness that runs each
// highlighting test twice: once with QueryScorer (mode == QUERY) and once
// with QueryTermScorer (mode == QUERY_TERM). Subclasses implement run().
1896 static abstract class TestHighlightRunner {
// Scorer-selection modes; the 'mode' field they are compared against is
// declared on an elided line.
1897 static final int QUERY = 0;
1898 static final int QUERY_TERM = 1;
// Default fragmenter: 20-character fragments.
1901 Fragmenter frag = new SimpleFragmenter(20);
// Convenience overload: multi-term query expansion enabled by default.
1903 public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
1904 return getHighlighter(query, fieldName, stream, formatter, true);
// Builds a Highlighter whose scorer matches the current mode.
1907 public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
1908 Scorer scorer = null;
1909 if (mode == QUERY) {
1910 scorer = new QueryScorer(query, fieldName);
// Only QueryScorer supports toggling multi-term query expansion.
1911 if(!expanMultiTerm) {
1912 ((QueryScorer)scorer).setExpandMultiTermQuery(false);
1914 } else if (mode == QUERY_TERM) {
1915 scorer = new QueryTermScorer(query);
1917 throw new RuntimeException("Unknown highlight mode");
1920 return new Highlighter(formatter, scorer);
// Variant that scores from pre-weighted terms instead of a Query.
1923 Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
1924 if (mode == QUERY) {
// QUERY mode requires the terms to actually be WeightedSpanTerms.
1925 return new Highlighter(formatter, new QueryScorer((WeightedSpanTerm[]) weightedTerms));
1926 } else if (mode == QUERY_TERM) {
1927 return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
1930 throw new RuntimeException("Unknown highlight mode");
// Convenience overload: no multi-term expansion.
1934 void doStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, Formatter formatter)
1936 doStandardHighlights(analyzer, searcher, hits, query, formatter, false);
// Highlights the best fragments of every hit's FIELD_NAME text, choosing
// the scorer by mode, and prints the result when VERBOSE.
1939 void doStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, Formatter formatter, boolean expandMT)
1942 for (int i = 0; i < hits.totalHits; i++) {
1943 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
1944 int maxNumFragmentsRequired = 2;
1945 String fragmentSeparator = "...";
1946 Scorer scorer = null;
// Re-analyze the stored text to get a fresh token stream to highlight.
1947 TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
1948 if (mode == QUERY) {
1949 scorer = new QueryScorer(query);
1950 } else if (mode == QUERY_TERM) {
1951 scorer = new QueryTermScorer(query);
1953 Highlighter highlighter = new Highlighter(formatter, scorer);
1954 highlighter.setTextFragmenter(frag);
1956 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
1958 if (HighlighterTest.VERBOSE) System.out.println("\t" + result);
// Subclasses put the actual test body here; start() invokes it per mode.
1962 abstract void run() throws Exception;
// Runs the test under both scorer modes (mode assignments and run() calls
// are on elided lines between these prints).
1964 void start() throws Exception {
1965 if (HighlighterTest.VERBOSE) System.out.println("Run QueryScorer");
1967 if (HighlighterTest.VERBOSE) System.out.println("Run QueryTermScorer");