1 package org.apache.lucene.search.highlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.ByteArrayInputStream;
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.io.StringReader;
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
31 import java.util.StringTokenizer;
33 import javax.xml.parsers.DocumentBuilder;
34 import javax.xml.parsers.DocumentBuilderFactory;
36 import org.apache.lucene.analysis.Analyzer;
37 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
38 import org.apache.lucene.analysis.CharArraySet;
39 import org.apache.lucene.analysis.LowerCaseTokenizer;
40 import org.apache.lucene.analysis.MockAnalyzer;
41 import org.apache.lucene.analysis.MockTokenizer;
42 import org.apache.lucene.analysis.SimpleAnalyzer;
43 import org.apache.lucene.analysis.StopAnalyzer;
44 import org.apache.lucene.analysis.Token;
45 import org.apache.lucene.analysis.TokenStream;
46 import org.apache.lucene.analysis.WhitespaceAnalyzer;
47 import org.apache.lucene.analysis.standard.StandardAnalyzer;
48 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
49 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
50 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
51 import org.apache.lucene.document.Document;
52 import org.apache.lucene.document.Field;
53 import org.apache.lucene.document.NumericField;
54 import org.apache.lucene.document.Field.Index;
55 import org.apache.lucene.document.Field.Store;
56 import org.apache.lucene.index.IndexReader;
57 import org.apache.lucene.index.IndexWriter;
58 import org.apache.lucene.index.IndexWriterConfig;
59 import org.apache.lucene.index.Term;
60 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
61 import org.apache.lucene.queryParser.ParseException;
62 import org.apache.lucene.queryParser.QueryParser;
63 import org.apache.lucene.search.BooleanQuery;
64 import org.apache.lucene.search.FilteredQuery;
65 import org.apache.lucene.search.IndexSearcher;
66 import org.apache.lucene.search.MultiPhraseQuery;
67 import org.apache.lucene.search.MultiSearcher;
68 import org.apache.lucene.search.MultiTermQuery;
69 import org.apache.lucene.search.NumericRangeQuery;
70 import org.apache.lucene.search.PhraseQuery;
71 import org.apache.lucene.search.Query;
72 import org.apache.lucene.search.TermQuery;
73 import org.apache.lucene.search.TermRangeFilter;
74 import org.apache.lucene.search.TopDocs;
75 import org.apache.lucene.search.WildcardQuery;
76 import org.apache.lucene.search.BooleanClause.Occur;
77 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
78 import org.apache.lucene.search.regex.RegexQuery;
79 import org.apache.lucene.search.regex.SpanRegexQuery;
80 import org.apache.lucene.search.spans.SpanNearQuery;
81 import org.apache.lucene.search.spans.SpanNotQuery;
82 import org.apache.lucene.search.spans.SpanOrQuery;
83 import org.apache.lucene.search.spans.SpanQuery;
84 import org.apache.lucene.search.spans.SpanTermQuery;
85 import org.apache.lucene.store.Directory;
86 import org.apache.lucene.util.LuceneTestCase;
87 import org.w3c.dom.Element;
88 import org.w3c.dom.NodeList;
91 * JUnit Test for Highlighter class.
// JUnit test for the Lucene Highlighter. Implements Formatter so that each
// highlighted term callback can be counted in numHighlights (the Formatter
// implementation itself is outside this extraction).
// NOTE(review): the embedded original line numbers jump (94, 96, 101, 103, 107...),
// so declarations are missing from this chunk — in particular the opening of the
// texts[] array before line 107. Treat this extraction as non-compilable as-is.
94 public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
96 private IndexReader reader;
97 static final String FIELD_NAME = "contents";
98 private static final String NUMERIC_FIELD_NAME = "nfield";
101 public IndexSearcher searcher = null;
102 int numHighlights = 0;
103 final Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
// Sample documents — presumably indexed into ramDir by a setUp() that is not
// visible in this chunk; several mention "Kennedy"/"kennedy" so the
// highlight-count assertions in the tests below have known targets.
107 "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
108 "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
109 "JFK has been shot", "John Kennedy has been shot",
110 "This text has a typo in referring to Keneddy",
111 "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
// Smoke test: run a phrase query, then highlight each hit's stored field using
// QueryScorer + SimpleSpanFragmenter via TokenSources.getAnyTokenStream.
// No assertions visible — checks only that no exception is thrown.
// NOTE(review): closing braces for the loop and method (original lines 138-141)
// are missing from this extraction.
113 public void testQueryScorerHits() throws Exception {
114 Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
115 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
116 query = qp.parse("\"very long\"");
117 searcher = new IndexSearcher(ramDir, true);
118 TopDocs hits = searcher.search(query, 10);
120 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
121 Highlighter highlighter = new Highlighter(scorer);
124 for (int i = 0; i < hits.scoreDocs.length; i++) {
125 Document doc = searcher.doc(hits.scoreDocs[i].doc);
126 String storedField = doc.get(FIELD_NAME);
// Re-analyzes the stored field (or uses term vectors if present) to get offsets.
128 TokenStream stream = TokenSources.getAnyTokenStream(searcher
129 .getIndexReader(), hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
131 Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
133 highlighter.setTextFragmenter(fragmenter);
135 String fragment = highlighter.getBestFragment(stream, storedField);
137 if (VERBOSE) System.out.println(fragment);
// Verifies field-name sensitivity of highlighting: a query on the default
// field highlights text regardless of the field name passed to highlightField,
// while a query explicitly scoped to a different field ("text:") produces no
// highlighting — highlightField then returns the input unchanged (s1).
// NOTE(review): the assertEquals argument lists are truncated mid-call in this
// extraction (original lines 156-157 and 167 are missing).
142 public void testHighlightingWithDefaultField() throws Exception {
144 String s1 = "I call our world Flatland, not because we call it so,";
146 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
148 // Verify that a query against the default field results in text being
150 // regardless of the field name.
151 Query q = parser.parse("\"world Flatland\"~3");
152 String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
153 String observed = highlightField(q, "SOME_FIELD_NAME", s1);
154 if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
155 assertEquals("Query in the default field results in text for *ANY* field being highlighted",
158 // Verify that a query against a named field does not result in any
160 // when the query field name differs from the name of the field being
162 // which in this example happens to be the default field name.
163 q = parser.parse("text:\"world Flatland\"~3");
165 observed = highlightField(q, FIELD_NAME, s1);
166 if (VERBOSE) System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + observed);
168 "Query in a named field does not result in highlighting when that field isn't in the query",
169 s1, highlightField(q, FIELD_NAME, s1));
173 * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
174 * @throws InvalidTokenOffsetsException
// Helper: highlight `text` as if it were stored under `fieldName`, scoring
// against `query` with FIELD_NAME as the default field. Returns the original
// text unchanged when nothing was highlighted (empty best-fragment result),
// which is what the named-field negative test above relies on.
176 private static String highlightField(Query query, String fieldName, String text)
177 throws IOException, InvalidTokenOffsetsException {
178 TokenStream tokenStream = new StandardAnalyzer(TEST_VERSION_CURRENT).tokenStream(fieldName, new StringReader(text));
179 // Assuming "<B>", "</B>" used to highlight
180 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
181 QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
182 Highlighter highlighter = new Highlighter(formatter, scorer);
// MAX_VALUE fragment size => the whole text comes back as one fragment.
183 highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
185 String rv = highlighter.getBestFragments(tokenStream, text, 1, "(FIELD TEXT TRUNCATED)");
186 return rv.length() == 0 ? text : rv;
// Highlights "Kennedy" hits with a span-based QueryScorer and a 40-char
// SimpleFragmenter; explicitly asserts nothing (exception-free run only).
// NOTE(review): the getBestFragments separator argument and the loop/method
// closing braces are missing from this extraction (gaps at 202, 204, 206-207).
189 public void testSimpleSpanHighlighter() throws Exception {
190 doSearching("Kennedy");
192 int maxNumFragmentsRequired = 2;
194 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
195 Highlighter highlighter = new Highlighter(scorer);
197 for (int i = 0; i < hits.totalHits; i++) {
198 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
199 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
200 new StringReader(text));
201 highlighter.setTextFragmenter(new SimpleFragmenter(40));
203 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
205 if (VERBOSE) System.out.println("\t" + result);
208 // Not sure we can assert anything here - just running to check we dont
209 // throw any exceptions
// Regression test: repeated phrase terms across two fields combined in a
// boolean query must still yield the correct highlight count. Multi-term
// expansion is disabled so only the literal phrase terms are scored.
// NOTE(review): the expected numHighlights value in the final assertTrue
// (original lines 235+) is missing from this extraction.
214 public void testRepeatingTermsInMultBooleans() throws Exception {
215 String content = "x y z a b c d e f g b c g";
216 String ph1 = "\"a b c d\"";
217 String ph2 = "\"b c g\"";
// NOTE(review): declarations of f1/f2 (original lines 217-218) are not visible here.
220 String f1c = f1 + ":";
221 String f2c = f2 + ":";
222 String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
223 + " OR " + f2c + ph2 + ")";
224 Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
225 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
226 Query query = qp.parse(q);
228 QueryScorer scorer = new QueryScorer(query, f1);
229 scorer.setExpandMultiTermQuery(false);
// `this` is the Formatter — each highlighted term increments numHighlights.
231 Highlighter h = new Highlighter(this, scorer);
233 h.getBestFragment(analyzer, f1, content);
235 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Exercises QueryScorer phrase highlighting over three increasingly tricky
// phrases: a plain phrase, a phrase with stop-ish words, and a phrase made of
// heavily repeated terms. Each pass re-runs the search and checks the
// highlight count via the trailing assertTrue.
// NOTE(review): every getBestFragments separator argument, the loop closers,
// and the expected counts in the assertTrue calls are missing (numbering gaps).
239 public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
240 doSearching("\"very long and contains\"");
242 int maxNumFragmentsRequired = 2;
244 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
245 Highlighter highlighter = new Highlighter(this, scorer);
247 for (int i = 0; i < hits.totalHits; i++) {
248 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
249 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
251 highlighter.setTextFragmenter(new SimpleFragmenter(40));
253 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
255 if (VERBOSE) System.out.println("\t" + result);
258 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Second pass: phrase beginning a document.
262 doSearching("\"This piece of text refers to Kennedy\"");
264 maxNumFragmentsRequired = 2;
266 scorer = new QueryScorer(query, FIELD_NAME);
267 highlighter = new Highlighter(this, scorer);
269 for (int i = 0; i < hits.totalHits; i++) {
270 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
271 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
273 highlighter.setTextFragmenter(new SimpleFragmenter(40));
275 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
277 if (VERBOSE) System.out.println("\t" + result);
280 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Third pass: phrase whose terms repeat many times within the document.
284 doSearching("\"lets is a the lets is a the lets is a the lets\"");
286 maxNumFragmentsRequired = 2;
288 scorer = new QueryScorer(query, FIELD_NAME);
289 highlighter = new Highlighter(this, scorer);
291 for (int i = 0; i < hits.totalHits; i++) {
292 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
293 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
295 highlighter.setTextFragmenter(new SimpleFragmenter(40));
297 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
299 if (VERBOSE) System.out.println("\t" + result);
302 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Two parallel tests: highlighting of a SpanRegexQuery (wrapped in SpanOrQuery)
// and of a plain RegexQuery, both matching "ken.*" terms. Structure is
// identical: search, highlight each hit, assert the highlight count.
// NOTE(review): separator arguments, closing braces, and expected counts are
// missing throughout (embedded numbering gaps).
306 public void testSpanRegexQuery() throws Exception {
307 query = new SpanOrQuery(new SpanQuery [] {
308 new SpanRegexQuery(new Term(FIELD_NAME, "ken.*")) });
309 searcher = new IndexSearcher(ramDir, true);
310 hits = searcher.search(query, 100);
311 int maxNumFragmentsRequired = 2;
313 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
314 Highlighter highlighter = new Highlighter(this, scorer);
316 for (int i = 0; i < hits.totalHits; i++) {
317 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
318 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
320 highlighter.setTextFragmenter(new SimpleFragmenter(40));
322 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
324 if (VERBOSE) System.out.println("\t" + result);
327 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Same scenario with the non-span RegexQuery.
331 public void testRegexQuery() throws Exception {
332 query = new RegexQuery(new Term(FIELD_NAME, "ken.*"));
333 searcher = new IndexSearcher(ramDir, true);
334 hits = searcher.search(query, 100);
335 int maxNumFragmentsRequired = 2;
337 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
338 Highlighter highlighter = new Highlighter(this, scorer);
340 for (int i = 0; i < hits.totalHits; i++) {
341 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
342 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
344 highlighter.setTextFragmenter(new SimpleFragmenter(40));
346 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
348 if (VERBOSE) System.out.println("\t" + result);
351 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// NumericRangeQuery is not expected to produce highlights; this test only
// verifies the highlighter does not throw when fed one (see inline comment).
// Note it reads the NUMERIC_FIELD_NAME stored value but tokenizes it under
// FIELD_NAME — intentional, since only exception-freedom is being checked.
355 public void testNumericRangeQuery() throws Exception {
356 // doesn't currently highlight, but make sure it doesn't cause exception either
357 query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
358 searcher = new IndexSearcher(ramDir, true);
359 hits = searcher.search(query, 100);
360 int maxNumFragmentsRequired = 2;
362 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
363 Highlighter highlighter = new Highlighter(this, scorer);
365 for (int i = 0; i < hits.totalHits; i++) {
366 String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME);
367 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
369 highlighter.setTextFragmenter(new SimpleFragmenter(40));
// Result deliberately discarded; only checking no exception is raised.
372 highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,"...");
373 //if (VERBOSE) System.out.println("\t" + result);
// Two more phrase-highlighting variants: a sloppy phrase ("~5" proximity)
// and an exact phrase over single-letter tokens. Both count highlights via
// the Formatter callback and assert the total at the end.
// NOTE(review): separator args, loop closers, and expected counts missing.
379 public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
380 doSearching("\"text piece long\"~5");
382 int maxNumFragmentsRequired = 2;
384 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
385 Highlighter highlighter = new Highlighter(this,scorer);
386 highlighter.setTextFragmenter(new SimpleFragmenter(40));
388 for (int i = 0; i < hits.totalHits; i++) {
389 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
390 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
392 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
394 if (VERBOSE) System.out.println("\t" + result);
397 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Variant 3: exact phrase "x y z"; scorer/highlighter built inside the loop.
401 public void testSimpleQueryScorerPhraseHighlighting3() throws Exception {
402 doSearching("\"x y z\"");
404 int maxNumFragmentsRequired = 2;
406 for (int i = 0; i < hits.totalHits; i++) {
407 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
408 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
409 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
410 Highlighter highlighter = new Highlighter(this, scorer);
412 highlighter.setTextFragmenter(new SimpleFragmenter(40));
414 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
416 if (VERBOSE) System.out.println("\t" + result);
// Note: this assert sits inside the per-hit loop (count checked per document).
418 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Exercises SimpleSpanFragmenter with two fragment sizes (5 and 20 chars)
// against two phrase queries, printing fragments in VERBOSE mode.
// NOTE(review): expected-output assertions (original gaps at 440-442, 459-462)
// are missing from this extraction.
423 public void testSimpleSpanFragmenter() throws Exception {
424 doSearching("\"piece of text that is very long\"");
426 int maxNumFragmentsRequired = 2;
428 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
429 Highlighter highlighter = new Highlighter(this, scorer);
431 for (int i = 0; i < hits.totalHits; i++) {
432 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
433 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
// Tiny 5-char fragments force the fragmenter to stretch to cover span matches.
435 highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
437 String result = highlighter.getBestFragments(tokenStream, text,
438 maxNumFragmentsRequired, "...");
439 if (VERBOSE) System.out.println("\t" + result);
// Second pass with a different phrase and a 20-char fragment size.
443 doSearching("\"been shot\"");
445 maxNumFragmentsRequired = 2;
447 scorer = new QueryScorer(query, FIELD_NAME);
448 highlighter = new Highlighter(this, scorer);
450 for (int i = 0; i < hits.totalHits; i++) {
451 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
452 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
454 highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
456 String result = highlighter.getBestFragments(tokenStream, text,
457 maxNumFragmentsRequired, "...");
458 if (VERBOSE) System.out.println("\t" + result);
463 // position sensitive query added after position insensitive query
// Mixed query "y \"x y z\"": a bare term plus a phrase containing the same
// term — checks the span-aware scorer handles the combination.
// NOTE(review): separator arg, loop closers, and the expected count in the
// final assertTrue are missing from this extraction.
464 public void testPosTermStdTerm() throws Exception {
465 doSearching("y \"x y z\"");
467 int maxNumFragmentsRequired = 2;
469 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
470 Highlighter highlighter = new Highlighter(this,scorer);
472 for (int i = 0; i < hits.totalHits; i++) {
473 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
474 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,new StringReader(text));
476 highlighter.setTextFragmenter(new SimpleFragmenter(40));
478 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
480 if (VERBOSE) System.out.println("\t" + result);
482 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// MultiPhraseQuery highlighting: first with alternative terms at one position
// (wordx|wordb followed by wordy, expecting 6 highlights), then with explicit
// out-of-order position gaps (wordz at +2, wordx at 0, expecting 2).
// NOTE(review): the helper that runs the search/highlight for the first test
// (original lines 492-494) is missing from this extraction.
487 public void testQueryScorerMultiPhraseQueryHighlighting() throws Exception {
488 MultiPhraseQuery mpq = new MultiPhraseQuery();
490 mpq.add(new Term[] { new Term(FIELD_NAME, "wordx"), new Term(FIELD_NAME, "wordb") });
491 mpq.add(new Term(FIELD_NAME, "wordy"));
495 final int maxNumFragmentsRequired = 2;
496 assertExpectedHighlightCount(maxNumFragmentsRequired, 6);
// Variant with a positional gap between the phrase terms.
499 public void testQueryScorerMultiPhraseQueryHighlightingWithGap() throws Exception {
500 MultiPhraseQuery mpq = new MultiPhraseQuery();
503 * The toString of MultiPhraseQuery doesn't work so well with these
504 * out-of-order additions, but the Query itself seems to match accurately.
507 mpq.add(new Term[] { new Term(FIELD_NAME, "wordz") }, 2);
508 mpq.add(new Term[] { new Term(FIELD_NAME, "wordx") }, 0);
512 final int maxNumFragmentsRequired = 1;
513 final int expectedHighlights = 2;
515 assertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
// SpanNearQuery highlighting via the shared TestHighlightRunner helper, then
// a QueryTermScorer-based (non-span) highlighter smoke test for "Kennedy".
// NOTE(review): helper.start()/run invocation lines and expected counts are
// missing from this extraction (embedded numbering gaps).
518 public void testNearSpanSimpleQuery() throws Exception {
519 doSearching(new SpanNearQuery(new SpanQuery[] {
520 new SpanTermQuery(new Term(FIELD_NAME, "beginning")),
521 new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false));
523 TestHighlightRunner helper = new TestHighlightRunner() {
526 public void run() throws Exception {
528 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
534 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// QueryTermScorer ignores positions — this only checks no exception occurs.
538 public void testSimpleQueryTermScorerHighlighter() throws Exception {
539 doSearching("Kennedy");
540 Highlighter highlighter = new Highlighter(new QueryTermScorer(query));
541 highlighter.setTextFragmenter(new SimpleFragmenter(40));
542 int maxNumFragmentsRequired = 2;
543 for (int i = 0; i < hits.totalHits; i++) {
544 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
545 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
547 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
549 if (VERBOSE) System.out.println("\t" + result);
551 // Not sure we can assert anything here - just running to check we dont
552 // throw any exceptions
// Span-query highlighting: a BooleanQuery of two SpanNearQueries sharing the
// term "wordy", then a SpanNotQuery (near-match excluding docs with "john").
// Both use the TestHighlightRunner helper and count highlights.
// NOTE(review): the doSearching(bquery)/helper.run invocations and expected
// counts are missing from this extraction.
555 public void testSpanHighlighting() throws Exception {
556 Query query1 = new SpanNearQuery(new SpanQuery[] {
557 new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
558 new SpanTermQuery(new Term(FIELD_NAME, "wordy")) }, 1, false);
559 Query query2 = new SpanNearQuery(new SpanQuery[] {
560 new SpanTermQuery(new Term(FIELD_NAME, "wordy")),
561 new SpanTermQuery(new Term(FIELD_NAME, "wordc")) }, 1, false);
562 BooleanQuery bquery = new BooleanQuery();
563 bquery.add(query1, Occur.SHOULD);
564 bquery.add(query2, Occur.SHOULD);
566 TestHighlightRunner helper = new TestHighlightRunner() {
569 public void run() throws Exception {
571 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
576 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// SpanNotQuery: "shot" near "kennedy" but not containing "john".
580 public void testNotSpanSimpleQuery() throws Exception {
581 doSearching(new SpanNotQuery(new SpanNearQuery(new SpanQuery[] {
582 new SpanTermQuery(new Term(FIELD_NAME, "shot")),
583 new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false), new SpanTermQuery(
584 new Term(FIELD_NAME, "john"))));
585 TestHighlightRunner helper = new TestHighlightRunner() {
588 public void run() throws Exception {
590 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
595 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Family of TestHighlightRunner-based tests covering term, fuzzy, wildcard,
// mid-wildcard, and range queries; each runs doStandardHighlights and asserts
// a highlight count.
// NOTE(review): helper.start() calls, method closers, and expected counts are
// missing throughout this extraction; testGetMidWildCardFragments is also
// missing its doSearching line (original 652-653).
599 public void testGetBestFragmentsSimpleQuery() throws Exception {
600 TestHighlightRunner helper = new TestHighlightRunner() {
603 public void run() throws Exception {
605 doSearching("Kennedy");
606 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
607 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Fuzzy query ("Kinnedy~") — last arg `true` to doStandardHighlights differs
// from the other tests here.
615 public void testGetFuzzyFragments() throws Exception {
616 TestHighlightRunner helper = new TestHighlightRunner() {
619 public void run() throws Exception {
621 doSearching("Kinnedy~");
622 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this, true);
623 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Single-character wildcard query.
631 public void testGetWildCardFragments() throws Exception {
632 TestHighlightRunner helper = new TestHighlightRunner() {
635 public void run() throws Exception {
637 doSearching("K?nnedy");
638 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
639 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Mid-string wildcard query (search string not visible in this extraction).
647 public void testGetMidWildCardFragments() throws Exception {
648 TestHighlightRunner helper = new TestHighlightRunner() {
651 public void run() throws Exception {
654 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
655 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Term-range query; rewrite method forced to scoring-boolean so the rewritten
// query contains highlightable terms.
663 public void testGetRangeFragments() throws Exception {
664 TestHighlightRunner helper = new TestHighlightRunner() {
667 public void run() throws Exception {
669 String queryString = FIELD_NAME + ":[kannedy TO kznnedy]";
671 // Need to explicitly set the QueryParser property to use TermRangeQuery
674 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
675 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
676 query = parser.parse(queryString);
679 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
680 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Highlights a constant-score-rewritten WildcardQuery three times: scoring
// against the matching field, against a null field (field-agnostic), and
// against a wrong field with FIELD_NAME as default. Constant-score queries
// cannot be rewritten before highlighting (see inline comment), so the
// un-rewritten query is passed straight to QueryScorer.
// NOTE(review): numHighlights resets, the second/third query re-construction
// (original gaps at 722-724, 750-752), separator args, and expected counts
// are missing from this extraction.
688 public void testConstantScoreMultiTermQuery() throws Exception {
692 query = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
693 ((WildcardQuery)query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
694 searcher = new IndexSearcher(ramDir, true);
695 // can't rewrite ConstantScore if you want to highlight it -
696 // it rewrites to ConstantScoreQuery which cannot be highlighted
697 // query = unReWrittenQuery.rewrite(reader);
698 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
699 hits = searcher.search(query, null, 1000);
701 for (int i = 0; i < hits.totalHits; i++) {
702 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
703 int maxNumFragmentsRequired = 2;
704 String fragmentSeparator = "...";
705 QueryScorer scorer = null;
706 TokenStream tokenStream = null;
708 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
710 scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
712 Highlighter highlighter = new Highlighter(this, scorer);
714 highlighter.setTextFragmenter(new SimpleFragmenter(20));
716 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
718 if (VERBOSE) System.out.println("\t" + result);
720 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Second pass: QueryScorer with a null field name — matches any field.
725 hits = searcher.search(query, null, 1000);
729 for (int i = 0; i < hits.totalHits; i++) {
730 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
731 int maxNumFragmentsRequired = 2;
732 String fragmentSeparator = "...";
733 QueryScorer scorer = null;
734 TokenStream tokenStream = null;
736 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
738 scorer = new QueryScorer(query, null);
740 Highlighter highlighter = new Highlighter(this, scorer);
742 highlighter.setTextFragmenter(new SimpleFragmenter(20));
744 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
746 if (VERBOSE) System.out.println("\t" + result);
748 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Third pass: scorer keyed to "random_field" with FIELD_NAME as default field.
753 hits = searcher.search(query, null, 1000);
757 for (int i = 0; i < hits.totalHits; i++) {
758 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
759 int maxNumFragmentsRequired = 2;
760 String fragmentSeparator = "...";
761 QueryScorer scorer = null;
762 TokenStream tokenStream = null;
764 tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
766 scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
768 Highlighter highlighter = new Highlighter(this, scorer);
770 highlighter.setTextFragmenter(new SimpleFragmenter(20));
772 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
774 if (VERBOSE) System.out.println("\t" + result);
776 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Phrase and span-near highlighting via the runner helper, plus an off-by-one
// regression test verifying highlight offsets align exactly with the source
// text ("help me [54-65]").
// NOTE(review): helper.start()/doSearching(snq) invocations, expected counts,
// and the `String match` declaration in testOffByOne are missing here.
780 public void testGetBestFragmentsPhrase() throws Exception {
781 TestHighlightRunner helper = new TestHighlightRunner() {
784 public void run() throws Exception {
786 doSearching("\"John Kennedy\"");
787 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
788 // Currently highlights "John" and "Kennedy" separately
789 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Equivalent span-near form of the phrase above.
797 public void testGetBestFragmentsQueryScorer() throws Exception {
798 TestHighlightRunner helper = new TestHighlightRunner() {
801 public void run() throws Exception {
803 SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
804 new SpanTermQuery(new Term("contents", "kennedy")), };
806 SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
808 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
809 // Currently highlights "John" and "Kennedy" separately
810 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Offset-correctness check: NullFragmenter keeps the full text so the <B>
// tags must land exactly on "help" without shifting the rest of the string.
818 public void testOffByOne() throws Exception {
819 TestHighlightRunner helper = new TestHighlightRunner() {
822 public void run() throws Exception {
823 TermQuery query = new TermQuery(new Term("data", "help"));
824 Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
825 hg.setTextFragmenter(new NullFragmenter());
828 match = hg.getBestFragment(analyzer, "data", "help me [54-65]");
829 assertEquals("<B>help</B> me [54-65]", match);
// FilteredQuery highlighting (span-near and phrase variants, both filtered to
// documents containing "john"), a prefix multi-term query, and an OR query —
// all via the TestHighlightRunner helper with highlight-count assertions.
// NOTE(review): doSearching(fq)/helper.start() invocations and expected counts
// are missing from this extraction (embedded numbering gaps).
837 public void testGetBestFragmentsFilteredQuery() throws Exception {
838 TestHighlightRunner helper = new TestHighlightRunner() {
841 public void run() throws Exception {
843 TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
844 SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")),
845 new SpanTermQuery(new Term("contents", "kennedy")), };
846 SpanNearQuery snq = new SpanNearQuery(clauses, 1, true);
847 FilteredQuery fq = new FilteredQuery(snq, rf);
850 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
851 // Currently highlights "John" and "Kennedy" separately
852 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Same filter with a PhraseQuery instead of a SpanNearQuery.
860 public void testGetBestFragmentsFilteredPhraseQuery() throws Exception {
861 TestHighlightRunner helper = new TestHighlightRunner() {
864 public void run() throws Exception {
866 TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true);
867 PhraseQuery pq = new PhraseQuery();
868 pq.add(new Term("contents", "john"));
869 pq.add(new Term("contents", "kennedy"));
870 FilteredQuery fq = new FilteredQuery(pq, rf);
873 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
874 // Currently highlights "John" and "Kennedy" separately
875 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Prefix (multi-term) query combined with a plain term.
883 public void testGetBestFragmentsMultiTerm() throws Exception {
884 TestHighlightRunner helper = new TestHighlightRunner() {
887 public void run() throws Exception {
889 doSearching("John Kenn*");
890 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
891 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Boolean OR of two terms.
899 public void testGetBestFragmentsWithOr() throws Exception {
900 TestHighlightRunner helper = new TestHighlightRunner() {
903 public void run() throws Exception {
905 doSearching("JFK OR Kennedy");
906 doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
907 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Exercises the three getBestFragment(s) entry points for a "Kennedy" search:
// (1) token-stream + text, (2) analyzer + field + text convenience overload,
// (3) multi-fragment analyzer overload — asserting the highlight count and
// (presumably) resetting numHighlights between passes in lines not visible.
// NOTE(review): numHighlights resets, loop closers, and expected counts
// (original gaps at 933-935, 944-946, 956+) are missing from this extraction.
914 public void testGetBestSingleFragment() throws Exception {
916 TestHighlightRunner helper = new TestHighlightRunner() {
919 public void run() throws Exception {
920 doSearching("Kennedy")
922 for (int i = 0; i < hits.totalHits; i++) {
923 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
924 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
926 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
927 HighlighterTest.this);
928 highlighter.setTextFragmenter(new SimpleFragmenter(40));
929 String result = highlighter.getBestFragment(tokenStream, text);
930 if (VERBOSE) System.out.println("\t" + result);
932 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 2: analyzer-based convenience overload.
936 for (int i = 0; i < hits.totalHits; i++) {
937 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
938 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
939 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
940 HighlighterTest.this);
941 highlighter.getBestFragment(analyzer, FIELD_NAME, text);
943 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Pass 3: multi-fragment overload (up to 10 fragments).
947 for (int i = 0; i < hits.totalHits; i++) {
948 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
950 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
951 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
952 HighlighterTest.this);
953 highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
955 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
// Shows that per-term weights (WeightedSpanTerm) drive fragment selection:
// with "hello" weighted 10x over "kennedy" the best 2-char fragment is
// "<B>Hello</B>"; after boosting "kennedy" to 50 the best fragment flips.
// NOTE(review): truncated excerpt — closing braces are not visible here.
966 public void testGetBestSingleFragmentWithWeights() throws Exception {
968 TestHighlightRunner helper = new TestHighlightRunner() {
971 public void run() throws Exception {
972 WeightedSpanTerm[] wTerms = new WeightedSpanTerm[2];
973 wTerms[0] = new WeightedSpanTerm(10f, "hello");
975 List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
976 positionSpans.add(new PositionSpan(0, 0));
977 wTerms[0].addPositionSpans(positionSpans);
979 wTerms[1] = new WeightedSpanTerm(1f, "kennedy");
980 positionSpans = new ArrayList<PositionSpan>();
981 positionSpans.add(new PositionSpan(14, 14));
982 wTerms[1].addPositionSpans(positionSpans);
984 Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
986 // QueryTermScorer(wTerms));
987 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
// Tiny fragment size forces the highlighter to pick a single-term fragment.
988 highlighter.setTextFragmenter(new SimpleFragmenter(2));
990 String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
991 assertTrue("Failed to find best section using weighted terms. Found: [" + result + "]",
992 "<B>Hello</B>".equals(result));
// Re-weight: "kennedy" now dominates, so the chosen fragment changes.
995 wTerms[1].setWeight(50f);
996 tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
997 highlighter = getHighlighter(wTerms, HighlighterTest.this);
998 highlighter.setTextFragmenter(new SimpleFragmenter(2));
1000 result = highlighter.getBestFragment(tokenStream, texts[0]).trim();
1001 assertTrue("Failed to find best section using weighted terms. Found: " + result,
1002 "<B>kennedy</B>".equals(result));
1011 // tests a "complex" analyzer that produces multiple
1012 // overlapping tokens
// Uses SynonymAnalyzer (defined below) so "football" expands to overlapping
// synonym tokens; verifies all overlapping variants get highlighted.
// NOTE(review): truncated excerpt — closing braces are not visible here.
1013 public void testOverlapAnalyzer() throws Exception {
1014 TestHighlightRunner helper = new TestHighlightRunner() {
1017 public void run() throws Exception {
1018 HashMap<String,String> synonyms = new HashMap<String,String>();
1019 synonyms.put("football", "soccer,footie");
1020 Analyzer analyzer = new SynonymAnalyzer(synonyms);
1021 String srchkey = "football";
1023 String s = "football-soccer in the euro 2004 footie competition";
1024 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "bookid", analyzer);
1025 Query query = parser.parse(srchkey);
1027 TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
1029 Highlighter highlighter = getHighlighter(query, null, tokenStream, HighlighterTest.this);
1031 // Get 3 best fragments and separate with a "..."
1032 tokenStream = analyzer.tokenStream(null, new StringReader(s));
1034 String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
1035 String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";
1036 assertTrue("overlapping analyzer should handle highlights OK, expected:" + expectedResult
1037 + " actual:" + result, expectedResult.equals(result));
// Basic happy-path test: highlight "Kennedy" hits with the default fragmenter
// and assert the total highlight count across all matching documents.
// NOTE(review): truncated excerpt — closing braces are not visible here.
1046 public void testGetSimpleHighlight() throws Exception {
1047 TestHighlightRunner helper = new TestHighlightRunner() {
1050 public void run() throws Exception {
1052 doSearching("Kennedy");
1053 // new Highlighter(HighlighterTest.this, new QueryTermScorer(query));
1055 for (int i = 0; i < hits.totalHits; i++) {
1056 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1057 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1058 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1059 HighlighterTest.this);
1060 String result = highlighter.getBestFragment(tokenStream, text);
1061 if (VERBOSE) System.out.println("\t" + result);
1063 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1064 numHighlights == 4);
// Checks that getBestTextFragments returns TextFragment objects whose string
// forms match exactly what getBestFragments returns for the same input.
// NOTE(review): truncated excerpt — the trailing args of getBestTextFragments
// and the closing braces are not visible here.
1070 public void testGetTextFragments() throws Exception {
1071 TestHighlightRunner helper = new TestHighlightRunner() {
1074 public void run() throws Exception {
1076 doSearching("Kennedy");
1078 for (int i = 0; i < hits.totalHits; i++) {
1079 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1080 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1082 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1083 HighlighterTest.this);// new Highlighter(this, new
1084 // QueryTermScorer(query));
1085 highlighter.setTextFragmenter(new SimpleFragmenter(20));
1086 String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
// Fresh token stream: the previous one was consumed by getBestFragments.
1088 tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1089 TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
1092 assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length
1093 + " vs " + stringResults.length, fragmentResults.length == stringResults.length);
1094 for (int j = 0; j < stringResults.length; j++) {
1095 if (VERBOSE) System.out.println(fragmentResults[j]);
1096 assertTrue("Failed to find same text Fragments: " + fragmentResults[j] + " found",
1097 fragmentResults[j].toString().equals(stringResults[j]));
// Verifies setMaxDocCharsToAnalyze(30) stops analysis before the match
// position, so a query for "meat" yields zero highlights in texts[0].
// NOTE(review): truncated excerpt — closing braces are not visible here.
1107 public void testMaxSizeHighlight() throws Exception {
1108 final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET, true);
1109 // we disable MockTokenizer checks because we will forcefully limit the
1110 // tokenstream and call end() before incrementToken() returns false.
1111 analyzer.setEnableChecks(false);
1112 TestHighlightRunner helper = new TestHighlightRunner() {
1115 public void run() throws Exception {
1117 doSearching("meat");
1118 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
1119 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1120 HighlighterTest.this);// new Highlighter(this, new
1121 // QueryTermScorer(query));
1122 highlighter.setMaxDocCharsToAnalyze(30);
1124 highlighter.getBestFragment(tokenStream, texts[0]);
1125 assertTrue("Setting MaxDocBytesToAnalyze should have prevented "
1126 + "us from finding matches for this record: " + numHighlights + " found",
1127 numHighlights == 0);
// With a NullFragmenter (whole doc as one fragment) the returned match must
// still be truncated to maxDocCharsToAnalyze, even when the analyzed text is
// mostly stop words and even after appending more tokenized content.
// NOTE(review): truncated excerpt — loop/brace closings are not visible here.
1134 public void testMaxSizeHighlightTruncates() throws Exception {
1135 TestHighlightRunner helper = new TestHighlightRunner() {
1138 public void run() throws Exception {
1139 String goodWord = "goodtoken";
1140 Set<String> stopWords = new HashSet<String>(1);
1141 stopWords.add("stoppedtoken");
1143 TermQuery query = new TermQuery(new Term("data", goodWord));
1145 String match = null;
1146 StringBuilder sb = new StringBuilder();
1147 sb.append(goodWord);
// Pad the document with a long run of a single stop word so almost all of
// the analyzed chars produce no query matches.
1148 for (int i = 0; i < 10000; i++) {
1150 // only one stopword
1151 sb.append(stopWords.iterator().next());
1153 SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
1154 Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords).tokenStream(
1155 "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
1157 // QueryTermScorer(query));
1158 hg.setTextFragmenter(new NullFragmenter());
1159 hg.setMaxDocCharsToAnalyze(100);
1160 match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
1161 assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
1162 .getMaxDocCharsToAnalyze());
1164 // add another tokenized word to the overall length - but set way
1166 // the length of text under consideration (after a large slug of stop
1170 sb.append(goodWord);
1171 match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
1172 assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
1173 .getMaxDocCharsToAnalyze());
// When maxDocCharsToAnalyze covers the whole text (36 chars), the fragment
// must include the remainder of the text after the highlighted term.
// NOTE(review): truncated excerpt — the assertTrue( opener before the message
// string and the closing braces are not visible here.
1181 public void testMaxSizeEndHighlight() throws Exception {
1182 TestHighlightRunner helper = new TestHighlightRunner() {
1184 public void run() throws Exception {
1185 Set<String> stopWords = new HashSet<String>();
1186 stopWords.add("in");
1187 stopWords.add("it");
1188 TermQuery query = new TermQuery(new Term("text", "searchterm"));
1190 String text = "this is a text with searchterm in it";
1191 SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
1192 Highlighter hg = getHighlighter(query, "text", new StandardAnalyzer(TEST_VERSION_CURRENT,
1193 stopWords).tokenStream("text", new StringReader(text)), fm);
1194 hg.setTextFragmenter(new NullFragmenter());
1195 hg.setMaxDocCharsToAnalyze(36);
1196 String match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "text", text);
1198 "Matched text should contain remainder of text after highlighted query ",
1199 match.endsWith("in it"));
// Demonstrates that an UN-rewritten multi-term query (wildcards/prefixes)
// produces zero highlights: the scorer only sees primitive terms, so callers
// must rewrite such queries against the reader first.
// NOTE(review): truncated excerpt — closing braces are not visible here.
1205 public void testUnRewrittenQuery() throws Exception {
1206 final TestHighlightRunner helper = new TestHighlightRunner() {
1209 public void run() throws Exception {
1211 // test to show how rewritten query can still be used
1212 if (searcher != null) searcher.close();
1213 searcher = new IndexSearcher(ramDir, true);
1214 Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
1216 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1217 Query query = parser.parse("JF? or Kenned*");
1218 if (VERBOSE) System.out.println("Searching with primitive query");
1219 // forget to set this and...
1220 // query=query.rewrite(reader);
1221 TopDocs hits = searcher.search(query, null, 1000);
1223 // create an instance of the highlighter with the tags used to surround
1225 // QueryHighlightExtractor highlighter = new
1226 // QueryHighlightExtractor(this,
1227 // query, new StandardAnalyzer(TEST_VERSION));
1229 int maxNumFragmentsRequired = 3;
1231 for (int i = 0; i < hits.totalHits; i++) {
1232 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1233 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
// Last arg false: do not let the helper expand/rewrite the query.
1234 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);
1236 highlighter.setTextFragmenter(new SimpleFragmenter(40));
1238 String highlightedText = highlighter.getBestFragments(tokenStream, text,
1239 maxNumFragmentsRequired, "...");
1241 if (VERBOSE) System.out.println(highlightedText);
1243 // We expect to have zero highlights if the query is multi-terms and is
1246 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1247 numHighlights == 0);
// When the query matches nothing in the text, getBestFragment must return
// null rather than an empty or unhighlighted fragment.
// NOTE(review): truncated excerpt — closing braces are not visible here.
1254 public void testNoFragments() throws Exception {
1255 TestHighlightRunner helper = new TestHighlightRunner() {
1258 public void run() throws Exception {
1259 doSearching("AnInvalidQueryWhichShouldYieldNoResults");
1261 for (int i = 0; i < texts.length; i++) {
1262 String text = texts[i];
1263 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1264 Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
1265 HighlighterTest.this);
1266 String result = highlighter.getBestFragment(tokenStream, text);
1267 assertNull("The highlight result should be null for text with no query terms", result);
1276 * Demonstrates creation of an XHTML compliant doc using new encoding facilities.
// Highlights raw content containing &, <, >, " through SimpleHTMLEncoder,
// embeds the snippet in an XHTML page, then round-trips it through a DOM
// parser to prove the encoding produced well-formed, decodable XML.
// NOTE(review): truncated excerpt — the anonymous Scorer's method bodies and
// several closing braces are not visible here.
1280 public void testEncoding() throws Exception {
1282 String rawDocContent = "\"Smith & sons' prices < 3 and >4\" claims article";
1283 // run the highlighter on the raw content (scorer does not score any tokens
1285 // highlighting but scores a single fragment for selection
1286 Highlighter highlighter = new Highlighter(this, new SimpleHTMLEncoder(), new Scorer() {
1287 public void startFragment(TextFragment newFragment) {
1290 public float getTokenScore() {
1294 public float getFragmentScore() {
1298 public TokenStream init(TokenStream tokenStream) {
1302 highlighter.setTextFragmenter(new SimpleFragmenter(2000));
1303 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
1305 String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
1306 // An ugly bit of XML creation:
1307 String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
1308 + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n"
1309 + "<head>\n" + "<title>My Test HTML Document</title>\n" + "</head>\n" + "<body>\n" + "<h2>"
1310 + encodedSnippet + "</h2>\n" + "</body>\n" + "</html>";
1311 // now an ugly bit of XML parsing to test the snippet is encoded OK
1312 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
1313 DocumentBuilder db = dbf.newDocumentBuilder();
// NOTE(review): getBytes() uses the platform default charset despite the
// declared UTF-8 encoding — consider getBytes(StandardCharsets.UTF_8).
1314 org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes()));
1315 Element root = doc.getDocumentElement();
1316 NodeList nodes = root.getElementsByTagName("body");
1317 Element body = (Element) nodes.item(0);
1318 nodes = body.getElementsByTagName("h2");
1319 Element h2 = (Element) nodes.item(0);
1320 String decodedSnippet = h2.getFirstChild().getNodeValue();
1321 assertEquals("XHTML Encoding should have worked:", rawDocContent, decodedSnippet);
// Highlighting across a MultiSearcher: the wildcard query must be rewritten
// against each sub-reader and re-combined before the QueryTermScorer can see
// concrete terms; expects one highlight per matching sub-index (2 total).
// NOTE(review): truncated excerpt — writer close() calls, d.add(f) lines and
// resource cleanup are not visible here.
1324 public void testMultiSearcher() throws Exception {
1326 Directory ramDir1 = newDirectory();
1327 IndexWriter writer1 = new IndexWriter(ramDir1, newIndexWriterConfig(
1328 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1329 Document d = new Document();
1330 Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
1332 writer1.addDocument(d);
1335 IndexReader reader1 = IndexReader.open(ramDir1, true);
1338 Directory ramDir2 = newDirectory();
1339 IndexWriter writer2 = new IndexWriter(ramDir2, newIndexWriterConfig(
1340 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1342 f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
1344 writer2.addDocument(d);
1347 IndexReader reader2 = IndexReader.open(ramDir2, true);
1349 IndexSearcher searchers[] = new IndexSearcher[2];
1350 searchers[0] = new IndexSearcher(ramDir1, true);
1351 searchers[1] = new IndexSearcher(ramDir2, true);
1352 MultiSearcher multiSearcher = new MultiSearcher(searchers);
1353 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
1354 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
1355 query = parser.parse("multi*");
1356 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
1357 // at this point the multisearcher calls combine(query[])
1358 hits = multiSearcher.search(query, null, 1000);
1360 // query = QueryParser.parse("multi*", FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
// Rewrite the wildcard per-reader, then combine so the scorer sees the
// expanded concrete terms instead of the raw prefix query.
1361 Query expandedQueries[] = new Query[2];
1362 expandedQueries[0] = query.rewrite(reader1);
1363 expandedQueries[1] = query.rewrite(reader2);
1364 query = query.combine(expandedQueries);
1366 // create an instance of the highlighter with the tags used to surround
1368 Highlighter highlighter = new Highlighter(this, new QueryTermScorer(query));
1370 for (int i = 0; i < hits.totalHits; i++) {
1371 String text = multiSearcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1372 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1373 String highlightedText = highlighter.getBestFragment(tokenStream, text);
1374 if (VERBOSE) System.out.println(highlightedText);
1376 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1377 numHighlights == 2);
1380 searchers[0].close();
1381 searchers[1].close();
// Contrasts field-aware and field-agnostic scorers on the query
// "fred category:people": a scorer bound to FIELD_NAME highlights only
// "fred"; an unbound scorer also highlights "people".
// NOTE(review): truncated excerpt — closing braces are not visible here.
1386 public void testFieldSpecificHighlighting() throws Exception {
1387 TestHighlightRunner helper = new TestHighlightRunner() {
1390 public void run() throws Exception {
1391 String docMainText = "fred is one of the people";
1392 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1393 Query query = parser.parse("fred category:people");
1395 // highlighting respects fieldnames used in query
1397 Scorer fieldSpecificScorer = null;
1398 if (mode == TestHighlightRunner.QUERY) {
1399 fieldSpecificScorer = new QueryScorer(query, FIELD_NAME);
1400 } else if (mode == TestHighlightRunner.QUERY_TERM) {
1401 fieldSpecificScorer = new QueryTermScorer(query, "contents");
1403 Highlighter fieldSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
1404 fieldSpecificScorer);
1405 fieldSpecificHighlighter.setTextFragmenter(new NullFragmenter());
1406 String result = fieldSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
1407 assertEquals("Should match", result, "<B>fred</B> is one of the people");
1409 // highlighting does not respect fieldnames used in query
1410 Scorer fieldInSpecificScorer = null;
1411 if (mode == TestHighlightRunner.QUERY) {
// null field = score terms from any field.
1412 fieldInSpecificScorer = new QueryScorer(query, null);
1413 } else if (mode == TestHighlightRunner.QUERY_TERM) {
1414 fieldInSpecificScorer = new QueryTermScorer(query);
1417 Highlighter fieldInSpecificHighlighter = new Highlighter(new SimpleHTMLFormatter(),
1418 fieldInSpecificScorer);
1419 fieldInSpecificHighlighter.setTextFragmenter(new NullFragmenter());
1420 result = fieldInSpecificHighlighter.getBestFragment(analyzer, FIELD_NAME, docMainText);
1421 assertEquals("Should match", result, "<B>fred</B> is one of the <B>people</B>");
// Builds a canned TokenStream for "Hi-Speed10 foo" with overlapping tokens:
// "hi" then "hispeed"/"speed" at the same position (posIncr 0), then "10",
// "foo". Used by testOverlapAnalyzer2.
// NOTE(review): truncated excerpt — list declaration/initializer lines,
// lst.add(t) calls and the return-false tail are not visible here.
1431 protected TokenStream getTS2() {
1432 // String s = "Hi-Speed10 foo";
1433 return new TokenStream() {
1434 Iterator<Token> iter;
1436 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
1437 private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1438 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
1440 lst = new ArrayList<Token>();
1442 t = createToken("hi", 0, 2);
1443 t.setPositionIncrement(1);
1445 t = createToken("hispeed", 0, 8);
1446 t.setPositionIncrement(1);
// posIncr 0: "speed" overlaps "hispeed" at the same token position.
1448 t = createToken("speed", 3, 8);
1449 t.setPositionIncrement(0);
1451 t = createToken("10", 8, 10);
1452 t.setPositionIncrement(1);
1454 t = createToken("foo", 11, 14);
1455 t.setPositionIncrement(1);
1457 iter = lst.iterator();
1461 public boolean incrementToken() throws IOException {
1462 if(iter.hasNext()) {
1463 Token token = iter.next();
1465 termAtt.setEmpty().append(token);
1466 posIncrAtt.setPositionIncrement(token.getPositionIncrement());
1467 offsetAtt.setOffset(token.startOffset(), token.endOffset());
1476 // same token-stream as above, but the bigger token comes first this time
// Ordering variant of getTS2: "hispeed" is emitted first and "hi" overlaps
// it with posIncr 0; verifies overlap handling is order-independent.
// NOTE(review): truncated excerpt — list setup lines, lst.add(t) calls and
// the return-false tail are not visible here.
1477 protected TokenStream getTS2a() {
1478 // String s = "Hi-Speed10 foo";
1479 return new TokenStream() {
1480 Iterator<Token> iter;
1482 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
1483 private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1484 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
1486 lst = new ArrayList<Token>();
1488 t = createToken("hispeed", 0, 8);
1489 t.setPositionIncrement(1);
// posIncr 0: "hi" overlaps "hispeed" at the same token position.
1491 t = createToken("hi", 0, 2);
1492 t.setPositionIncrement(0);
1494 t = createToken("speed", 3, 8);
1495 t.setPositionIncrement(1);
1497 t = createToken("10", 8, 10);
1498 t.setPositionIncrement(1);
1500 t = createToken("foo", 11, 14);
1501 t.setPositionIncrement(1);
1503 iter = lst.iterator();
1507 public boolean incrementToken() throws IOException {
1508 if(iter.hasNext()) {
1509 Token token = iter.next();
1511 termAtt.setEmpty().append(token);
1512 posIncrAtt.setPositionIncrement(token.getPositionIncrement());
1513 offsetAtt.setOffset(token.startOffset(), token.endOffset());
// Runs the same six single-term/phrase queries against both canned overlapping
// token streams (getTS2 and getTS2a) and asserts identical highlighting, i.e.
// the highlighter merges overlapping tokens regardless of emission order.
// NOTE(review): truncated excerpt — the String result declaration and closing
// braces are not visible here.
1521 public void testOverlapAnalyzer2() throws Exception {
1522 TestHighlightRunner helper = new TestHighlightRunner() {
1525 public void run() throws Exception {
1526 String s = "Hi-Speed10 foo";
1529 Highlighter highlighter;
1532 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
1533 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1534 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1535 assertEquals("Hi-Speed10 <B>foo</B>", result);
1537 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
1538 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1539 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1540 assertEquals("Hi-Speed<B>10</B> foo", result);
1542 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
1543 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1544 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1545 assertEquals("<B>Hi</B>-Speed10 foo", result);
1547 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
1548 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1549 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1550 assertEquals("Hi-<B>Speed</B>10 foo", result);
1552 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
1553 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1554 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1555 assertEquals("<B>Hi-Speed</B>10 foo", result);
1557 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
1558 highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
1559 result = highlighter.getBestFragments(getTS2(), s, 3, "...");
1560 assertEquals("<B>Hi-Speed</B>10 foo", result);
1562 // ///////////////// same tests, just put the bigger overlapping token
1564 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("foo");
1565 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1566 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1567 assertEquals("Hi-Speed10 <B>foo</B>", result);
1569 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("10");
1570 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1571 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1572 assertEquals("Hi-Speed<B>10</B> foo", result);
1574 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi");
1575 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1576 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1577 assertEquals("<B>Hi</B>-Speed10 foo", result);
1579 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("speed");
1580 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1581 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1582 assertEquals("Hi-<B>Speed</B>10 foo", result);
1584 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hispeed");
1585 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1586 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1587 assertEquals("<B>Hi-Speed</B>10 foo", result);
1589 query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("hi speed");
1590 highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
1591 result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
1592 assertEquals("<B>Hi-Speed</B>10 foo", result);
1599 private Directory dir;
1600 private Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
1602 public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException {
// Builds a single-field stored+analyzed Document for the deletes tests.
// NOTE(review): truncated excerpt — the return statement is not visible here.
1608 private Document doc( String f, String v ){
1609 Document doc = new Document();
1610 doc.add( new Field( f, v, Store.YES, Index.ANALYZED ) );
// Populates 'dir' with four t_text1 docs; three contain the marker term
// "del" so deleteDocument() can later remove them.
// NOTE(review): truncated excerpt — writer commit/close is not visible here.
1614 private void makeIndex() throws IOException {
1615 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
1616 writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
1617 writer.addDocument( doc( "t_text1", "more random words for second field del" ) );
1618 writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
1619 writer.addDocument( doc( "t_text1", "more random words for second field" ) );
// Deletes every doc containing term t_text1:"del", leaving stale term stats
// that can drive QueryTermScorer's idf negative (what searchIndex checks).
// NOTE(review): truncated excerpt — writer close is not visible here.
1624 private void deleteDocument() throws IOException {
1625 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
1626 writer.deleteDocuments( new Term( "t_text1", "del" ) );
1627 // Keep the following line commented out to reproduce the negative-idf case;
1627 // optimizing would purge the deleted docs and normalize the term stats.
1628 //writer.optimize();
// Searches t_text1:random after deletions and asserts the highlighter still
// returns a highlighted fragment even though QueryTermScorer can compute a
// negative idf (which once caused null fragments).
// NOTE(review): truncated excerpt — searcher close and closing braces are not
// visible here.
1632 private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
1633 String q = "t_text1:random";
1634 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "t_text1", a );
1635 Query query = parser.parse( q );
1636 IndexSearcher searcher = new IndexSearcher( dir, true );
1637 // This scorer can return negative idf -> null fragment
1638 Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
1639 // This scorer doesn't use idf (patch version)
1640 //Scorer scorer = new QueryTermScorer( query, "t_text1" );
1641 Highlighter h = new Highlighter( scorer );
1643 TopDocs hits = searcher.search(query, null, 10);
1644 for( int i = 0; i < hits.totalHits; i++ ){
1645 Document doc = searcher.doc( hits.scoreDocs[i].doc );
1646 String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
1647 if (VERBOSE) System.out.println("result:" + result);
1648 assertEquals("more <B>random</B> words for second field", result);
1655 * public void testBigramAnalyzer() throws IOException, ParseException {
1656 * //test to ensure analyzers with non-consecutive start/end offsets //don't
1657 * double-highlight text //setup index 1 RAMDirectory ramDir = new
1658 * RAMDirectory(); Analyzer bigramAnalyzer=new CJKAnalyzer(); IndexWriter
1659 * writer = new IndexWriter(ramDir,bigramAnalyzer , true); Document d = new
1660 * Document(); Field f = new Field(FIELD_NAME, "java abc def", true, true,
1661 * true); d.add(f); writer.addDocument(d); writer.close(); IndexReader reader =
1662 * IndexReader.open(ramDir, true);
1664 * IndexSearcher searcher=new IndexSearcher(reader); query =
1665 * QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer);
1666 * System.out.println("Searching for: " + query.toString(FIELD_NAME)); hits =
1667 * searcher.search(query);
1669 * Highlighter highlighter = new Highlighter(this,new
1670 * QueryFragmentScorer(query));
1672 * for (int i = 0; i < hits.totalHits; i++) { String text =
1673 * searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
1674 * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
1675 * String highlightedText = highlighter.getBestFragment(tokenStream,text);
1676 * System.out.println(highlightedText); } }
// Formatter callback: wraps scoring tokens in <B> tags and counts them so the
// tests can assert on numHighlights; non-scoring text passes through as-is.
// NOTE(review): truncated excerpt — closing braces are not visible here.
1679 public String highlightTerm(String originalText, TokenGroup group) {
1680 if (group.getTotalScore() <= 0) {
1681 return originalText;
1683 numHighlights++; // update stats used in assertions
1684 return "<B>" + originalText + "</B>";
// Parses queryString (position increments on, multi-term queries rewritten to
// scoring boolean queries) into the shared 'query' field, then delegates to
// doSearching(Query) via code not visible in this excerpt.
1687 public void doSearching(String queryString) throws Exception {
1688 QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
1689 parser.setEnablePositionIncrements(true);
// SCORING_BOOLEAN_QUERY_REWRITE keeps concrete terms available for scoring.
1690 parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
1691 query = parser.parse(queryString);
// Rewrites the given query against the shared reader (mandatory for prefix/
// wildcard/range/fuzzy queries to highlight) and runs it, storing results in
// the shared 'query'/'hits'/'searcher' fields.
1695 public void doSearching(Query unReWrittenQuery) throws Exception {
1696 if (searcher != null) searcher.close();
1697 searcher = new IndexSearcher(ramDir, true);
1698 // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc)
1699 // you must use a rewritten query!
1700 query = unReWrittenQuery.rewrite(reader);
1701 if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
1702 hits = searcher.search(query, null, 1000);
// Highlights every hit with a QueryScorer and asserts the cumulative
// numHighlights equals expectedHighlights.
// NOTE(review): truncated excerpt — the separator argument line of
// getBestFragments and closing braces are not visible here.
1705 public void assertExpectedHighlightCount(final int maxNumFragmentsRequired,
1706 final int expectedHighlights) throws Exception {
1707 for (int i = 0; i < hits.totalHits; i++) {
1708 String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
1709 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
1710 QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
1711 Highlighter highlighter = new Highlighter(this, scorer);
1713 highlighter.setTextFragmenter(new SimpleFragmenter(40));
1715 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
1717 if (VERBOSE) System.out.println("\t" + result);
1719 assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
1720 numHighlights == expectedHighlights);
// Builds the shared test index: one doc per entry in 'texts' plus four docs
// carrying NumericField values 1/3/5/7 for numeric-range highlighting tests,
// then opens the shared reader.
// NOTE(review): truncated excerpt — super.setUp(), doc.add(nfield) lines and
// writer close are not visible here.
1725 public void setUp() throws Exception {
1727 dir = newDirectory();
1728 ramDir = newDirectory();
1729 IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(
1730 TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
1731 for (int i = 0; i < texts.length; i++) {
1732 addDoc(writer, texts[i]);
1734 Document doc = new Document();
1735 NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1736 nfield.setIntValue(1);
1738 writer.addDocument(doc, analyzer);
1739 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1740 nfield.setIntValue(3);
1741 doc = new Document();
1743 writer.addDocument(doc, analyzer);
1744 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1745 nfield.setIntValue(5);
1746 doc = new Document();
1748 writer.addDocument(doc, analyzer);
1749 nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
1750 nfield.setIntValue(7);
1751 doc = new Document();
1753 writer.addDocument(doc, analyzer);
1756 reader = IndexReader.open(ramDir, true);
// Releases per-test resources; closes the shared searcher if one was opened.
// NOTE(review): truncated excerpt — reader/dir cleanup and super.tearDown()
// are not visible here.
1761 public void tearDown() throws Exception {
1762 if (searcher != null) searcher.close();
// Adds one stored+analyzed FIELD_NAME document containing 'text' to writer.
// NOTE(review): truncated excerpt — the d.add(f) line is not visible here.
1768 private void addDoc(IndexWriter writer, String text) throws IOException {
1769 Document d = new Document();
1770 Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
1772 writer.addDocument(d);
// Convenience factory for a Token with explicit start/end character offsets
// (the 'offset' parameter is the END offset, per the Token constructor used).
1776 private static Token createToken(String term, int start, int offset)
1778 return new Token(term, start, offset);
1783 // ===================================================================
1784 // ========== BEGIN TEST SUPPORTING CLASSES
1785 // ========== IT LOOKS LIKE, WITH SOME MORE EFFORT, THESE COULD BE
1786 // ========== MADE MORE GENERALLY USEFUL.
1787 // TODO - make synonyms all interchangeable with each other and produce
1788 // a version that does hyponyms - the "is a specialised type of ...."
1789 // so that car = audi, bmw and volkswagen but bmw != audi so different
1790 // behaviour to synonyms
1791 // ===================================================================
// Test analyzer that lower-cases input and feeds it through SynonymTokenizer,
// injecting the configured synonyms as extra overlapping tokens.
1793 final class SynonymAnalyzer extends Analyzer {
1794 private Map<String,String> synonyms;
1796 public SynonymAnalyzer(Map<String,String> synonyms) {
1797 this.synonyms = synonyms;
1803 * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
1807 public TokenStream tokenStream(String arg0, Reader arg1) {
1808 LowerCaseTokenizer stream = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, arg1);
// Pre-register the attributes SynonymTokenizer will read from the stream.
1809 stream.addAttribute(CharTermAttribute.class);
1810 stream.addAttribute(PositionIncrementAttribute.class);
1811 stream.addAttribute(OffsetAttribute.class);
// NOTE(review): truncated excerpt — the try-block opener this catch belongs
// to is not visible here.
1814 } catch (IOException e) {
1815 throw new RuntimeException(e);
1817 return new SynonymTokenizer(stream, synonyms);
1822 * Expands a token stream with synonyms (TODO - make the synonyms analyzed by choice of analyzer)
// Test-support TokenStream: replays each token from the wrapped stream, then
// emits its synonyms (comma-separated in the map value) as extra tokens at
// the same offsets with position increment 0.
1825 final class SynonymTokenizer extends TokenStream {
// The wrapped source stream.
1826 private final TokenStream realStream;
// Non-null while synonyms of the last real token are still being emitted.
1827 private Token currentRealToken = null;
1828 private final Map<String, String> synonyms;
// Iterates the comma-separated synonym list of the current token.
1829 private StringTokenizer st = null;
// Attribute views onto the wrapped stream (read side)...
1830 private final CharTermAttribute realTermAtt;
1831 private final PositionIncrementAttribute realPosIncrAtt;
1832 private final OffsetAttribute realOffsetAtt;
// ...and onto this stream, which downstream consumers observe.
1833 private final CharTermAttribute termAtt;
1834 private final PositionIncrementAttribute posIncrAtt;
1835 private final OffsetAttribute offsetAtt;
1837 public SynonymTokenizer(TokenStream realStream, Map<String, String> synonyms) {
1838 this.realStream = realStream;
1839 this.synonyms = synonyms;
1840 realTermAtt = realStream.addAttribute(CharTermAttribute.class);
1841 realPosIncrAtt = realStream.addAttribute(PositionIncrementAttribute.class);
1842 realOffsetAtt = realStream.addAttribute(OffsetAttribute.class);
1844 termAtt = addAttribute(CharTermAttribute.class);
1845 posIncrAtt = addAttribute(PositionIncrementAttribute.class);
1846 offsetAtt = addAttribute(OffsetAttribute.class);
1850 public boolean incrementToken() throws IOException {
// No synonym pending: pull the next real token and pass it through.
1852 if (currentRealToken == null) {
1853 boolean next = realStream.incrementToken();
1857 //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
// Copy term text, offsets and position increment through unchanged.
1859 termAtt.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
1860 offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
1861 posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement());
// If this term has synonyms, remember it so following calls emit them.
1863 String expansions = synonyms.get(realTermAtt.toString());
1864 if (expansions == null) {
1867 st = new StringTokenizer(expansions, ",");
1868 if (st.hasMoreTokens()) {
// Snapshot the real token's offsets and text for the synonym copies.
1869 currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
1870 currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
// Synonym branch: emit the next synonym at the original token's offsets,
// stacked at the same position (increment 0).
1875 String tok = st.nextToken();
1877 termAtt.setEmpty().append(tok);
1878 offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset());
1879 posIncrAtt.setPositionIncrement(0);
// Last synonym consumed: next call resumes reading the real stream.
1880 if (!st.hasMoreTokens()) {
1881 currentRealToken = null;
1890 public void reset() throws IOException {
// Drop in-flight synonym state (reset of the wrapped stream is on lines
// elided from this view — confirm in full file).
1892 this.currentRealToken = null;
// Harness that runs each highlighting scenario under both scorer modes:
// QueryScorer (mode == QUERY) and QueryTermScorer (mode == QUERY_TERM).
1896 static abstract class TestHighlightRunner {
1897 static final int QUERY = 0;
1898 static final int QUERY_TERM = 1;
// Default fragmenter used by doStandardHighlights (20-char fragments).
1901 Fragmenter frag = new SimpleFragmenter(20);
// Convenience overload: multi-term queries are expanded by default.
1903 public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter) {
1904 return getHighlighter(query, fieldName, stream, formatter, true);
// Builds a Highlighter whose Scorer matches the current mode; expanMultiTerm
// is only honored by QueryScorer (QueryTermScorer has no such option).
1907 public Highlighter getHighlighter(Query query, String fieldName, TokenStream stream, Formatter formatter, boolean expanMultiTerm) {
1908 Scorer scorer = null;
1909 if (mode == QUERY) {
1910 scorer = new QueryScorer(query, fieldName);
1911 if(!expanMultiTerm) {
1912 ((QueryScorer)scorer).setExpandMultiTermQuery(false);
1914 } else if (mode == QUERY_TERM) {
1915 scorer = new QueryTermScorer(query);
// Any mode other than QUERY/QUERY_TERM is a programming error in the test.
1917 throw new RuntimeException("Unknown highlight mode");
1920 return new Highlighter(formatter, scorer);
// Builds a Highlighter from pre-weighted terms instead of a Query.
1923 Highlighter getHighlighter(WeightedTerm[] weightedTerms, Formatter formatter) {
1924 if (mode == QUERY) {
// QueryScorer needs the span-aware subtype, hence the downcast — callers in
// QUERY mode must actually pass WeightedSpanTerm[].
1925 return new Highlighter(formatter, new QueryScorer((WeightedSpanTerm[]) weightedTerms));
1926 } else if (mode == QUERY_TERM) {
1927 return new Highlighter(formatter, new QueryTermScorer(weightedTerms));
1930 throw new RuntimeException("Unknown highlight mode");
// Convenience overload: no multi-term query expansion.
1934 void doStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, Formatter formatter)
1936 doStandardHighlights(analyzer, searcher, hits, query, formatter, false);
// Highlights FIELD_NAME for every hit using the scorer selected by mode and
// prints the best fragments when VERBOSE.
// NOTE(review): expandMT is not referenced in any visible line of this body —
// confirm whether it is used on the elided lines or is simply unused.
1939 void doStandardHighlights(Analyzer analyzer, IndexSearcher searcher, TopDocs hits, Query query, Formatter formatter, boolean expandMT)
1942 for (int i = 0; i < hits.totalHits; i++) {
1943 String text = searcher.doc(hits.scoreDocs[i].doc).get(HighlighterTest.FIELD_NAME);
1944 int maxNumFragmentsRequired = 2;
1945 String fragmentSeparator = "...";
1946 Scorer scorer = null;
// Re-analyze the stored text: highlighting consumes a fresh TokenStream.
1947 TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
1948 if (mode == QUERY) {
1949 scorer = new QueryScorer(query);
1950 } else if (mode == QUERY_TERM) {
1951 scorer = new QueryTermScorer(query);
1953 Highlighter highlighter = new Highlighter(formatter, scorer);
1954 highlighter.setTextFragmenter(frag);
1956 String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
1958 if (HighlighterTest.VERBOSE) System.out.println("\t" + result);
// Concrete tests implement the actual highlighting scenario here; start()
// below appears to invoke it once per scorer mode (invocation lines elided).
1962 abstract void run() throws Exception;
1964 void start() throws Exception {
1965 if (HighlighterTest.VERBOSE) System.out.println("Run QueryScorer");
1967 if (HighlighterTest.VERBOSE) System.out.println("Run QueryTermScorer");