1 package org.apache.lucene.search.vectorhighlight;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.text.BreakIterator;
21 import java.util.Locale;
23 import org.apache.lucene.util.LuceneTestCase;
25 public class BreakIteratorBoundaryScannerTest extends LuceneTestCase {
26 static final String TEXT =
27 "Apache Lucene(TM) is a high-performance, full-featured text search engine library written entirely in Java." +
28 "\nIt is a technology suitable for nearly any application that requires\n" +
29 "full-text search, especially cross-platform. \nApache Lucene is an open source project available for free download.";
31 public void testOutOfRange() throws Exception {
32 StringBuilder text = new StringBuilder(TEXT);
33 BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
34 BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
36 int start = TEXT.length() + 1;
37 assertEquals(start, scanner.findStartOffset(text, start));
38 assertEquals(start, scanner.findEndOffset(text, start));
40 assertEquals(start, scanner.findStartOffset(text, start));
42 assertEquals(start, scanner.findEndOffset(text, start));
45 public void testWordBoundary() throws Exception {
46 StringBuilder text = new StringBuilder(TEXT);
47 BreakIterator bi = BreakIterator.getWordInstance(Locale.ENGLISH);
48 BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
50 int start = TEXT.indexOf("formance");
51 int expected = TEXT.indexOf("high-performance");
52 testFindStartOffset(text, start, expected, scanner);
54 expected = TEXT.indexOf(", full");
55 testFindEndOffset(text, start, expected, scanner);
58 public void testSentenceBoundary() throws Exception {
59 StringBuilder text = new StringBuilder(TEXT);
60 BreakIterator bi = BreakIterator.getSentenceInstance();
61 BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
63 int start = TEXT.indexOf("any application");
64 int expected = TEXT.indexOf("It is a");
65 testFindStartOffset(text, start, expected, scanner);
67 expected = TEXT.indexOf("Apache Lucene is an open source");
68 testFindEndOffset(text, start, expected, scanner);
71 public void testLineBoundary() throws Exception {
72 StringBuilder text = new StringBuilder(TEXT);
73 BreakIterator bi = BreakIterator.getLineInstance();
74 BoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);
76 int start = TEXT.indexOf("any application");
77 int expected = TEXT.indexOf("nearly");
78 testFindStartOffset(text, start, expected, scanner);
80 expected = TEXT.indexOf("application that requires");
81 testFindEndOffset(text, start, expected, scanner);
84 private void testFindStartOffset(StringBuilder text, int start, int expected, BoundaryScanner scanner) throws Exception {
85 assertEquals(expected, scanner.findStartOffset(text, start));
88 private void testFindEndOffset(StringBuilder text, int start, int expected, BoundaryScanner scanner) throws Exception {
89 assertEquals(expected, scanner.findEndOffset(text, start));