X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java deleted file mode 100644 index 8de53a6..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java +++ /dev/null @@ -1,1132 +0,0 @@ -package org.apache.lucene.analysis.shingle; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.StringReader; - -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.*; - -public class ShingleFilterTest extends BaseTokenStreamTestCase { - - public class TestTokenStream extends TokenStream { - - protected int index = 0; - protected Token[] testToken; - - private CharTermAttribute termAtt; - private OffsetAttribute offsetAtt; - private PositionIncrementAttribute posIncrAtt; - private TypeAttribute typeAtt; - - public TestTokenStream(Token[] testToken) { - super(); - this.testToken = testToken; - this.termAtt = addAttribute(CharTermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); - } - - @Override - public final boolean incrementToken() throws IOException { - clearAttributes(); - if (index < testToken.length) { - Token t = testToken[index++]; - termAtt.copyBuffer(t.buffer(), 0, t.length()); - offsetAtt.setOffset(t.startOffset(), t.endOffset()); - posIncrAtt.setPositionIncrement(t.getPositionIncrement()); - typeAtt.setType(TypeAttributeImpl.DEFAULT_TYPE); - return true; - } else { - return false; - } - } - } - - public static final Token[] TEST_TOKEN = new Token[] { - createToken("please", 0, 6), - createToken("divide", 7, 13), - createToken("this", 14, 18), - createToken("sentence", 19, 27), - createToken("into", 28, 32), - createToken("shingles", 33, 39), - }; - - public static final int[] UNIGRAM_ONLY_POSITION_INCREMENTS = new int[] { - 1, 1, 1, 1, 1, 1 - }; - - public static final String[] UNIGRAM_ONLY_TYPES = new String[] { - "word", "word", "word", "word", "word", "word" - }; - - public static Token[] testTokenWithHoles; - - public static final Token[] BI_GRAM_TOKENS = new Token[] { - createToken("please", 0, 6), - createToken("please divide", 0, 13), - createToken("divide", 7, 13), - createToken("divide this", 7, 18), - createToken("this", 14, 18), - createToken("this sentence", 14, 27), - createToken("sentence", 19, 27), - createToken("sentence into", 19, 32), - createToken("into", 28, 32), - createToken("into shingles", 28, 39), - createToken("shingles", 33, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] BI_GRAM_TYPES = new String[] { - "word", "shingle", "word", "shingle", "word", "shingle", "word", - "shingle", "word", "shingle", "word" - }; - - public static final Token[] BI_GRAM_TOKENS_WITH_HOLES = new Token[] { - createToken("please", 0, 6), - createToken("please divide", 0, 13), - createToken("divide", 7, 13), - createToken("divide _", 7, 19), - createToken("_ sentence", 19, 27), - createToken("sentence", 19, 27), - createToken("sentence _", 19, 33), - createToken("_ shingles", 33, 39), - createToken("shingles", 33, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = new int[] { - 1, 0, 1, 0, 1, 1, 0, 1, 1 - }; - - private static final String[] BI_GRAM_TYPES_WITH_HOLES = { - "word", "shingle", - "word", "shingle", "shingle", "word", "shingle", "shingle", "word" - }; - - public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] { - createToken("please divide", 0, 13), - createToken("divide this", 7, 18), - createToken("this sentence", 14, 27), - createToken("sentence into", 19, 32), - createToken("into shingles", 28, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] { - 1, 1, 1, 1, 1 - }; - - public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] { - "shingle", "shingle", "shingle", "shingle", "shingle" - }; - - public static final Token[] BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS = new Token[] { - createToken("please divide", 0, 13), - createToken("divide _", 7, 19), - createToken("_ sentence", 19, 27), - createToken("sentence _", 19, 33), - createToken("_ shingles", 33, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS = new int[] { - 1, 1, 1, 1, 1, 1 - }; - - - public static final Token[] TEST_SINGLE_TOKEN = new Token[] { - createToken("please", 0, 6) - }; - - public static final Token[] SINGLE_TOKEN = new Token[] { - createToken("please", 0, 6) - }; - - public static final int[] SINGLE_TOKEN_INCREMENTS = new int[] { - 1 - }; - - public static final String[] SINGLE_TOKEN_TYPES = new String[] { - "word" - }; - - public static final Token[] EMPTY_TOKEN_ARRAY = new Token[] { - }; - - public static final int[] EMPTY_TOKEN_INCREMENTS_ARRAY = new int[] { - }; - - public static final String[] EMPTY_TOKEN_TYPES_ARRAY = new String[] { - }; - - public static final Token[] TRI_GRAM_TOKENS = new Token[] { - createToken("please", 0, 6), - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("divide", 7, 13), - createToken("divide this", 7, 18), - createToken("divide this sentence", 7, 27), - createToken("this", 14, 18), - createToken("this sentence", 14, 27), - createToken("this sentence into", 14, 32), - createToken("sentence", 19, 27), - createToken("sentence into", 19, 32), - createToken("sentence into shingles", 19, 39), - createToken("into", 28, 32), - createToken("into shingles", 28, 39), - createToken("shingles", 33, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] { - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("divide this", 7, 18), - createToken("divide this sentence", 7, 27), - createToken("this sentence", 14, 27), - createToken("this sentence into", 14, 32), - createToken("sentence into", 19, 32), - createToken("sentence into shingles", 19, 39), - createToken("into shingles", 28, 39), - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", - }; - - public static final Token[] FOUR_GRAM_TOKENS = new Token[] { - createToken("please", 0, 6), - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("please divide this sentence", 0, 27), - createToken("divide", 7, 13), - createToken("divide this", 7, 18), - createToken("divide this sentence", 7, 27), - createToken("divide this sentence into", 7, 32), - createToken("this", 14, 18), - createToken("this sentence", 14, 27), - createToken("this sentence into", 14, 32), - createToken("this sentence into shingles", 14, 39), - createToken("sentence", 19, 27), - createToken("sentence into", 19, 32), - createToken("sentence into shingles", 19, 39), - createToken("into", 28, 32), - createToken("into shingles", 28, 39), - createToken("shingles", 33, 39) - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS = new int[] { - 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] FOUR_GRAM_TYPES = new String[] { - "word", "shingle", "shingle", "shingle", - "word", "shingle", "shingle", "shingle", - "word", "shingle", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] { - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("please divide this sentence", 0, 27), - createToken("divide this", 7, 18), - createToken("divide this sentence", 7, 27), - createToken("divide this sentence into", 7, 32), - createToken("this sentence", 14, 27), - createToken("this sentence into", 14, 32), - createToken("this sentence into shingles", 14, 39), - createToken("sentence into", 19, 32), - createToken("sentence into shingles", 19, 39), - createToken("into shingles", 28, 39), - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - - }; - - public static final Token[] TRI_GRAM_TOKENS_MIN_TRI_GRAM = new Token[] { - createToken("please", 0, 6), - createToken("please divide this", 0, 18), - createToken("divide", 7, 13), - createToken("divide this sentence", 7, 27), - createToken("this", 14, 18), - createToken("this sentence into", 14, 32), - createToken("sentence", 19, 27), - createToken("sentence into shingles", 19, 39), - createToken("into", 28, 32), - createToken("shingles", 33, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1, 1 - }; - - public static final String[] TRI_GRAM_TYPES_MIN_TRI_GRAM = new String[] { - "word", "shingle", - "word", "shingle", - "word", "shingle", - "word", "shingle", - "word", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new Token[] { - createToken("please divide this", 0, 18), - createToken("divide this sentence", 7, 27), - createToken("this sentence into", 14, 32), - createToken("sentence into shingles", 19, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new int[] { - 1, 1, 1, 1 - }; - - public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new String[] { - "shingle", - "shingle", - "shingle", - "shingle" - }; - - public static final Token[] FOUR_GRAM_TOKENS_MIN_TRI_GRAM = new Token[] { - createToken("please", 0, 6), - createToken("please divide this", 0, 18), - createToken("please divide this sentence", 0, 27), - createToken("divide", 7, 13), - createToken("divide this sentence", 7, 27), - createToken("divide this sentence into", 7, 32), - createToken("this", 14, 18), - createToken("this sentence into", 14, 32), - createToken("this sentence into shingles", 14, 39), - createToken("sentence", 19, 27), - createToken("sentence into shingles", 19, 39), - createToken("into", 28, 32), - createToken("shingles", 33, 39) - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1 - }; - - public static final String[] FOUR_GRAM_TYPES_MIN_TRI_GRAM = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word", - "word" - }; - - public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new Token[] { - createToken("please divide this", 0, 18), - createToken("please divide this sentence", 0, 27), - createToken("divide this sentence", 7, 27), - createToken("divide this sentence into", 7, 32), - createToken("this sentence into", 14, 32), - createToken("this sentence into shingles", 14, 39), - createToken("sentence into shingles", 19, 39), - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new int[] { - 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle" - }; - - public static final Token[] FOUR_GRAM_TOKENS_MIN_FOUR_GRAM = new Token[] { - createToken("please", 0, 6), - createToken("please divide this sentence", 0, 27), - createToken("divide", 7, 13), - createToken("divide this sentence into", 7, 32), - createToken("this", 14, 18), - createToken("this sentence into shingles", 14, 39), - createToken("sentence", 19, 27), - createToken("into", 28, 32), - createToken("shingles", 33, 39) - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM = new int[] { - 1, 0, 1, 0, 1, 0, 1, 1, 1 - }; - - public static final String[] FOUR_GRAM_TYPES_MIN_FOUR_GRAM = new String[] { - "word", "shingle", - "word", "shingle", - "word", "shingle", - "word", - "word", - "word" - }; - - public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new Token[] { - createToken("please divide this sentence", 0, 27), - createToken("divide this sentence into", 7, 32), - createToken("this sentence into shingles", 14, 39), - }; - - public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new int[] { - 1, 1, 1 - }; - - public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new String[] { - "shingle", - "shingle", - "shingle" - }; - - public static final Token[] BI_GRAM_TOKENS_NO_SEPARATOR = new Token[] { - createToken("please", 0, 6), - createToken("pleasedivide", 0, 13), - createToken("divide", 7, 13), - createToken("dividethis", 7, 18), - createToken("this", 14, 18), - createToken("thissentence", 14, 27), - createToken("sentence", 19, 27), - createToken("sentenceinto", 19, 32), - createToken("into", 28, 32), - createToken("intoshingles", 28, 39), - createToken("shingles", 33, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] BI_GRAM_TYPES_NO_SEPARATOR = new String[] { - "word", "shingle", "word", "shingle", "word", "shingle", "word", - "shingle", "word", "shingle", "word" - }; - - public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new Token[] { - createToken("pleasedivide", 0, 13), - createToken("dividethis", 7, 18), - createToken("thissentence", 14, 27), - createToken("sentenceinto", 19, 32), - createToken("intoshingles", 28, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new int[] { - 1, 1, 1, 1, 1 - }; - - public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = new String[] { - "shingle", "shingle", "shingle", "shingle", "shingle" - }; - - public static final Token[] TRI_GRAM_TOKENS_NO_SEPARATOR = new Token[] { - createToken("please", 0, 6), - createToken("pleasedivide", 0, 13), - createToken("pleasedividethis", 0, 18), - createToken("divide", 7, 13), - createToken("dividethis", 7, 18), - createToken("dividethissentence", 7, 27), - createToken("this", 14, 18), - createToken("thissentence", 14, 27), - createToken("thissentenceinto", 14, 32), - createToken("sentence", 19, 27), - createToken("sentenceinto", 19, 32), - createToken("sentenceintoshingles", 19, 39), - createToken("into", 28, 32), - createToken("intoshingles", 28, 39), - createToken("shingles", 33, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_NO_SEPARATOR = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new Token[] { - createToken("pleasedivide", 0, 13), - createToken("pleasedividethis", 0, 18), - createToken("dividethis", 7, 18), - createToken("dividethissentence", 7, 27), - createToken("thissentence", 14, 27), - createToken("thissentenceinto", 14, 32), - createToken("sentenceinto", 19, 32), - createToken("sentenceintoshingles", 19, 39), - createToken("intoshingles", 28, 39), - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", - }; - - public static final Token[] BI_GRAM_TOKENS_ALT_SEPARATOR = new Token[] { - createToken("please", 0, 6), - createToken("pleasedivide", 0, 13), - createToken("divide", 7, 13), - createToken("dividethis", 7, 18), - createToken("this", 14, 18), - createToken("thissentence", 14, 27), - createToken("sentence", 19, 27), - createToken("sentenceinto", 19, 32), - createToken("into", 28, 32), - createToken("intoshingles", 28, 39), - createToken("shingles", 33, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] BI_GRAM_TYPES_ALT_SEPARATOR = new String[] { - "word", "shingle", "word", "shingle", "word", "shingle", "word", - "shingle", "word", "shingle", "word" - }; - - public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new Token[] { - createToken("pleasedivide", 0, 13), - createToken("dividethis", 7, 18), - createToken("thissentence", 14, 27), - createToken("sentenceinto", 19, 32), - createToken("intoshingles", 28, 39), - }; - - public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new int[] { - 1, 1, 1, 1, 1 - }; - - public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new String[] { - "shingle", "shingle", "shingle", "shingle", "shingle" - }; - - public static final Token[] TRI_GRAM_TOKENS_ALT_SEPARATOR = new Token[] { - createToken("please", 0, 6), - createToken("pleasedivide", 0, 13), - createToken("pleasedividethis", 0, 18), - createToken("divide", 7, 13), - createToken("dividethis", 7, 18), - createToken("dividethissentence", 7, 27), - createToken("this", 14, 18), - createToken("thissentence", 14, 27), - createToken("thissentenceinto", 14, 32), - createToken("sentence", 19, 27), - createToken("sentenceinto", 19, 32), - createToken("sentenceintoshingles", 19, 39), - createToken("into", 28, 32), - createToken("intoshingles", 28, 39), - createToken("shingles", 33, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_ALT_SEPARATOR = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new Token[] { - createToken("pleasedivide", 0, 13), - createToken("pleasedividethis", 0, 18), - createToken("dividethis", 7, 18), - createToken("dividethissentence", 7, 27), - createToken("thissentence", 14, 27), - createToken("thissentenceinto", 14, 32), - createToken("sentenceinto", 19, 32), - createToken("sentenceintoshingles", 19, 39), - createToken("intoshingles", 28, 39), - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new int[] { - 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", - }; - - public static final Token[] TRI_GRAM_TOKENS_NULL_SEPARATOR = new Token[] { - createToken("please", 0, 6), - createToken("pleasedivide", 0, 13), - createToken("pleasedividethis", 0, 18), - createToken("divide", 7, 13), - createToken("dividethis", 7, 18), - createToken("dividethissentence", 7, 27), - createToken("this", 14, 18), - createToken("thissentence", 14, 27), - createToken("thissentenceinto", 14, 32), - createToken("sentence", 19, 27), - createToken("sentenceinto", 19, 32), - createToken("sentenceintoshingles", 19, 39), - createToken("into", 28, 32), - createToken("intoshingles", 28, 39), - createToken("shingles", 33, 39) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_NULL_SEPARATOR = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TEST_TOKEN_POS_INCR_EQUAL_TO_N = new Token[] { - createToken("please", 0, 6), - createToken("divide", 7, 13), - createToken("this", 14, 18), - createToken("sentence", 29, 37, 3), - createToken("into", 38, 42), - createToken("shingles", 43, 49), - }; - - public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N = new Token[] { - createToken("please", 0, 6), - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("divide", 7, 13), - createToken("divide this", 7, 18), - createToken("divide this _", 7, 29), - createToken("this", 14, 18), - createToken("this _", 14, 29), - createToken("this _ _", 14, 29), - createToken("_ _ sentence", 29, 37), - createToken("_ sentence", 29, 37), - createToken("_ sentence into", 29, 42), - createToken("sentence", 29, 37), - createToken("sentence into", 29, 42), - createToken("sentence into shingles", 29, 49), - createToken("into", 38, 42), - createToken("into shingles", 38, 49), - createToken("shingles", 43, 49) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N = new int[] { - 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N = new String[] { - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "shingle", "shingle", "shingle", "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new Token[] { - createToken("please divide", 0, 13), - createToken("please divide this", 0, 18), - createToken("divide this", 7, 18), - createToken("divide this _", 7, 29), - createToken("this _", 14, 29), - createToken("this _ _", 14, 29), - createToken("_ _ sentence", 29, 37), - createToken("_ sentence", 29, 37), - createToken("_ sentence into", 29, 42), - createToken("sentence into", 29, 42), - createToken("sentence into shingles", 29, 49), - createToken("into shingles", 38, 49), - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new int[] { - 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1 - }; - - public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", "shingle", - "shingle", "shingle", - "shingle", - }; - - public static final Token[] TEST_TOKEN_POS_INCR_GREATER_THAN_N = new Token[] { - createToken("please", 0, 6), - createToken("divide", 57, 63, 8), - createToken("this", 64, 68), - createToken("sentence", 69, 77), - createToken("into", 78, 82), - createToken("shingles", 83, 89), - }; - - public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N = new Token[] { - createToken("please", 0, 6), - createToken("please _", 0, 57), - createToken("please _ _", 0, 57), - createToken("_ _ divide", 57, 63), - createToken("_ divide", 57, 63), - createToken("_ divide this", 57, 68), - createToken("divide", 57, 63), - createToken("divide this", 57, 68), - createToken("divide this sentence", 57, 77), - createToken("this", 64, 68), - createToken("this sentence", 64, 77), - createToken("this sentence into", 64, 82), - createToken("sentence", 69, 77), - createToken("sentence into", 69, 82), - createToken("sentence into shingles", 69, 89), - createToken("into", 78, 82), - createToken("into shingles", 78, 89), - createToken("shingles", 83, 89) - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N = new int[] { - 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1 - }; - public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N = new String[] { - "word", "shingle", "shingle", - "shingle", - "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", "shingle", - "word", "shingle", - "word" - }; - - public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new Token[] { - createToken("please _", 0, 57), - createToken("please _ _", 0, 57), - createToken("_ _ divide", 57, 63), - createToken("_ divide", 57, 63), - createToken("_ divide this", 57, 68), - createToken("divide this", 57, 68), - createToken("divide this sentence", 57, 77), - createToken("this sentence", 64, 77), - createToken("this sentence into", 64, 82), - createToken("sentence into", 69, 82), - createToken("sentence into shingles", 69, 89), - createToken("into shingles", 78, 89), - }; - - public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new int[] { - 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1 - }; - - public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new String[] { - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", - "shingle", "shingle", "shingle", "shingle", "shingle", - "shingle", - }; - - @Override - public void setUp() throws Exception { - super.setUp(); - testTokenWithHoles = new Token[] { - createToken("please", 0, 6), - createToken("divide", 7, 13), - createToken("sentence", 19, 27, 2), - createToken("shingles", 33, 39, 2), - }; - } - - /* - * Class under test for void ShingleFilter(TokenStream, int) - */ - public void testBiGramFilter() throws IOException { - this.shingleFilterTest(2, TEST_TOKEN, BI_GRAM_TOKENS, - BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES, - true); - } - - public void testBiGramFilterWithHoles() throws IOException { - this.shingleFilterTest(2, testTokenWithHoles, BI_GRAM_TOKENS_WITH_HOLES, - BI_GRAM_POSITION_INCREMENTS_WITH_HOLES, - BI_GRAM_TYPES_WITH_HOLES, - true); - } - - public void testBiGramFilterWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, TEST_TOKEN, BI_GRAM_TOKENS_WITHOUT_UNIGRAMS, - BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, BI_GRAM_TYPES_WITHOUT_UNIGRAMS, - false); - } - - public void testBiGramFilterWithHolesWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, testTokenWithHoles, BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS, - BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS, BI_GRAM_TYPES_WITHOUT_UNIGRAMS, - false); - } - - public void testBiGramFilterWithSingleToken() throws IOException { - this.shingleFilterTest(2, TEST_SINGLE_TOKEN, SINGLE_TOKEN, - SINGLE_TOKEN_INCREMENTS, SINGLE_TOKEN_TYPES, - true); - } - - public void testBiGramFilterWithSingleTokenWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, TEST_SINGLE_TOKEN, EMPTY_TOKEN_ARRAY, - EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, - false); - } - - public void testBiGramFilterWithEmptyTokenStream() throws IOException { - this.shingleFilterTest(2, EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_ARRAY, - EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, - true); - } - - public void testBiGramFilterWithEmptyTokenStreamWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_ARRAY, - EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, - false); - } - - public void testTriGramFilter() throws IOException { - this.shingleFilterTest(3, TEST_TOKEN, TRI_GRAM_TOKENS, - TRI_GRAM_POSITION_INCREMENTS, TRI_GRAM_TYPES, - true); - } - - public void testTriGramFilterWithoutUnigrams() throws IOException { - this.shingleFilterTest(3, TEST_TOKEN, TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS, - TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS, - false); - } - - public void testFourGramFilter() throws IOException { - this.shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS, - FOUR_GRAM_POSITION_INCREMENTS, FOUR_GRAM_TYPES, - true); - } - - public void testFourGramFilterWithoutUnigrams() throws IOException { - this.shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS, - FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, - FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS, false); - } - - - public void testTriGramFilterMinTriGram() throws IOException { - this.shingleFilterTest(3, 3, TEST_TOKEN, TRI_GRAM_TOKENS_MIN_TRI_GRAM, - TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM, - TRI_GRAM_TYPES_MIN_TRI_GRAM, - true); - } - - public void testTriGramFilterWithoutUnigramsMinTriGram() throws IOException { - this.shingleFilterTest(3, 3, TEST_TOKEN, - TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, - TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, - TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, - false); - } - - public void testFourGramFilterMinTriGram() throws IOException { - this.shingleFilterTest(3, 4, TEST_TOKEN, FOUR_GRAM_TOKENS_MIN_TRI_GRAM, - FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM, - FOUR_GRAM_TYPES_MIN_TRI_GRAM, - true); - } - - public void testFourGramFilterWithoutUnigramsMinTriGram() throws IOException { - this.shingleFilterTest(3, 4, TEST_TOKEN, - FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, - FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, - FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, false); - } - - public void testFourGramFilterMinFourGram() throws IOException { - this.shingleFilterTest(4, 4, TEST_TOKEN, FOUR_GRAM_TOKENS_MIN_FOUR_GRAM, - FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM, - FOUR_GRAM_TYPES_MIN_FOUR_GRAM, - true); - } - - public void testFourGramFilterWithoutUnigramsMinFourGram() throws IOException { - this.shingleFilterTest(4, 4, TEST_TOKEN, - FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, - FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, - FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, false); - } - - public void testBiGramFilterNoSeparator() throws IOException { - this.shingleFilterTest("", 2, 2, TEST_TOKEN, BI_GRAM_TOKENS_NO_SEPARATOR, - BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR, - BI_GRAM_TYPES_NO_SEPARATOR, true); - } - - public void testBiGramFilterWithoutUnigramsNoSeparator() throws IOException { - this.shingleFilterTest("", 2, 2, TEST_TOKEN, - BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR, - BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR, - BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR, - false); - } - public void testTriGramFilterNoSeparator() throws IOException { - this.shingleFilterTest("", 2, 3, TEST_TOKEN, TRI_GRAM_TOKENS_NO_SEPARATOR, - TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR, - TRI_GRAM_TYPES_NO_SEPARATOR, true); - } - - public void testTriGramFilterWithoutUnigramsNoSeparator() throws IOException { - this.shingleFilterTest("", 2, 3, TEST_TOKEN, - TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR, - TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR, - TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR, false); - } - - public void testBiGramFilterAltSeparator() throws IOException { - this.shingleFilterTest("", 2, 2, TEST_TOKEN, BI_GRAM_TOKENS_ALT_SEPARATOR, - BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR, - BI_GRAM_TYPES_ALT_SEPARATOR, true); - } - - public void testBiGramFilterWithoutUnigramsAltSeparator() throws IOException { - this.shingleFilterTest("", 2, 2, TEST_TOKEN, - BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, - BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, - BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR, - false); - } - public void testTriGramFilterAltSeparator() throws IOException { - this.shingleFilterTest("", 2, 3, TEST_TOKEN, TRI_GRAM_TOKENS_ALT_SEPARATOR, - TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR, - TRI_GRAM_TYPES_ALT_SEPARATOR, true); - } - - public void testTriGramFilterWithoutUnigramsAltSeparator() throws IOException { - this.shingleFilterTest("", 2, 3, TEST_TOKEN, - TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, - TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR, - TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR, false); - } - - public void testTriGramFilterNullSeparator() throws IOException { - this.shingleFilterTest(null, 2, 3, TEST_TOKEN, TRI_GRAM_TOKENS_NULL_SEPARATOR, - TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR, - TRI_GRAM_TYPES_NULL_SEPARATOR, true); - } - - public void testPositionIncrementEqualToN() throws IOException { - this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N, - TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N, - TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N, true); - } - - public void testPositionIncrementEqualToNWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N, TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, - TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, - TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS, false); - } - - - public void testPositionIncrementGreaterThanN() throws IOException { - this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N, - TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N, - TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N, true); - } - - public void testPositionIncrementGreaterThanNWithoutUnigrams() throws IOException { - this.shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N, TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, - TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, - TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS, false); - } - - public void testReset() throws Exception { - Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence")); - TokenStream filter = new ShingleFilter(wsTokenizer, 2); - assertTokenStreamContents(filter, - new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"}, - new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27}, - new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE}, - new int[]{1,0,1,0,1,0,1} - ); - wsTokenizer.reset(new StringReader("please divide this sentence")); - assertTokenStreamContents(filter, - new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"}, - new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27}, - new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE}, - new int[]{1,0,1,0,1,0,1} - ); - } - - public void testOutputUnigramsIfNoShinglesSingleTokenCase() throws IOException { - // Single token input with outputUnigrams==false is the primary case where - // enabling this option should alter program behavior. - this.shingleFilterTest(2, 2, TEST_SINGLE_TOKEN, SINGLE_TOKEN, - SINGLE_TOKEN_INCREMENTS, SINGLE_TOKEN_TYPES, - false, true); - } - - public void testOutputUnigramsIfNoShinglesWithSimpleBigram() throws IOException { - // Here we expect the same result as with testBiGramFilter(). - this.shingleFilterTest(2, 2, TEST_TOKEN, BI_GRAM_TOKENS, - BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES, - true, true); - } - - public void testOutputUnigramsIfNoShinglesWithSimpleUnigramlessBigram() throws IOException { - // Here we expect the same result as with testBiGramFilterWithoutUnigrams(). - this.shingleFilterTest(2, 2, TEST_TOKEN, BI_GRAM_TOKENS_WITHOUT_UNIGRAMS, - BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, BI_GRAM_TYPES_WITHOUT_UNIGRAMS, - false, true); - } - - public void testOutputUnigramsIfNoShinglesWithMultipleInputTokens() throws IOException { - // Test when the minimum shingle size is greater than the number of input tokens - this.shingleFilterTest(7, 7, TEST_TOKEN, TEST_TOKEN, - UNIGRAM_ONLY_POSITION_INCREMENTS, UNIGRAM_ONLY_TYPES, - false, true); - } - - protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare, - int[] positionIncrements, String[] types, - boolean outputUnigrams) - throws IOException { - - ShingleFilter filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize); - filter.setOutputUnigrams(outputUnigrams); - shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types); - } - - protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, - Token[] tokensToCompare, int[] positionIncrements, - String[] types, boolean outputUnigrams) - throws IOException { - ShingleFilter filter - = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize); - filter.setOutputUnigrams(outputUnigrams); - shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types); - } - - protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle, - Token[] tokensToCompare, int[] positionIncrements, - String[] types, boolean outputUnigrams, - boolean outputUnigramsIfNoShingles) - throws IOException { - ShingleFilter filter - = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize); - filter.setOutputUnigrams(outputUnigrams); - filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles); - shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types); - } - - protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize, Token[] tokensToShingle, - Token[] tokensToCompare, int[] positionIncrements, - String[] types, boolean outputUnigrams) - throws IOException { - ShingleFilter filter - = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize); - filter.setTokenSeparator(tokenSeparator); - filter.setOutputUnigrams(outputUnigrams); - shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types); - } - - protected void shingleFilterTestCommon(ShingleFilter filter, - Token[] tokensToCompare, - int[] positionIncrements, - String[] types) - throws IOException { - String text[] = new String[tokensToCompare.length]; - int startOffsets[] = new int[tokensToCompare.length]; - int endOffsets[] = new int[tokensToCompare.length]; - - for (int i = 0; i < tokensToCompare.length; i++) { - text[i] = new String(tokensToCompare[i].buffer(),0, tokensToCompare[i].length()); - startOffsets[i] = tokensToCompare[i].startOffset(); - endOffsets[i] = tokensToCompare[i].endOffset(); - } - - assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements); - } - - private static Token createToken(String term, int start, int offset) { - return createToken(term, start, offset, 1); - } - - private static Token createToken - (String term, int start, int offset, int positionIncrement) - { - Token token = new Token(start, offset); - token.copyBuffer(term.toCharArray(), 0, term.length()); - token.setPositionIncrement(positionIncrement); - return token; - } -}