--- /dev/null
+package org.apache.lucene.analysis.shingle;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.*;
+
+public class ShingleFilterTest extends BaseTokenStreamTestCase {
+
+  /**
+   * Fixed-input TokenStream: replays a pre-built Token array, copying each
+   * token's term text, offsets, position increment, and the default type
+   * onto the stream attributes.
+   */
+  public class TestTokenStream extends TokenStream {
+
+    protected int index = 0;
+    protected Token[] testToken;
+
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+    public TestTokenStream(Token[] testToken) {
+      super();
+      this.testToken = testToken;
+    }
+
+    @Override
+    public final boolean incrementToken() throws IOException {
+      clearAttributes();
+      if (index >= testToken.length) {
+        return false; // input exhausted
+      }
+      final Token next = testToken[index++];
+      termAtt.copyBuffer(next.buffer(), 0, next.length());
+      offsetAtt.setOffset(next.startOffset(), next.endOffset());
+      posIncrAtt.setPositionIncrement(next.getPositionIncrement());
+      typeAtt.setType(TypeAttributeImpl.DEFAULT_TYPE);
+      return true;
+    }
+  }
+
+  // Base input: "please divide this sentence into shingles", one token per
+  // word with character offsets. createToken(...) is a helper presumably
+  // defined elsewhere in this class (not visible in this chunk).
+  public static final Token[] TEST_TOKEN = new Token[] {
+    createToken("please", 0, 6),
+    createToken("divide", 7, 13),
+    createToken("this", 14, 18),
+    createToken("sentence", 19, 27),
+    createToken("into", 28, 32),
+    createToken("shingles", 33, 39),
+  };
+
+  // Expected increments/types when only unigrams are emitted.
+  public static final int[] UNIGRAM_ONLY_POSITION_INCREMENTS = new int[] {
+    1, 1, 1, 1, 1, 1
+  };
+
+  public static final String[] UNIGRAM_ONLY_TYPES = new String[] {
+    "word", "word", "word", "word", "word", "word"
+  };
+
+  // Input with position-increment holes; (re)built in setUp() before each test.
+  public static Token[] testTokenWithHoles;
+
+  // Expected bigram output: unigrams ("word") interleaved with shingles.
+  public static final Token[] BI_GRAM_TOKENS = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES = new String[] {
+    "word", "shingle", "word", "shingle", "word", "shingle", "word",
+    "shingle", "word", "shingle", "word"
+  };
+
+  // Expected bigram output over the holey input; "_" is the filler text
+  // standing in for a skipped position.
+  public static final Token[] BI_GRAM_TOKENS_WITH_HOLES = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("divide _", 7, 19),
+    createToken("_ sentence", 19, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentence _", 19, 33),
+    createToken("_ shingles", 33, 39),
+    createToken("shingles", 33, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES = new int[] {
+    1, 0, 1, 0, 1, 1, 0, 1, 1
+  };
+
+  private static final String[] BI_GRAM_TYPES_WITH_HOLES = {
+    "word", "shingle",
+    "word", "shingle", "shingle", "word", "shingle", "shingle", "word"
+  };
+
+  // Same expectations with outputUnigrams == false: shingles only.
+  public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("divide this", 7, 18),
+    createToken("this sentence", 14, 27),
+    createToken("sentence into", 19, 32),
+    createToken("into shingles", 28, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] {
+    1, 1, 1, 1, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle", "shingle", "shingle", "shingle"
+  };
+
+  public static final Token[] BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("divide _", 7, 19),
+    createToken("_ sentence", 19, 27),
+    createToken("sentence _", 19, 33),
+    createToken("_ shingles", 33, 39),
+  };
+
+  // One increment per expected shingle in
+  // BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS (5 tokens). The original
+  // array carried a spurious 6th element with no corresponding token;
+  // the companion types array used by the test has 5 entries.
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS = new int[] {
+    1, 1, 1, 1, 1
+  };
+
+
+  // Single-token and empty-stream fixtures.
+  public static final Token[] TEST_SINGLE_TOKEN = new Token[] {
+    createToken("please", 0, 6)
+  };
+
+  public static final Token[] SINGLE_TOKEN = new Token[] {
+    createToken("please", 0, 6)
+  };
+
+  public static final int[] SINGLE_TOKEN_INCREMENTS = new int[] {
+    1
+  };
+
+  public static final String[] SINGLE_TOKEN_TYPES = new String[] {
+    "word"
+  };
+
+  public static final Token[] EMPTY_TOKEN_ARRAY = new Token[] {
+  };
+
+  public static final int[] EMPTY_TOKEN_INCREMENTS_ARRAY = new int[] {
+  };
+
+  public static final String[] EMPTY_TOKEN_TYPES_ARRAY = new String[] {
+  };
+
+  // Expected trigram output (shingle sizes up to 3, unigrams included).
+  public static final Token[] TRI_GRAM_TOKENS = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into shingles", 28, 39),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+
+  // Expected 4-gram output (shingle sizes up to 4).
+  public static final Token[] FOUR_GRAM_TOKENS = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS = new int[] {
+    1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES = new String[] {
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into shingles", 28, 39),
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+
+  };
+
+  // Expected output for tests run with a minimum shingle size of 3
+  // (bigrams suppressed).
+  public static final Token[] TRI_GRAM_TOKENS_MIN_TRI_GRAM = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide this sentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_MIN_TRI_GRAM = new String[] {
+    "word", "shingle",
+    "word", "shingle",
+    "word", "shingle",
+    "word", "shingle",
+    "word",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new Token[] {
+    createToken("please divide this", 0, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence into shingles", 19, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new int[] {
+    1, 1, 1, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new String[] {
+    "shingle",
+    "shingle",
+    "shingle",
+    "shingle"
+  };
+
+  public static final Token[] FOUR_GRAM_TOKENS_MIN_TRI_GRAM = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide", 7, 13),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this", 14, 18),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence", 19, 27),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES_MIN_TRI_GRAM = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word",
+    "word"
+  };
+
+  public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new Token[] {
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence into shingles", 19, 39),
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new int[] {
+    1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle"
+  };
+
+  // Expected output for tests run with a minimum shingle size of 4.
+  public static final Token[] FOUR_GRAM_TOKENS_MIN_FOUR_GRAM = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide", 7, 13),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this", 14, 18),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence", 19, 27),
+    createToken("into", 28, 32),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 1, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES_MIN_FOUR_GRAM = new String[] {
+    "word", "shingle",
+    "word", "shingle",
+    "word", "shingle",
+    "word",
+    "word",
+    "word"
+  };
+
+  public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new Token[] {
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this sentence into shingles", 14, 39),
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new int[] {
+    1, 1, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM = new String[] {
+    "shingle",
+    "shingle",
+    "shingle"
+  };
+
+  // Expected output when the token separator is the empty string: shingle
+  // parts are concatenated with nothing between them.
+  public static final Token[] BI_GRAM_TOKENS_NO_SEPARATOR = new Token[] {
+    createToken("please", 0, 6),
+    createToken("pleasedivide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("dividethis", 7, 18),
+    createToken("this", 14, 18),
+    createToken("thissentence", 14, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentenceinto", 19, 32),
+    createToken("into", 28, 32),
+    createToken("intoshingles", 28, 39),
+    createToken("shingles", 33, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES_NO_SEPARATOR = new String[] {
+    "word", "shingle", "word", "shingle", "word", "shingle", "word",
+    "shingle", "word", "shingle", "word"
+  };
+
+  public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new Token[] {
+    createToken("pleasedivide", 0, 13),
+    createToken("dividethis", 7, 18),
+    createToken("thissentence", 14, 27),
+    createToken("sentenceinto", 19, 32),
+    createToken("intoshingles", 28, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new int[] {
+    1, 1, 1, 1, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = new String[] {
+    "shingle", "shingle", "shingle", "shingle", "shingle"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_NO_SEPARATOR = new Token[] {
+    createToken("please", 0, 6),
+    createToken("pleasedivide", 0, 13),
+    createToken("pleasedividethis", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("dividethis", 7, 18),
+    createToken("dividethissentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("thissentence", 14, 27),
+    createToken("thissentenceinto", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentenceinto", 19, 32),
+    createToken("sentenceintoshingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("intoshingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_NO_SEPARATOR = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new Token[] {
+    createToken("pleasedivide", 0, 13),
+    createToken("pleasedividethis", 0, 18),
+    createToken("dividethis", 7, 18),
+    createToken("dividethissentence", 7, 27),
+    createToken("thissentence", 14, 27),
+    createToken("thissentenceinto", 14, 32),
+    createToken("sentenceinto", 19, 32),
+    createToken("sentenceintoshingles", 19, 39),
+    createToken("intoshingles", 28, 39),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+
+  // Expected output with the alternate "<SEP>" token separator.
+  public static final Token[] BI_GRAM_TOKENS_ALT_SEPARATOR = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please<SEP>divide", 0, 13),
+    createToken("divide", 7, 13),
+    createToken("divide<SEP>this", 7, 18),
+    createToken("this", 14, 18),
+    createToken("this<SEP>sentence", 14, 27),
+    createToken("sentence", 19, 27),
+    createToken("sentence<SEP>into", 19, 32),
+    createToken("into", 28, 32),
+    createToken("into<SEP>shingles", 28, 39),
+    createToken("shingles", 33, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES_ALT_SEPARATOR = new String[] {
+    "word", "shingle", "word", "shingle", "word", "shingle", "word",
+    "shingle", "word", "shingle", "word"
+  };
+
+  public static final Token[] BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new Token[] {
+    createToken("please<SEP>divide", 0, 13),
+    createToken("divide<SEP>this", 7, 18),
+    createToken("this<SEP>sentence", 14, 27),
+    createToken("sentence<SEP>into", 19, 32),
+    createToken("into<SEP>shingles", 28, 39),
+  };
+
+  public static final int[] BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new int[] {
+    1, 1, 1, 1, 1
+  };
+
+  public static final String[] BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new String[] {
+    "shingle", "shingle", "shingle", "shingle", "shingle"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_ALT_SEPARATOR = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please<SEP>divide", 0, 13),
+    createToken("please<SEP>divide<SEP>this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide<SEP>this", 7, 18),
+    createToken("divide<SEP>this<SEP>sentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("this<SEP>sentence", 14, 27),
+    createToken("this<SEP>sentence<SEP>into", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentence<SEP>into", 19, 32),
+    createToken("sentence<SEP>into<SEP>shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("into<SEP>shingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_ALT_SEPARATOR = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new Token[] {
+    createToken("please<SEP>divide", 0, 13),
+    createToken("please<SEP>divide<SEP>this", 0, 18),
+    createToken("divide<SEP>this", 7, 18),
+    createToken("divide<SEP>this<SEP>sentence", 7, 27),
+    createToken("this<SEP>sentence", 14, 27),
+    createToken("this<SEP>sentence<SEP>into", 14, 32),
+    createToken("sentence<SEP>into", 19, 32),
+    createToken("sentence<SEP>into<SEP>shingles", 19, 39),
+    createToken("into<SEP>shingles", 28, 39),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+
+  // Expected output with a null separator; the concatenated strings match
+  // the empty-string separator case above.
+  public static final Token[] TRI_GRAM_TOKENS_NULL_SEPARATOR = new Token[] {
+    createToken("please", 0, 6),
+    createToken("pleasedivide", 0, 13),
+    createToken("pleasedividethis", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("dividethis", 7, 18),
+    createToken("dividethissentence", 7, 27),
+    createToken("this", 14, 18),
+    createToken("thissentence", 14, 27),
+    createToken("thissentenceinto", 14, 32),
+    createToken("sentence", 19, 27),
+    createToken("sentenceinto", 19, 32),
+    createToken("sentenceintoshingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("intoshingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_NULL_SEPARATOR = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  // Input whose "sentence" token carries positionIncrement == 3 (equal to
+  // the max shingle size used in the tests below), i.e. two positions are
+  // skipped before it.
+  public static final Token[] TEST_TOKEN_POS_INCR_EQUAL_TO_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("divide", 7, 13),
+    createToken("this", 14, 18),
+    createToken("sentence", 29, 37, 3),
+    createToken("into", 38, 42),
+    createToken("shingles", 43, 49),
+  };
+
+  // Expected trigram output; "_" marks the hole-filling token text.
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this _", 7, 29),
+    createToken("this", 14, 18),
+    createToken("this _", 14, 29),
+    createToken("this _ _", 14, 29),
+    createToken("_ _ sentence", 29, 37),
+    createToken("_ sentence", 29, 37),
+    createToken("_ sentence into", 29, 42),
+    createToken("sentence", 29, 37),
+    createToken("sentence into", 29, 42),
+    createToken("sentence into shingles", 29, 49),
+    createToken("into", 38, 42),
+    createToken("into shingles", 38, 49),
+    createToken("shingles", 43, 49)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N = new String[] {
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "shingle", "shingle", "shingle", "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide this", 7, 18),
+    createToken("divide this _", 7, 29),
+    createToken("this _", 14, 29),
+    createToken("this _ _", 14, 29),
+    createToken("_ _ sentence", 29, 37),
+    createToken("_ sentence", 29, 37),
+    createToken("_ sentence into", 29, 42),
+    createToken("sentence into", 29, 42),
+    createToken("sentence into shingles", 29, 49),
+    createToken("into shingles", 38, 49),
+  };
+
+  // One increment per expected shingle in
+  // TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS (12 tokens). The
+  // original array had a stray 13th trailing element with no corresponding
+  // token; the companion types array has 12 entries.
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+
+  // Input whose "divide" token carries positionIncrement == 8, larger than
+  // any shingle size exercised by these tests.
+  public static final Token[] TEST_TOKEN_POS_INCR_GREATER_THAN_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("divide", 57, 63, 8),
+    createToken("this", 64, 68),
+    createToken("sentence", 69, 77),
+    createToken("into", 78, 82),
+    createToken("shingles", 83, 89),
+  };
+
+  // Expected trigram output; "_" marks the hole-filling token text.
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please _", 0, 57),
+    createToken("please _ _", 0, 57),
+    createToken("_ _ divide", 57, 63),
+    createToken("_ divide", 57, 63),
+    createToken("_ divide this", 57, 68),
+    createToken("divide", 57, 63),
+    createToken("divide this", 57, 68),
+    createToken("divide this sentence", 57, 77),
+    createToken("this", 64, 68),
+    createToken("this sentence", 64, 77),
+    createToken("this sentence into", 64, 82),
+    createToken("sentence", 69, 77),
+    createToken("sentence into", 69, 82),
+    createToken("sentence into shingles", 69, 89),
+    createToken("into", 78, 82),
+    createToken("into shingles", 78, 89),
+    createToken("shingles", 83, 89)
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N = new int[] {
+    1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N = new String[] {
+    "word", "shingle", "shingle",
+    "shingle",
+    "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please _", 0, 57),
+    createToken("please _ _", 0, 57),
+    createToken("_ _ divide", 57, 63),
+    createToken("_ divide", 57, 63),
+    createToken("_ divide this", 57, 68),
+    createToken("divide this", 57, 68),
+    createToken("divide this sentence", 57, 77),
+    createToken("this sentence", 64, 77),
+    createToken("this sentence into", 64, 82),
+    createToken("sentence into", 69, 82),
+    createToken("sentence into shingles", 69, 89),
+    createToken("into shingles", 78, 89),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+
+  public static final String[] TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle", "shingle", "shingle", "shingle",
+    "shingle",
+  };
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    // Rebuild the holey input before every test (Token instances are shared
+    // mutable state). Relative to TEST_TOKEN, "this" and "into" are absent,
+    // so "sentence" and "shingles" each carry a position increment of 2.
+    testTokenWithHoles = new Token[] {
+      createToken("please", 0, 6),
+      createToken("divide", 7, 13),
+      createToken("sentence", 19, 27, 2),
+      createToken("shingles", 33, 39, 2),
+    };
+  }
+
+  /** ShingleFilter(input, 2): unigrams plus bigrams. */
+  public void testBiGramFilter() throws IOException {
+    shingleFilterTest(2, TEST_TOKEN, BI_GRAM_TOKENS,
+        BI_GRAM_POSITION_INCREMENTS, BI_GRAM_TYPES, true);
+  }
+
+  /** Bigrams over input containing position-increment holes. */
+  public void testBiGramFilterWithHoles() throws IOException {
+    shingleFilterTest(2, testTokenWithHoles, BI_GRAM_TOKENS_WITH_HOLES,
+        BI_GRAM_POSITION_INCREMENTS_WITH_HOLES,
+        BI_GRAM_TYPES_WITH_HOLES, true);
+  }
+
+  /** Bigrams only, unigram output disabled. */
+  public void testBiGramFilterWithoutUnigrams() throws IOException {
+    shingleFilterTest(2, TEST_TOKEN, BI_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+        BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
+        BI_GRAM_TYPES_WITHOUT_UNIGRAMS, false);
+  }
+
+  /** Bigrams only over the holey input. */
+  public void testBiGramFilterWithHolesWithoutUnigrams() throws IOException {
+    shingleFilterTest(2, testTokenWithHoles,
+        BI_GRAM_TOKENS_WITH_HOLES_WITHOUT_UNIGRAMS,
+        BI_GRAM_POSITION_INCREMENTS_WITH_HOLES_WITHOUT_UNIGRAMS,
+        BI_GRAM_TYPES_WITHOUT_UNIGRAMS, false);
+  }
+
+  /** A single input token passes through as the lone unigram. */
+  public void testBiGramFilterWithSingleToken() throws IOException {
+    shingleFilterTest(2, TEST_SINGLE_TOKEN, SINGLE_TOKEN,
+        SINGLE_TOKEN_INCREMENTS, SINGLE_TOKEN_TYPES, true);
+  }
+
+  /** A single input token with unigrams disabled yields no output. */
+  public void testBiGramFilterWithSingleTokenWithoutUnigrams() throws IOException {
+    shingleFilterTest(2, TEST_SINGLE_TOKEN, EMPTY_TOKEN_ARRAY,
+        EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, false);
+  }
+
+  /** An empty stream stays empty. */
+  public void testBiGramFilterWithEmptyTokenStream() throws IOException {
+    shingleFilterTest(2, EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_ARRAY,
+        EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, true);
+  }
+
+  /** An empty stream stays empty with unigrams disabled, too. */
+  public void testBiGramFilterWithEmptyTokenStreamWithoutUnigrams() throws IOException {
+    shingleFilterTest(2, EMPTY_TOKEN_ARRAY, EMPTY_TOKEN_ARRAY,
+        EMPTY_TOKEN_INCREMENTS_ARRAY, EMPTY_TOKEN_TYPES_ARRAY, false);
+  }
+
+  /** Shingles up to size 3, unigrams included. */
+  public void testTriGramFilter() throws IOException {
+    shingleFilterTest(3, TEST_TOKEN, TRI_GRAM_TOKENS,
+        TRI_GRAM_POSITION_INCREMENTS, TRI_GRAM_TYPES, true);
+  }
+
+  public void testTriGramFilterWithoutUnigrams() throws IOException {
+    shingleFilterTest(3, TEST_TOKEN, TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+        TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
+        TRI_GRAM_TYPES_WITHOUT_UNIGRAMS, false);
+  }
+
+  /** Shingles up to size 4, unigrams included. */
+  public void testFourGramFilter() throws IOException {
+    shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS,
+        FOUR_GRAM_POSITION_INCREMENTS, FOUR_GRAM_TYPES, true);
+  }
+
+  public void testFourGramFilterWithoutUnigrams() throws IOException {
+    shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+        FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
+        FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS, false);
+  }
+
+  /** min == max == 3: only unigrams and trigrams are produced. */
+  public void testTriGramFilterMinTriGram() throws IOException {
+    shingleFilterTest(3, 3, TEST_TOKEN, TRI_GRAM_TOKENS_MIN_TRI_GRAM,
+        TRI_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM,
+        TRI_GRAM_TYPES_MIN_TRI_GRAM, true);
+  }
+
+  public void testTriGramFilterWithoutUnigramsMinTriGram() throws IOException {
+    shingleFilterTest(3, 3, TEST_TOKEN,
+        TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
+        TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
+        TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, false);
+  }
+
+  /** min 3, max 4: bigrams suppressed, tri- and four-grams emitted. */
+  public void testFourGramFilterMinTriGram() throws IOException {
+    shingleFilterTest(3, 4, TEST_TOKEN, FOUR_GRAM_TOKENS_MIN_TRI_GRAM,
+        FOUR_GRAM_POSITION_INCREMENTS_MIN_TRI_GRAM,
+        FOUR_GRAM_TYPES_MIN_TRI_GRAM, true);
+  }
+
+  public void testFourGramFilterWithoutUnigramsMinTriGram() throws IOException {
+    shingleFilterTest(3, 4, TEST_TOKEN,
+        FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
+        FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_TRI_GRAM,
+        FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_TRI_GRAM, false);
+  }
+
+  /** min == max == 4: only unigrams and four-grams are produced. */
+  public void testFourGramFilterMinFourGram() throws IOException {
+    shingleFilterTest(4, 4, TEST_TOKEN, FOUR_GRAM_TOKENS_MIN_FOUR_GRAM,
+        FOUR_GRAM_POSITION_INCREMENTS_MIN_FOUR_GRAM,
+        FOUR_GRAM_TYPES_MIN_FOUR_GRAM, true);
+  }
+
+  public void testFourGramFilterWithoutUnigramsMinFourGram() throws IOException {
+    shingleFilterTest(4, 4, TEST_TOKEN,
+        FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM,
+        FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM,
+        FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS_MIN_FOUR_GRAM, false);
+  }
+
+ public void testBiGramFilterNoSeparator() throws IOException {
+ this.shingleFilterTest("", 2, 2, TEST_TOKEN, BI_GRAM_TOKENS_NO_SEPARATOR,
+ BI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR,
+ BI_GRAM_TYPES_NO_SEPARATOR, true);
+ }
+
+ public void testBiGramFilterWithoutUnigramsNoSeparator() throws IOException {
+ this.shingleFilterTest("", 2, 2, TEST_TOKEN,
+ BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+ BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+ BI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+ false);
+ }
+ public void testTriGramFilterNoSeparator() throws IOException {
+ this.shingleFilterTest("", 2, 3, TEST_TOKEN, TRI_GRAM_TOKENS_NO_SEPARATOR,
+ TRI_GRAM_POSITION_INCREMENTS_NO_SEPARATOR,
+ TRI_GRAM_TYPES_NO_SEPARATOR, true);
+ }
+
+ /** Bi- and tri-grams joined with an empty token separator, unigram output suppressed. */
+ public void testTriGramFilterWithoutUnigramsNoSeparator() throws IOException {
+   shingleFilterTest("", 2, 3, TEST_TOKEN,
+                     TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+                     TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+                     TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_NO_SEPARATOR,
+                     false);
+ }
+
+ /** Bigrams joined with the alternate "&lt;SEP&gt;" separator, unigrams emitted. */
+ public void testBiGramFilterAltSeparator() throws IOException {
+   shingleFilterTest("<SEP>", 2, 2, TEST_TOKEN,
+                     BI_GRAM_TOKENS_ALT_SEPARATOR,
+                     BI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR,
+                     BI_GRAM_TYPES_ALT_SEPARATOR,
+                     true);
+ }
+
+ /** Bigrams joined with the alternate "&lt;SEP&gt;" separator, unigram output suppressed. */
+ public void testBiGramFilterWithoutUnigramsAltSeparator() throws IOException {
+   shingleFilterTest("<SEP>", 2, 2, TEST_TOKEN,
+                     BI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     BI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     false);
+ }
+ /** Bi- and tri-grams joined with the alternate "&lt;SEP&gt;" separator, unigrams emitted. */
+ public void testTriGramFilterAltSeparator() throws IOException {
+   shingleFilterTest("<SEP>", 2, 3, TEST_TOKEN,
+                     TRI_GRAM_TOKENS_ALT_SEPARATOR,
+                     TRI_GRAM_POSITION_INCREMENTS_ALT_SEPARATOR,
+                     TRI_GRAM_TYPES_ALT_SEPARATOR,
+                     true);
+ }
+
+ /** Bi- and tri-grams joined with the alternate "&lt;SEP&gt;" separator, unigram output suppressed. */
+ public void testTriGramFilterWithoutUnigramsAltSeparator() throws IOException {
+   shingleFilterTest("<SEP>", 2, 3, TEST_TOKEN,
+                     TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     TRI_GRAM_TYPES_WITHOUT_UNIGRAMS_ALT_SEPARATOR,
+                     false);
+ }
+
+ /** A null token separator; expected output defined by the NULL_SEPARATOR fixtures. */
+ public void testTriGramFilterNullSeparator() throws IOException {
+   shingleFilterTest(null, 2, 3, TEST_TOKEN,
+                     TRI_GRAM_TOKENS_NULL_SEPARATOR,
+                     TRI_GRAM_POSITION_INCREMENTS_NULL_SEPARATOR,
+                     TRI_GRAM_TYPES_NULL_SEPARATOR,
+                     true);
+ }
+
+ /** Input containing a position increment equal to the max shingle size, unigrams emitted. */
+ public void testPositionIncrementEqualToN() throws IOException {
+   shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N,
+                     TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N,
+                     TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N,
+                     TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N,
+                     true);
+ }
+
+ /** Position increment equal to the max shingle size, unigram output suppressed. */
+ public void testPositionIncrementEqualToNWithoutUnigrams() throws IOException {
+   shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_EQUAL_TO_N,
+                     TRI_GRAM_TOKENS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+                     TRI_GRAM_POSITION_INCREMENTS_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+                     TRI_GRAM_TYPES_POS_INCR_EQUAL_TO_N_WITHOUT_UNIGRAMS,
+                     false);
+ }
+
+
+ /** Input containing a position increment greater than the max shingle size, unigrams emitted. */
+ public void testPositionIncrementGreaterThanN() throws IOException {
+   shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N,
+                     TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N,
+                     TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N,
+                     TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N,
+                     true);
+ }
+
+ /** Position increment greater than the max shingle size, unigram output suppressed. */
+ public void testPositionIncrementGreaterThanNWithoutUnigrams() throws IOException {
+   shingleFilterTest(2, 3, TEST_TOKEN_POS_INCR_GREATER_THAN_N,
+                     TRI_GRAM_TOKENS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+                     TRI_GRAM_POSITION_INCREMENTS_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+                     TRI_GRAM_TYPES_POS_INCR_GREATER_THAN_N_WITHOUT_UNIGRAMS,
+                     false);
+ }
+
+ public void testReset() throws Exception {
+ Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
+ TokenStream filter = new ShingleFilter(wsTokenizer, 2);
+ assertTokenStreamContents(filter,
+ new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
+ new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
+ new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
+ new int[]{1,0,1,0,1,0,1}
+ );
+ wsTokenizer.reset(new StringReader("please divide this sentence"));
+ assertTokenStreamContents(filter,
+ new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
+ new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
+ new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
+ new int[]{1,0,1,0,1,0,1}
+ );
+ }
+
+ /**
+  * Single-token input with outputUnigrams == false is the primary case where
+  * enabling outputUnigramsIfNoShingles should alter the filter's behavior.
+  */
+ public void testOutputUnigramsIfNoShinglesSingleTokenCase() throws IOException {
+   shingleFilterTest(2, 2, TEST_SINGLE_TOKEN,
+                     SINGLE_TOKEN,
+                     SINGLE_TOKEN_INCREMENTS,
+                     SINGLE_TOKEN_TYPES,
+                     false, true);
+ }
+
+ /**
+  * With enough input tokens to form shingles, outputUnigramsIfNoShingles must
+  * be a no-op: expect the same result as testBiGramFilter().
+  */
+ public void testOutputUnigramsIfNoShinglesWithSimpleBigram() throws IOException {
+   shingleFilterTest(2, 2, TEST_TOKEN,
+                     BI_GRAM_TOKENS,
+                     BI_GRAM_POSITION_INCREMENTS,
+                     BI_GRAM_TYPES,
+                     true, true);
+ }
+
+ /**
+  * With shingles present and unigrams off, outputUnigramsIfNoShingles must be
+  * a no-op: expect the same result as testBiGramFilterWithoutUnigrams().
+  */
+ public void testOutputUnigramsIfNoShinglesWithSimpleUnigramlessBigram() throws IOException {
+   shingleFilterTest(2, 2, TEST_TOKEN,
+                     BI_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+                     BI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
+                     BI_GRAM_TYPES_WITHOUT_UNIGRAMS,
+                     false, true);
+ }
+
+ /**
+  * When the minimum shingle size exceeds the number of input tokens, no
+  * shingles can form, so only the unigrams should be emitted.
+  */
+ public void testOutputUnigramsIfNoShinglesWithMultipleInputTokens() throws IOException {
+   shingleFilterTest(7, 7, TEST_TOKEN,
+                     TEST_TOKEN,
+                     UNIGRAM_ONLY_POSITION_INCREMENTS,
+                     UNIGRAM_ONLY_TYPES,
+                     false, true);
+ }
+
+ /**
+  * Runs {@code tokensToShingle} through a ShingleFilter configured with only a
+  * maximum shingle size, then verifies the output against {@code tokensToCompare}.
+  *
+  * @param maxSize maximum shingle size
+  * @param tokensToShingle input tokens fed to the filter
+  * @param tokensToCompare expected output tokens (term text and offsets)
+  * @param positionIncrements expected position increments, parallel to tokensToCompare
+  * @param types expected token types, parallel to tokensToCompare
+  * @param outputUnigrams whether the filter should also emit the input unigrams
+  */
+ protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
+                                  int[] positionIncrements, String[] types,
+                                  boolean outputUnigrams)
+     throws IOException {
+   ShingleFilter filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
+   filter.setOutputUnigrams(outputUnigrams);
+   shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
+ }
+
+ /**
+  * Runs {@code tokensToShingle} through a ShingleFilter configured with both a
+  * minimum and a maximum shingle size, then verifies the output.
+  */
+ protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle,
+                                  Token[] tokensToCompare, int[] positionIncrements,
+                                  String[] types, boolean outputUnigrams)
+     throws IOException {
+   ShingleFilter filter =
+       new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+   filter.setOutputUnigrams(outputUnigrams);
+   shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
+ }
+
+ /**
+  * Same as the min/max variant, but additionally configures the
+  * outputUnigramsIfNoShingles fallback before verifying the output.
+  */
+ protected void shingleFilterTest(int minSize, int maxSize, Token[] tokensToShingle,
+                                  Token[] tokensToCompare, int[] positionIncrements,
+                                  String[] types, boolean outputUnigrams,
+                                  boolean outputUnigramsIfNoShingles)
+     throws IOException {
+   ShingleFilter filter =
+       new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+   filter.setOutputUnigrams(outputUnigrams);
+   filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
+   shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
+ }
+
+ /**
+  * Same as the min/max variant, but additionally sets a custom token separator
+  * (which may be empty or null) before verifying the output.
+  */
+ protected void shingleFilterTest(String tokenSeparator, int minSize, int maxSize,
+                                  Token[] tokensToShingle, Token[] tokensToCompare,
+                                  int[] positionIncrements, String[] types,
+                                  boolean outputUnigrams)
+     throws IOException {
+   ShingleFilter filter =
+       new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+   filter.setTokenSeparator(tokenSeparator);
+   filter.setOutputUnigrams(outputUnigrams);
+   shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
+ }
+
+ /**
+  * Drains {@code filter} and asserts that its terms, offsets, types, and
+  * position increments match those of {@code tokensToCompare}.
+  *
+  * @param filter the fully-configured filter under test
+  * @param tokensToCompare expected output tokens (term text and offsets)
+  * @param positionIncrements expected position increments, parallel to tokensToCompare
+  * @param types expected token types, parallel to tokensToCompare
+  */
+ protected void shingleFilterTestCommon(ShingleFilter filter,
+                                        Token[] tokensToCompare,
+                                        int[] positionIncrements,
+                                        String[] types)
+     throws IOException {
+   // Unpack the expected tokens into the parallel arrays that
+   // assertTokenStreamContents() consumes.
+   String[] text = new String[tokensToCompare.length];
+   int[] startOffsets = new int[tokensToCompare.length];
+   int[] endOffsets = new int[tokensToCompare.length];
+
+   for (int i = 0; i < tokensToCompare.length; i++) {
+     text[i] = new String(tokensToCompare[i].buffer(), 0, tokensToCompare[i].length());
+     startOffsets[i] = tokensToCompare[i].startOffset();
+     endOffsets[i] = tokensToCompare[i].endOffset();
+   }
+
+   assertTokenStreamContents(filter, text, startOffsets, endOffsets, types, positionIncrements);
+ }
+
+ /** Creates a token with the default position increment of 1. */
+ private static Token createToken(String term, int start, int offset) {
+   return createToken(term, start, offset, 1);
+ }
+
+ /**
+  * Creates a token with the given term text, offsets, and position increment.
+  */
+ private static Token createToken(String term, int start, int offset,
+                                  int positionIncrement) {
+   Token token = new Token(start, offset);
+   token.copyBuffer(term.toCharArray(), 0, term.length());
+   token.setPositionIncrement(positionIncrement);
+   return token;
+ }
+}