X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java new file mode 100644 index 0000000..f84328c --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -0,0 +1,252 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.document.NumericField; // for javadocs +import org.apache.lucene.search.NumericRangeQuery; // for javadocs +import org.apache.lucene.search.NumericRangeFilter; // for javadocs +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +/** + * Expert: This class provides a {@link TokenStream} + * for indexing numeric values that can be used by {@link + * NumericRangeQuery} or {@link NumericRangeFilter}. + * + *

Note that for simple usage, {@link NumericField} is + * recommended. {@link NumericField} disables norms and + * term freqs, as they are not usually needed during + * searching. If you need to change these settings, you + * should use this class. + * + *

See {@link NumericField} for capabilities of fields + * indexed numerically.

+ * + *

Here's an example usage, for an int field: + * + *

+ *  Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ *  field.setOmitNorms(true);
+ *  field.setIndexOptions(IndexOptions.DOCS_ONLY);
+ *  document.add(field);
+ * 
+ * + *

For optimal performance, re-use the TokenStream and Field instance + * for more than one document: + * + *

+ *  NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ *  Field field = new Field(name, stream);
+ *  field.setOmitNorms(true);
+ *  field.setIndexOptions(IndexOptions.DOCS_ONLY);
+ *  Document document = new Document();
+ *  document.add(field);
+ *
+ *  for(all documents) {
+ *    stream.setIntValue(value)
+ *    writer.addDocument(document);
+ *  }
+ * 
+ * + *

This stream is not intended to be used in analyzers; + * it's more for iterating the different precisions during + * indexing a specific numeric value.

+ + *

NOTE: as token streams are only consumed once + * the document is added to the index, if you index more + * than one numeric field, use a separate NumericTokenStream + * instance for each.

+ * + *

See {@link NumericRangeQuery} for more details on the + * precisionStep + * parameter as well as how numeric fields work under the hood.

+ * + * @since 2.9 + */ +public final class NumericTokenStream extends TokenStream { + + /** The full precision token gets this token type assigned. */ + public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric"; + + /** The lower precision tokens gets this token type assigned. */ + public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /** + * Creates a token stream for numeric values using the default precisionStep + * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, + * before using set a value using the various set???Value() methods. + */ + public NumericTokenStream() { + this(NumericUtils.PRECISION_STEP_DEFAULT); + } + + /** + * Creates a token stream for numeric values with the specified + * precisionStep. The stream is not yet initialized, + * before using set a value using the various set???Value() methods. + */ + public NumericTokenStream(final int precisionStep) { + super(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new IllegalArgumentException("precisionStep must be >=1"); + } + + /** + * Expert: Creates a token stream for numeric values with the specified + * precisionStep using the given {@link AttributeSource}. + * The stream is not yet initialized, + * before using set a value using the various set???Value() methods. + */ + public NumericTokenStream(AttributeSource source, final int precisionStep) { + super(source); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new IllegalArgumentException("precisionStep must be >=1"); + } + + /** + * Expert: Creates a token stream for numeric values with the specified + * precisionStep using the given + * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. + * The stream is not yet initialized, + * before using set a value using the various set???Value() methods. + */ + public NumericTokenStream(AttributeFactory factory, final int precisionStep) { + super(factory); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new IllegalArgumentException("precisionStep must be >=1"); + } + + /** + * Initializes the token stream with the supplied long value. + * @param value the value, for which this TokenStream should enumerate tokens. + * @return this instance, because of this you can use it the following way: + * new Field(name, new NumericTokenStream(precisionStep).setLongValue(value)) + */ + public NumericTokenStream setLongValue(final long value) { + this.value = value; + valSize = 64; + shift = 0; + return this; + } + + /** + * Initializes the token stream with the supplied int value. + * @param value the value, for which this TokenStream should enumerate tokens. + * @return this instance, because of this you can use it the following way: + * new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)) + */ + public NumericTokenStream setIntValue(final int value) { + this.value = value; + valSize = 32; + shift = 0; + return this; + } + + /** + * Initializes the token stream with the supplied double value. + * @param value the value, for which this TokenStream should enumerate tokens. + * @return this instance, because of this you can use it the following way: + * new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value)) + */ + public NumericTokenStream setDoubleValue(final double value) { + this.value = NumericUtils.doubleToSortableLong(value); + valSize = 64; + shift = 0; + return this; + } + + /** + * Initializes the token stream with the supplied float value. + * @param value the value, for which this TokenStream should enumerate tokens. + * @return this instance, because of this you can use it the following way: + * new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value)) + */ + public NumericTokenStream setFloatValue(final float value) { + this.value = NumericUtils.floatToSortableInt(value); + valSize = 32; + shift = 0; + return this; + } + + @Override + public void reset() { + if (valSize == 0) + throw new IllegalStateException("call set???Value() before usage"); + shift = 0; + } + + @Override + public boolean incrementToken() { + if (valSize == 0) + throw new IllegalStateException("call set???Value() before usage"); + if (shift >= valSize) + return false; + + clearAttributes(); + final char[] buffer; + switch (valSize) { + case 64: + buffer = termAtt.resizeBuffer(NumericUtils.BUF_SIZE_LONG); + termAtt.setLength(NumericUtils.longToPrefixCoded(value, shift, buffer)); + break; + + case 32: + buffer = termAtt.resizeBuffer(NumericUtils.BUF_SIZE_INT); + termAtt.setLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer)); + break; + + default: + // should not happen + throw new IllegalArgumentException("valSize must be 32 or 64"); + } + + typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); + posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0); + shift += precisionStep; + return true; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize); + sb.append(",precisionStep=").append(precisionStep).append(')'); + return sb.toString(); + } + + /** Returns the precision step. */ + public int getPrecisionStep() { + return precisionStep; + } + + // members + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + + private int shift = 0, valSize = 0; // valSize==0 means not initialized + private final int precisionStep; + + private long value = 0L; +}