X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/document/AbstractField.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/document/AbstractField.java new file mode 100755 index 0000000..2a0074d --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/document/AbstractField.java @@ -0,0 +1,312 @@ +package org.apache.lucene.document; +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.PhraseQuery; // for javadocs +import org.apache.lucene.search.spans.SpanQuery; // for javadocs +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.util.StringHelper; // for javadocs +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInvertState; // for javadocs + + +/** + * + * + **/ +public abstract class AbstractField implements Fieldable { + + protected String name = "body"; + protected boolean storeTermVector = false; + protected boolean storeOffsetWithTermVector = false; + protected boolean storePositionWithTermVector = false; + protected boolean omitNorms = false; + protected boolean isStored = false; + protected boolean isIndexed = true; + protected boolean isTokenized = true; + protected boolean isBinary = false; + protected boolean lazy = false; + protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + protected float boost = 1.0f; + // the data object for all different kind of field values + protected Object fieldsData = null; + // pre-analyzed tokenStream for indexed fields + protected TokenStream tokenStream; + // length/offset for all primitive types + protected int binaryLength; + protected int binaryOffset; + + protected AbstractField() + { + } + + protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { + if (name == null) + throw new NullPointerException("name cannot be null"); + this.name = StringHelper.intern(name); // field names are interned + + this.isStored = store.isStored(); + this.isIndexed = index.isIndexed(); + this.isTokenized = index.isAnalyzed(); + this.omitNorms = index.omitNorms(); + + this.isBinary = false; + + setStoreTermVector(termVector); + } + + /** Sets the boost factor hits on this field. This value will be + * multiplied into the score of all hits on this this field of this + * document. + * + *

The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then + * used to compute the norm factor for the field. By + * default, in the {@link + * org.apache.lucene.search.Similarity#computeNorm(String, + * FieldInvertState)} method, the boost value is multiplied + * by the {@link + * org.apache.lucene.search.Similarity#lengthNorm(String, + * int)} and then + * rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the + * index. One should attempt to ensure that this product does not overflow + * the range of that encoding. + * + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState) + * @see org.apache.lucene.search.Similarity#encodeNormValue(float) + */ + public void setBoost(float boost) { + this.boost = boost; + } + + /** Returns the boost factor for hits for this field. + * + *

The default value is 1.0. + * + *

Note: this value is not stored directly with the document in the index. + * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and + * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when + * this field was indexed. + * + * @see #setBoost(float) + */ + public float getBoost() { + return boost; + } + + /** Returns the name of the field as an interned string. + * For example "date", "title", "body", ... + */ + public String name() { return name; } + + protected void setStoreTermVector(Field.TermVector termVector) { + this.storeTermVector = termVector.isStored(); + this.storePositionWithTermVector = termVector.withPositions(); + this.storeOffsetWithTermVector = termVector.withOffsets(); + } + + /** True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. */ + public final boolean isStored() { return isStored; } + + /** True iff the value of the field is to be indexed, so that it may be + searched on. */ + public final boolean isIndexed() { return isIndexed; } + + /** True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. */ + public final boolean isTokenized() { return isTokenized; } + + /** True iff the term or terms used to index this field are stored as a term + * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. + * These methods do not provide access to the original content of the field, + * only to terms used to index it. If the original content must be + * preserved, use the stored attribute instead. + * + * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) + */ + public final boolean isTermVectorStored() { return storeTermVector; } + + /** + * True iff terms are stored as term vector together with their offsets + * (start and end position in source text). + */ + public boolean isStoreOffsetWithTermVector(){ + return storeOffsetWithTermVector; + } + + /** + * True iff terms are stored as term vector together with their token positions. + */ + public boolean isStorePositionWithTermVector(){ + return storePositionWithTermVector; + } + + /** True iff the value of the filed is stored as binary */ + public final boolean isBinary() { + return isBinary; + } + + + /** + * Return the raw byte[] for the binary field. Note that + * you must also call {@link #getBinaryLength} and {@link + * #getBinaryOffset} to know which range of bytes in this + * returned array belong to the field. + * @return reference to the Field value as byte[]. + */ + public byte[] getBinaryValue() { + return getBinaryValue(null); + } + + public byte[] getBinaryValue(byte[] result){ + if (isBinary || fieldsData instanceof byte[]) + return (byte[]) fieldsData; + else + return null; + } + + /** + * Returns length of byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return length of byte[] segment that represents this Field value + */ + public int getBinaryLength() { + if (isBinary) { + return binaryLength; + } else if (fieldsData instanceof byte[]) + return ((byte[]) fieldsData).length; + else + return 0; + } + + /** + * Returns offset into byte[] segment that is used as value, if Field is not binary + * returned value is undefined + * @return index of the first character in byte[] segment that represents this Field value + */ + public int getBinaryOffset() { + return binaryOffset; + } + + /** True if norms are omitted for this indexed field */ + public boolean getOmitNorms() { return omitNorms; } + + /** @deprecated use {@link #getIndexOptions()} instead. */ + @Deprecated + public boolean getOmitTermFreqAndPositions() { return indexOptions == IndexOptions.DOCS_ONLY; } + + /** @see #setIndexOptions */ + public IndexOptions getIndexOptions() { return indexOptions; } + + /** Expert: + * + * If set, omit normalization factors associated with this indexed field. + * This effectively disables indexing boosts and length normalization for this field. + */ + public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } + + /** @deprecated use {@link #setIndexOptions(FieldInfo.IndexOptions)} instead. */ + @Deprecated + public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { + if (omitTermFreqAndPositions) { + indexOptions = IndexOptions.DOCS_ONLY; + } else { + indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + } + } + + /** Expert: + * + * If set, omit term freq, and optionally also positions and payloads from + * postings for this field. + * + *

NOTE: While this option reduces storage space + * required in the index, it also means any query + * requiring positional information, such as {@link + * PhraseQuery} or {@link SpanQuery} subclasses will + * silently fail to find results. + */ + public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; } + + public boolean isLazy() { + return lazy; + } + + /** Prints a Field for human consumption. */ + @Override + public final String toString() { + StringBuilder result = new StringBuilder(); + if (isStored) { + result.append("stored"); + } + if (isIndexed) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (isTokenized) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeOffsetWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storePositionWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (isBinary) { + if (result.length() > 0) + result.append(","); + result.append("binary"); + } + if (omitNorms) { + result.append(",omitNorms"); + } + if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + result.append(",indexOptions="); + result.append(indexOptions); + } + if (lazy){ + result.append(",lazy"); + } + result.append('<'); + result.append(name); + result.append(':'); + + if (fieldsData != null && lazy == false) { + result.append(fieldsData); + } + + result.append('>'); + return result.toString(); + } +}