--- /dev/null
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.IndexWriter; // for javadoc
+import org.apache.lucene.util.StringHelper;
+
+import java.io.Reader;
+import java.io.Serializable;
+
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+
+/**
+ A field is a section of a Document. Each field has two parts, a name and a
+ value. Values may be free text, provided as a String or as a Reader, or they
+ may be atomic keywords, which are not further processed. Such keywords may
+ be used to represent dates, urls, etc. Fields are optionally stored in the
+ index, so that they may be returned with hits on the document.
+ */
+
+public final class Field extends AbstractField implements Fieldable, Serializable {
+
+ /** Specifies whether and how a field should be stored. */
+ public static enum Store {
+
+ /** Store the original field value in the index. This is useful for short texts
+ * like a document's title which should be displayed with the results. The
+ * value is stored in its original form, i.e. no analyzer is used before it is
+ * stored.
+ */
+ YES {
+ @Override
+ public boolean isStored() { return true; }
+ },
+
+ /** Do not store the field value in the index. */
+ NO {
+ @Override
+ public boolean isStored() { return false; }
+ };
+
+ public abstract boolean isStored();
+ }
+
+ /** Specifies whether and how a field should be indexed. */
+ public static enum Index {
+
+   /**
+    * Do not index the field value. The field can therefore not be searched,
+    * but its contents remain accessible provided it is
+    * {@link Field.Store stored}.
+    */
+   NO(false, false, true),
+
+   /**
+    * Index the tokens produced by running the field's value through an
+    * Analyzer. This is useful for common text.
+    */
+   ANALYZED(true, true, false),
+
+   /**
+    * Index the field's value without using an Analyzer, so it can be searched.
+    * Because no analyzer is used the value is stored as a single term. This is
+    * useful for unique Ids like product numbers.
+    */
+   NOT_ANALYZED(true, false, false),
+
+   /**
+    * Expert: Index the field's value without an Analyzer, and also disable the
+    * indexing of norms. Note that you can also separately enable/disable norms
+    * by calling {@link Field#setOmitNorms}. No norms means that index-time
+    * field and document boosting and field length normalization are disabled.
+    * The benefit is less memory usage as norms take up one byte of RAM per
+    * indexed field for every document in the index, during searching. Note
+    * that once you index a given field <i>with</i> norms enabled, disabling
+    * norms will have no effect. In other words, for this to have the above
+    * described effect on a field, all instances of that field must be indexed
+    * with NOT_ANALYZED_NO_NORMS from the beginning.
+    */
+   NOT_ANALYZED_NO_NORMS(true, false, true),
+
+   /**
+    * Expert: Index the tokens produced by running the field's value through an
+    * Analyzer, and also separately disable the storing of norms. See
+    * {@link #NOT_ANALYZED_NO_NORMS} for what norms are and why you may want to
+    * disable them.
+    */
+   ANALYZED_NO_NORMS(true, true, true);
+
+   private final boolean indexed;
+   private final boolean analyzed;
+   private final boolean normsOmitted;
+
+   /** Flag order: indexed, analyzed, norms omitted. */
+   Index(boolean indexed, boolean analyzed, boolean normsOmitted) {
+     this.indexed = indexed;
+     this.analyzed = analyzed;
+     this.normsOmitted = normsOmitted;
+   }
+
+   /** Get the best representation of the index given the flags; norms are kept. */
+   public static Index toIndex(boolean indexed, boolean analyzed) {
+     return toIndex(indexed, analyzed, false);
+   }
+
+   /** Expert: Get the best representation of the index given the flags. */
+   public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
+     // When the field is not indexed the remaining flags are irrelevant.
+     if (!indexed) {
+       return NO;
+     }
+     // Expert: norms omitted
+     if (omitNorms) {
+       return analyzed ? ANALYZED_NO_NORMS : NOT_ANALYZED_NO_NORMS;
+     }
+     // typical, non-expert
+     return analyzed ? ANALYZED : NOT_ANALYZED;
+   }
+
+   /** Returns whether this option indexes the field value. */
+   public boolean isIndexed() {
+     return indexed;
+   }
+
+   /** Returns whether this option runs the value through an Analyzer. */
+   public boolean isAnalyzed() {
+     return analyzed;
+   }
+
+   /** Returns whether this option omits norms. */
+   public boolean omitNorms() {
+     return normsOmitted;
+   }
+ }
+
+ /** Specifies whether and how a field should have term vectors. */
+ public static enum TermVector {
+
+   /** Do not store term vectors. */
+   NO(false, false, false),
+
+   /**
+    * Store the term vectors of each document. A term vector is a list
+    * of the document's terms and their number of occurrences in that document.
+    */
+   YES(true, false, false),
+
+   /**
+    * Store the term vector + token position information
+    *
+    * @see #YES
+    */
+   WITH_POSITIONS(true, true, false),
+
+   /**
+    * Store the term vector + Token offset information
+    *
+    * @see #YES
+    */
+   WITH_OFFSETS(true, false, true),
+
+   /**
+    * Store the term vector + Token position and offset information
+    *
+    * @see #YES
+    * @see #WITH_POSITIONS
+    * @see #WITH_OFFSETS
+    */
+   WITH_POSITIONS_OFFSETS(true, true, true);
+
+   private final boolean stored;
+   private final boolean positions;
+   private final boolean offsets;
+
+   /** Flag order: stored, with positions, with offsets. */
+   TermVector(boolean stored, boolean positions, boolean offsets) {
+     this.stored = stored;
+     this.positions = positions;
+     this.offsets = offsets;
+   }
+
+   /** Get the best representation of a TermVector given the flags. */
+   public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
+     // If it is not stored, nothing else matters.
+     if (!stored) {
+       return NO;
+     }
+     if (withPositions) {
+       return withOffsets ? WITH_POSITIONS_OFFSETS : WITH_POSITIONS;
+     }
+     return withOffsets ? WITH_OFFSETS : YES;
+   }
+
+   /** Returns whether this option stores term vectors. */
+   public boolean isStored() {
+     return stored;
+   }
+
+   /** Returns whether this option stores token positions. */
+   public boolean withPositions() {
+     return positions;
+   }
+
+   /** Returns whether this option stores token offsets. */
+   public boolean withOffsets() {
+     return offsets;
+   }
+ }
+
+
+ /** The value of the field as a String, or null. If null, the Reader value or
+ * binary value is used. Exactly one of stringValue(),
+ * readerValue(), and getBinaryValue() must be set. */
+ public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
+
+ /** The value of the field as a Reader, or null. If null, the String value or
+ * binary value is used. Exactly one of stringValue(),
+ * readerValue(), and getBinaryValue() must be set. */
+ public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
+
+ /** The TokesStream for this field to be used when indexing, or null. If null, the Reader value
+ * or String value is analyzed to produce the indexed tokens. */
+ public TokenStream tokenStreamValue() { return tokenStream; }
+
+
+ /**
+  * <p>Expert: change the value of this field. This can be used during
+  * indexing to re-use a single Field instance, improving indexing speed by
+  * avoiding the GC cost of new'ing and reclaiming Field instances. Typically
+  * a single {@link Document} instance is re-used as well. This helps most on
+  * small documents.</p>
+  *
+  * <p>Each Field instance should only be used once within a single
+  * {@link Document} instance. See <a
+  * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
+  * for details.</p>
+  *
+  * @param value the new String value
+  * @throws IllegalArgumentException if this field is binary
+  */
+ public void setValue(String value) {
+   if (this.isBinary) {
+     throw new IllegalArgumentException("cannot set a String value on a binary field");
+   }
+   this.fieldsData = value;
+ }
+
+ /**
+  * Expert: change the value of this field to a Reader.
+  * See <a href="#setValue(java.lang.String)">setValue(String)</a>.
+  *
+  * @param value the new Reader value
+  * @throws IllegalArgumentException if this field is binary or stored
+  */
+ public void setValue(Reader value) {
+   // The binary check comes first so a binary field always reports the binary message.
+   if (this.isBinary) {
+     throw new IllegalArgumentException("cannot set a Reader value on a binary field");
+   }
+   if (this.isStored) {
+     throw new IllegalArgumentException("cannot set a Reader value on a stored field");
+   }
+   this.fieldsData = value;
+ }
+
+ /**
+  * Expert: change the value of this field to the whole of the given array
+  * (offset 0, length <code>value.length</code>).
+  * See <a href="#setValue(java.lang.String)">setValue(String)</a>.
+  *
+  * @param value the new binary value
+  * @throws IllegalArgumentException if this field is not binary
+  * @throws NullPointerException if value is <code>null</code>; the field is
+  *         left unchanged in that case
+  */
+ public void setValue(byte[] value) {
+   if (!isBinary) {
+     throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
+   }
+   // Read the length before touching any state: a null array then fails
+   // without leaving fieldsData cleared next to a stale length/offset.
+   final int length = value.length;
+   fieldsData = value;
+   binaryLength = length;
+   binaryOffset = 0;
+ }
+
+ /**
+  * Expert: change the value of this field to a slice of the given array.
+  * See <a href="#setValue(java.lang.String)">setValue(String)</a>.
+  * The offset and length are recorded as given; this method does not check
+  * them against the array.
+  *
+  * @param value the array holding the new binary value
+  * @param offset starting offset in value where this Field's bytes are
+  * @param length number of bytes to use for this Field, starting at offset
+  * @throws IllegalArgumentException if this field is not binary
+  */
+ public void setValue(byte[] value, int offset, int length) {
+   if (!this.isBinary) {
+     throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
+   }
+   this.binaryOffset = offset;
+   this.binaryLength = length;
+   this.fieldsData = value;
+ }
+
+ /**
+  * Expert: sets the token stream to be used for indexing and causes
+  * isIndexed() and isTokenized() to return true. May be combined with stored
+  * values from stringValue() or getBinaryValue().
+  *
+  * @param tokenStream the token stream to use when indexing this field
+  */
+ public void setTokenStream(TokenStream tokenStream) {
+   this.tokenStream = tokenStream;
+   // Supplying a token stream marks the field as indexed and tokenized.
+   this.isTokenized = true;
+   this.isIndexed = true;
+ }
+
+ /**
+ * Create a field by specifying its name, value and how it will
+ * be saved in the index. Term vectors will not be stored in the index:
+ * this delegates to the five-argument constructor with
+ * <code>TermVector.NO</code>.
+ *
+ * @param name The name of the field
+ * @param value The string to process
+ * @param store Whether <code>value</code> should be stored in the index
+ * @param index Whether the field should be indexed, and if so, if it should
+ * be tokenized before indexing
+ * @throws NullPointerException if name or value is <code>null</code>
+ * @throws IllegalArgumentException if the field is neither stored nor indexed
+ */
+ public Field(String name, String value, Store store, Index index) {
+ this(name, value, store, index, TermVector.NO);
+ }
+
+ /**
+ * Create a field by specifying its name, value and how it will
+ * be saved in the index. The field name is interned: this delegates to the
+ * six-argument constructor with <code>internName</code> set to
+ * <code>true</code>.
+ *
+ * @param name The name of the field
+ * @param value The string to process
+ * @param store Whether <code>value</code> should be stored in the index
+ * @param index Whether the field should be indexed, and if so, if it should
+ * be tokenized before indexing
+ * @param termVector Whether term vector should be stored
+ * @throws NullPointerException if name or value is <code>null</code>
+ * @throws IllegalArgumentException in any of the following situations:
+ * <ul>
+ * <li>the field is neither stored nor indexed</li>
+ * <li>the field is not indexed but termVector is anything other than
+ * <code>TermVector.NO</code></li>
+ * </ul>
+ */
+ public Field(String name, String value, Store store, Index index, TermVector termVector) {
+ this(name, true, value, store, index, termVector);
+ }
+
+ /**
+ * Create a field by specifying its name, value and how it will
+ * be saved in the index.
+ *
+ * @param name The name of the field
+ * @param internName Whether to .intern() name or not
+ * @param value The string to process
+ * @param store Whether <code>value</code> should be stored in the index
+ * @param index Whether the field should be indexed, and if so, if it should
+ * be tokenized before indexing
+ * @param termVector Whether term vector should be stored
+ * @throws NullPointerException if name or value is <code>null</code>
+ * @throws IllegalArgumentException in any of the following situations:
+ * <ul>
+ * <li>the field is neither stored nor indexed</li>
+ * <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
+ * </ul>
+ */
+ public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) {
+ if (name == null)
+ throw new NullPointerException("name cannot be null");
+ if (value == null)
+ throw new NullPointerException("value cannot be null");
+ if (index == Index.NO && store == Store.NO)
+ throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ + "is neither indexed nor stored");
+ if (index == Index.NO && termVector != TermVector.NO)
+ throw new IllegalArgumentException("cannot store term vector information "
+ + "for a field that is not indexed");
+
+ if (internName) // field names are optionally interned
+ name = StringHelper.intern(name);
+
+ this.name = name;
+
+ this.fieldsData = value;
+
+ this.isStored = store.isStored();
+
+ this.isIndexed = index.isIndexed();
+ this.isTokenized = index.isAnalyzed();
+ this.omitNorms = index.omitNorms();
+ if (index == Index.NO) {
+ // note: now this reads even wierder than before
+ this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
+
+ this.isBinary = false;
+
+ setStoreTermVector(termVector);
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored. Term vectors will
+ * not be stored: this delegates to {@link #Field(String, Reader, TermVector)}
+ * with <code>TermVector.NO</code>. The Reader is read only when the Document
+ * is added to the index, i.e. you may not close the Reader until
+ * {@link IndexWriter#addDocument(Document)} has been called.
+ *
+ * @param name The name of the field
+ * @param reader The reader with the content
+ * @throws NullPointerException if name or reader is <code>null</code>
+ */
+ public Field(String name, Reader reader) {
+ this(name, reader, TermVector.NO);
+ }
+
+ /**
+  * Create a tokenized and indexed field that is not stored, optionally with
+  * storing term vectors. The Reader is read only when the Document is added
+  * to the index, i.e. you may not close the Reader until
+  * {@link IndexWriter#addDocument(Document)} has been called.
+  *
+  * @param name The name of the field
+  * @param reader The reader with the content
+  * @param termVector Whether term vector should be stored
+  * @throws NullPointerException if name or reader is <code>null</code>
+  */
+ public Field(String name, Reader reader, TermVector termVector) {
+   if (name == null) {
+     throw new NullPointerException("name cannot be null");
+   }
+   if (reader == null) {
+     throw new NullPointerException("reader cannot be null");
+   }
+
+   // field names are interned
+   this.name = StringHelper.intern(name);
+   this.fieldsData = reader;
+
+   // Reader-backed fields are never stored, always indexed and tokenized, and never binary.
+   this.isStored = false;
+   this.isIndexed = true;
+   this.isTokenized = true;
+   this.isBinary = false;
+
+   setStoreTermVector(termVector);
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored. Term vectors will
+ * not be stored: this delegates to
+ * {@link #Field(String, TokenStream, TermVector)} with
+ * <code>TermVector.NO</code>. This is useful for pre-analyzed fields.
+ * The TokenStream is read only when the Document is added to the index,
+ * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
+ * has been called.
+ *
+ * @param name The name of the field
+ * @param tokenStream The TokenStream with the content
+ * @throws NullPointerException if name or tokenStream is <code>null</code>
+ */
+ public Field(String name, TokenStream tokenStream) {
+ this(name, tokenStream, TermVector.NO);
+ }
+
+ /**
+  * Create a tokenized and indexed field that is not stored, optionally with
+  * storing term vectors. This is useful for pre-analyzed fields.
+  * The TokenStream is read only when the Document is added to the index,
+  * i.e. you may not close the TokenStream until
+  * {@link IndexWriter#addDocument(Document)} has been called.
+  *
+  * @param name The name of the field
+  * @param tokenStream The TokenStream with the content
+  * @param termVector Whether term vector should be stored
+  * @throws NullPointerException if name or tokenStream is <code>null</code>
+  */
+ public Field(String name, TokenStream tokenStream, TermVector termVector) {
+   if (name == null) {
+     throw new NullPointerException("name cannot be null");
+   }
+   if (tokenStream == null) {
+     throw new NullPointerException("tokenStream cannot be null");
+   }
+
+   // field names are interned
+   this.name = StringHelper.intern(name);
+   // The content comes solely from the token stream, so there is no field data.
+   this.fieldsData = null;
+   this.tokenStream = tokenStream;
+
+   this.isStored = false;
+   this.isIndexed = true;
+   this.isTokenized = true;
+   this.isBinary = false;
+
+   setStoreTermVector(termVector);
+ }
+
+
+ /**
+ * Create a stored field with binary value covering the whole array.
+ * The store argument is checked only after delegating to
+ * {@link #Field(String, byte[], int, int)}.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ * @param store Must not be <code>Store.NO</code>
+ * @throws NullPointerException if value is <code>null</code>
+ * @throws IllegalArgumentException if name is <code>null</code> or store is <code>Store.NO</code>
+ * @deprecated Use {@link #Field(String, byte[])} instead
+ */
+ @Deprecated
+ public Field(String name, byte[] value, Store store) {
+ this(name, value, 0, value.length);
+
+ if (store == Store.NO) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ }
+
+ /**
+ * Create a stored field with binary value covering the whole array, by
+ * delegating to {@link #Field(String, byte[], int, int)} with offset 0 and
+ * length <code>value.length</code>.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ * @throws NullPointerException if value is <code>null</code>
+ * @throws IllegalArgumentException if name is <code>null</code>
+ */
+ public Field(String name, byte[] value) {
+ this(name, value, 0, value.length);
+ }
+
+ /**
+ * Create a stored field with binary value taken from a slice of the array.
+ * The store argument is checked only after delegating to
+ * {@link #Field(String, byte[], int, int)}.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ * @param offset Starting offset in value where this Field's bytes are
+ * @param length Number of bytes to use for this Field, starting at offset
+ * @param store Must not be <code>Store.NO</code>
+ * @throws IllegalArgumentException if name or value is <code>null</code>, or
+ * if store is <code>Store.NO</code>
+ * @deprecated Use {@link #Field(String, byte[], int, int)} instead
+ */
+ @Deprecated
+ public Field(String name, byte[] value, int offset, int length, Store store) {
+ this(name, value, offset, length);
+
+ if (store == Store.NO) {
+ throw new IllegalArgumentException("binary values can't be unstored");
+ }
+ }
+
+ /**
+  * Create a stored field with binary value taken from a slice of the array.
+  * The resulting field is stored, not indexed, not tokenized, omits norms and
+  * has no term vectors. The offset and length are recorded as given; this
+  * constructor does not check them against the array.
+  *
+  * @param name The name of the field
+  * @param value The binary value
+  * @param offset Starting offset in value where this Field's bytes are
+  * @param length Number of bytes to use for this Field, starting at offset
+  * @throws IllegalArgumentException if name or value is <code>null</code>
+  */
+ public Field(String name, byte[] value, int offset, int length) {
+   if (name == null) {
+     throw new IllegalArgumentException("name cannot be null");
+   }
+   if (value == null) {
+     throw new IllegalArgumentException("value cannot be null");
+   }
+
+   // field names are interned
+   this.name = StringHelper.intern(name);
+   this.fieldsData = value;
+
+   this.isStored = true;
+   this.isIndexed = false;
+   this.isTokenized = false;
+   this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+   this.omitNorms = true;
+
+   this.isBinary = true;
+   this.binaryLength = length;
+   this.binaryOffset = offset;
+
+   setStoreTermVector(TermVector.NO);
+ }
+}