and back compatibility is not guaranteed. Names with both leading and
trailing underscores (e.g. _version_) are reserved.
-->
-
- <!-- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> -->
- <!-- <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> -->
- <!-- <field name="name" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> -->
- <!-- <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> -->
- <!-- <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
- <!-- <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> -->
-
- <!-- <field name="weight" type="float" indexed="true" stored="true"/> -->
- <!-- <field name="price" type="float" indexed="true" stored="true"/> -->
- <!-- <field name="popularity" type="int" indexed="true" stored="true" /> -->
- <!-- <field name="inStock" type="boolean" indexed="true" stored="true" /> -->
-
- <!-- <field name="store" type="location" indexed="true" stored="true"/> -->
-
- <!-- Common metadata fields, named specifically to match up with
- SolrCell metadata when parsing rich documents such as Word, PDF.
- Some fields are multiValued only because Tika currently may return
- multiple values for them. Some metadata is parsed from the documents,
- but there are some which come from the client context:
- "content_type": From the HTTP headers of incoming stream
- "resourcename": From SolrCell request param resource.name
- -->
- <!-- <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
- <!-- <field name="subject" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="description" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="comments" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="author" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="keywords" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="category" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="resourcename" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="url" type="text_general" indexed="true" stored="true"/> -->
- <!-- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> -->
- <!-- <field name="last_modified" type="date" indexed="true" stored="true"/> -->
- <!-- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> -->
<field name="book_id" type="int" indexed="true" stored="true" />
<field name="parent_id" type="int" indexed="false" stored="true" />
<field name="slug" type="lowercase" stored="false" indexed="true" omitNorms="true"/> <!-- no norms -->
<field name="is_book" type="boolean" stored="false" indexed="true"/>
<field name="authors" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
+ <field name="authors_nonstem" type="text_ascii" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
<field name="translators" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
<field name="title" type="text_pl_nonstop" stored="false" indexed="true"/>
+ <field name="title_nonstem" type="text_ascii" stored="false" indexed="true"/>
<!-- <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
<field name="published_date" type="string" stored="true" indexed="true"/>
<field name="genres" type="lowercase" stored="false" indexed="false" multiValued="true" />
<field name="metadata" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
+ <field name="metadata_nonstem" type="text_ascii" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" /> <!-- unstemmed: text_ascii has no Morfologik filter, matching the other *_nonstem fields -->
<field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="themes_pl" type="text_pl_nonstop" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
+ <field name="themes_pl_nonstem" type="text_ascii" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="header_index" type="int" stored="true" indexed="true"/>
<field name="header_span" type="int" stored="true" indexed="true"/>
<field name="header_type" type="lowercase" stored="true" indexed="false"/>
<field name="text" type="text_pl" stored="false" indexed="true" termVectors="true" termPositions="true" />
+ <field name="text_nonstem" type="text_pl_nonstem" stored="false" indexed="true" termVectors="true" termPositions="true" />
<field name="snippets_position" type="int" stored="true" indexed="false"/>
<field name="snippets_length" type="int" stored="true" indexed="false"/>
<field name="tag_category" type="string" stored="true" indexed="true" />
<field name="is_pdcounter" type="boolean" stored="true" indexed="true" />
- <!-- Main body of document extracted by SolrCell.
- NOTE: This field is not indexed by default, since it is also copied to "text"
- using copyField below. This is to save space. Use this field for returning and
- highlighting document content. Use the "text" field to search the content. -->
- <!-- <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> -->
-
-
- <!-- catchall field, containing all other searchable text fields (implemented
- via copyField further on in this schema -->
- <!-- <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> -->
-
- <!-- catchall text field that indexes tokens both normally and in reverse for efficient
- leading wildcard queries. -->
- <!-- <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> -->
-
- <!-- non-tokenized version of manufacturer to make it easier to sort or group
- results by manufacturer. copied from "manu" via copyField -->
- <!-- <field name="manu_exact" type="string" indexed="true" stored="false"/> -->
-
- <!-- <field name="payloads" type="payloads" indexed="true" stored="true"/> -->
-
<field name="_version_" type="long" indexed="true" stored="true"/>
<!-- Uncommenting the following will create a "timestamp" field using
-->
<uniqueKey>uid</uniqueKey>
- <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
- parsing a query string that isn't explicit about the field. Machine (non-user)
- generated queries are best made explicit, or they can use the "df" request parameter
- which takes precedence over this.
- Note: Un-commenting defaultSearchField will be insufficient if your request handler
- in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
- <defaultSearchField>text</defaultSearchField> -->
-
- <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
- when parsing a query string to determine if a clause of the query should be marked as
- required or optional, assuming the clause isn't already marked by some operator.
- The default is OR, which is generally assumed so it is not a good idea to change it
- globally here. The "q.op" request parameter takes precedence over this.
- <solrQueryParser defaultOperator="OR"/> -->
-
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching. -->
<copyField source="kinds" dest="metadata"/>
<copyField source="genres" dest="metadata"/>
-<!--
- <copyField source="cat" dest="text"/>
- <copyField source="name" dest="text"/>
- <copyField source="manu" dest="text"/>
- <copyField source="features" dest="text"/>
- <copyField source="includes" dest="text"/>
- <copyField source="manu" dest="manu_exact"/>
--->
- <!-- Copy the price into a currency enabled field (default USD) -->
-<!-- <copyField source="price" dest="price_c"/>-->
-
- <!-- Text fields from SolrCell to search by default in our catch-all field -->
-<!-- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="description" dest="text"/>
- <copyField source="keywords" dest="text"/>
- <copyField source="content" dest="text"/>
- <copyField source="content_type" dest="text"/>
- <copyField source="resourcename" dest="text"/>
- <copyField source="url" dest="text"/>-->
-
- <!-- Create a string version of author for faceting -->
-<!-- <copyField source="author" dest="author_s"/>-->
-
- <!-- Above, multiple source fields are copied to the [text] field.
- Another way to map multiple source fields to the same
- destination field is to use the dynamic field syntax.
- copyField also supports a maxChars to copy setting. -->
-
- <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
-
- <!-- copy name to alphaNameSort, a field designed for sorting by name -->
- <!-- <copyField source="name" dest="alphaNameSort"/> -->
-
+ <!-- Gather translators, epochs, kinds and genres into the metadata_nonstem field. -->
+ <copyField source="translators" dest="metadata_nonstem"/>
+ <copyField source="epochs" dest="metadata_nonstem"/>
+ <copyField source="kinds" dest="metadata_nonstem"/>
+ <copyField source="genres" dest="metadata_nonstem"/>
+
+ <!-- Copy authors, title, themes and text into their *_nonstem fields so the
+ same input is also indexed with the analyzers of those fields. -->
+ <copyField source="authors" dest="authors_nonstem"/>
+ <copyField source="title" dest="title_nonstem"/>
+ <copyField source="themes" dest="themes_pl_nonstem"/>
+ <copyField source="text" dest="text_nonstem"/>
+
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
<!-- sortMissingLast and sortMissingFirst attributes are optional attributes are
currently supported on types that are sorted internally as strings
and on numeric types.
- This includes "string","boolean", and, as of 3.5 (and 4.x),
- int, float, long, date, double, including the "Trie" variants.
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
+ int, float, long, date, double, including the "Trie" variants.
- If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
- <!-- solr.TextField allows the specification of custom text analyzers
- specified as a tokenizer and a list of token filters. Different
- analyzers may be specified for indexing and querying.
-
- The optional positionIncrementGap puts space between multiple fields of
- this type on the same document, with the purpose of preventing false phrase
- matching across fields.
-
- For more info on customizing your analyzer chain, please see
- http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
- -->
-
- <!-- One can also specify an existing Analyzer class that has a
- default constructor via the class attribute on the analyzer element.
- Example:
- <fieldType name="text_greek" class="solr.TextField">
- <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
- </fieldType>
- -->
-
<fieldType name="uuid" class="solr.UUIDField" indexed="true" />
<!-- Polish -->
<fieldType name="text_pl" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
- <analyzer>
+ <!-- Index side: tokenize, lowercase, drop Polish stop words, run the Morfologik
+ Polish dictionary filter, then add ASCII-folded forms next to the originals
+ (preserveOriginal="true"). -->
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball"/>
+ <filter class="solr.MorfologikFilterFactory" dictionary="morfologik/stemming/polish/polish.dict" />
+ <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true" />
+ </analyzer>
+ <!-- Query side: tokenize, lowercase, drop Polish stop words.
+ NOTE(review): no MorfologikFilterFactory is visible in this query chain although
+ the index chain applies one (text_pl_nonstop applies it on both sides); confirm
+ that unstemmed queries against a stemmed index are intended. -->
+ <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball"/>
</analyzer>
</fieldType>
+ <!-- Polish text without stemming: tokenized, lowercased and stripped of Polish
+ stop words on both sides. The index side additionally adds ASCII-folded forms
+ next to the originals (preserveOriginal="true"); the query side does no folding. -->
+ <fieldType name="text_pl_nonstem" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball"/>
+ <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball"/>
+ </analyzer>
+ </fieldType>
+
<fieldType name="text_pl_nonstop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
- <analyzer>
+ <!-- Polish text that keeps stop words: tokenized, lowercased and run through
+ the Morfologik Polish dictionary filter on both sides. Only the index side
+ adds ASCII-folded forms next to the originals (preserveOriginal="true"). -->
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.MorfologikFilterFactory" dictionary="morfologik/stemming/polish/polish.dict" />
+ <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true" />
+ </analyzer>
+ <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.MorfologikFilterFactory" dictionary="morfologik/stemming/polish/polish.dict" />
</analyzer>
</fieldType>
+ <!-- Plain text with no stop-word removal and no stemming: tokenized and
+ lowercased on both sides. The index side also adds ASCII-folded forms next to
+ the originals (preserveOriginal="true"), so a term can be matched whether the
+ query is typed with or without diacritics. -->
+ <fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
</types>
<!-- Similarity is the scoring routine for each document vs. a query.