Clean up Solr schema
[wolnelektury.git] / doc / schema.xml
index 6b89c00..38d6eda 100644 (file)
 
    <field name="book_id" type="int" indexed="true" stored="true" />
    <field name="parent_id" type="int" indexed="false" stored="true" />
-   <field name="slug" type="text_general" stored="false" indexed="true" omitNorms="true"/> <!-- no norms -->
-   <field name="tags" type="lowercase" stored="false" indexed="true" multiValued="true"/>
+   <field name="slug" type="lowercase" stored="false" indexed="true" omitNorms="true"/> <!-- no norms -->
    <field name="is_book" type="boolean" stored="false" indexed="true"/>
-   <field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
-   <field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
-   <field name="title" type="text_pl" stored="false" indexed="true"/>
-   <field name="title_orig" type="text_general" stored="false" indexed="true"/>
+   <field name="authors" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
+   <field name="translators" type="text_pl_nonstop" stored="false" indexed="false" multiValued="true" />
+   <field name="title" type="text_pl_nonstop" stored="false" indexed="true"/>
 <!--   <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
    <field name="published_date" type="string" stored="true" indexed="true"/>
 
-   <field name="themes" type="lowercase" stored="true" intexed="true" termVectors="true" termPositions="true" multiValued="true" />
-   <field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
+   <field name="epochs" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="kinds" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="genres" type="lowercase" stored="false" indexed="false" multiValued="true" />
+
+   <field name="metadata" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
+
+   <field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
+   <field name="themes_pl" type="text_pl_nonstop" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
    <field name="header_index" type="int" stored="true" indexed="true"/>
    <field name="header_span" type="int" stored="true" indexed="true"/>
    <field name="header_type" type="lowercase" stored="true" indexed="false"/>
-   <field name="text" type="text_pl" stored="false" indexed="true" termPositions="true" />
+   <field name="text" type="text_pl" stored="false" indexed="true" termVectors="true" termPositions="true" />
 
    <field name="snippets_position" type="int" stored="true" indexed="false"/>
    <field name="snippets_length" type="int" stored="true" indexed="false"/>
    <field name="fragment_anchor" type="string" stored="true" indexed="false"/>
 
    <field name="tag_id" type="int" stored="true" indexed="true"/>
-   <field name="tag_name" type="lowercase" stored="true" intexed="true" />
-   <field name="tag_name_pl" type="text_pl" stored="false" indexed="true" multiValued="true"/>
+   <field name="tag_name" type="lowercase" stored="true" indexed="true" />
+   <field name="tag_name_pl" type="text_pl_nonstop" stored="false" indexed="true" multiValued="true"/>
    <field name="tag_category" type="string" stored="true" indexed="true" />
    <field name="is_pdcounter" type="boolean" stored="true" indexed="true" />
 
 
    <!-- <field name="payloads" type="payloads" indexed="true" stored="true"/> -->
 
-   <!-- <field name="_version_" type="long" indexed="true" stored="true"/> -->
+   <field name="_version_" type="long" indexed="true" stored="true"/>
 
    <!-- Uncommenting the following will create a "timestamp" field using
         a default value of "NOW" to indicate when each document was indexed.
   <copyField source="themes" dest="themes_pl"/>
   <copyField source="tag_name" dest="tag_name_pl"/>
 
+  <copyField source="translators" dest="metadata"/>
+  <copyField source="epochs" dest="metadata"/>
+  <copyField source="kinds" dest="metadata"/>
+  <copyField source="genres" dest="metadata"/>
+
 <!--
    <copyField source="cat" dest="text"/>
    <copyField source="name" dest="text"/>
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball" enablePositionIncrements="true"/>
-       <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" />
+        <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" />
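+        <!-- less aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
+        <!-- more aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
+        <!-- most aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.PortugueseStemFilterFactory"/> -->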
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_pl_nonstop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" />
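         <!-- less aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
         <!-- more aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
         <!-- most aggressive (Portuguese-only stemmer; not applicable to this Polish analyzer): <filter class="solr.PortugueseStemFilterFactory"/> -->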
       </analyzer>
     </fieldType>
-    
     
     <!-- Portuguese -->
     <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">