haystack ignoring fields?
authorMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 27 Aug 2012 12:18:38 +0000 (14:18 +0200)
committerMarcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 27 Aug 2012 12:18:38 +0000 (14:18 +0200)
doc/schema.xml [new file with mode: 0644]
migdal/helpers.py
migdal/models.py
migdal/search_indexes.py [new file with mode: 0644]
migdal/templates/search/search.html [new file with mode: 0644]
prawokultury/settings.d/30-apps.conf
prawokultury/settings.d/35-search.conf [new file with mode: 0644]
prawokultury/urls.py
requirements.txt
scripts/make-tags [new file with mode: 0755]

diff --git a/doc/schema.xml b/doc/schema.xml
new file mode 100644 (file)
index 0000000..236b417
--- /dev/null
@@ -0,0 +1,160 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="default" version="1.4">
+  <types>
+    <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+    <fieldtype name="binary" class="solr.BinaryField"/>
+
+    <!-- Numeric field types that manipulate the value into
+         a string value that isn't human-readable in its internal form,
+         but with a lexicographic ordering the same as the numeric ordering,
+         so that range queries work correctly. -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+    <!-- A Trie based date field for faster date range queries and date faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
+    <fieldtype name="geohash" class="solr.GeoHashField"/>
+
+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+          <filter class="solr.EnglishMinimalStemFilterFactory"/>
+        -->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+          <filter class="solr.EnglishMinimalStemFilterFactory"/>
+        -->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="ngram" class="solr.TextField" >
+      <analyzer type="index">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" side="front" />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory" />
+        <filter class="solr.LowerCaseFilterFactory" />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+      </analyzer>
+    </fieldType>
+  </types>
+
+  <fields>
+    <!-- general -->
+    <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+    <field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/>
+    <field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/>
+
+    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
+    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
+    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
+    <dynamicField name="*_t"  type="text_en"    indexed="true"  stored="true"/>
+    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
+    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
+    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
+    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+    <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
+    <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false"/>
+
+
+  </fields>
+
+  <!-- field to use to determine and enforce document uniqueness. -->
+  <uniqueKey>id</uniqueKey>
+
+  <!-- field for the QueryParser to use when an explicit fieldname is absent. NOTE(review): left empty here, and the fields section above declares only id, django_ct, django_id and dynamic fields; presumably this schema was generated before the index fields in migdal/search_indexes.py were picked up. Regenerate and confirm. -->
+  <defaultSearchField></defaultSearchField>
+
+  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+  <solrQueryParser defaultOperator="AND"/>
+</schema>
+
index 1fdbdd4..39f5a60 100644 (file)
@@ -34,8 +34,9 @@ def add_translatable(model, fields, languages=None):
     for name, field in fields.items():
         for lang_code, lang_name in languages:
             new_field = copy(field)
-            if field.verbose_name:
+            if hasattr(field, 'verbose_name') and field.verbose_name:
                 new_field.verbose_name = string_concat(field.verbose_name, ' [%s]' % lang_code)
+               
             new_field.contribute_to_class(model, "%s_%s" % (name, lang_code))
         setattr(model, name, field_getter(name))
         # add setter?
index 83a1865..9a52fe2 100644 (file)
@@ -9,6 +9,7 @@ from migdal.helpers import add_translatable
 from migdal import settings
 
 
+
 class Category(models.Model):
     taxonomy = models.CharField(_('taxonomy'), max_length=32,
                     choices=settings.TAXONOMIES)
diff --git a/migdal/search_indexes.py b/migdal/search_indexes.py
new file mode 100644 (file)
index 0000000..6223b6e
--- /dev/null
@@ -0,0 +1,73 @@
+import datetime
+from haystack import indexes
+from migdal.models import Entry
+from django.conf import settings
+from copy import copy
+
+
+class EntryIndex(indexes.SearchIndex, indexes.Indexable):
+    """Haystack search index for migdal ``Entry`` objects.
+
+    The per-language ``title_*``, ``lead_*`` and ``body_*`` fields are not
+    declared here; they are attached by ``add_translatable`` further down.
+    """
+    # A field with no ``model_attr`` (and no template or ``prepare_<name>``
+    # method) has no value source, so Haystack would index it empty.
+    date = indexes.DateTimeField(indexed=True, model_attr='date')
+    # NOTE(review): presumably this should read ``Entry.author``; add
+    # ``model_attr='author'`` once that attribute is confirmed on the model.
+    author = indexes.CharField()
+
+    def get_model(self):
+        """Return the model class covered by this index."""
+        return Entry
+
+    def index_queryset(self, using=None):
+        """Used when the entire index for model is updated.
+
+        Only entries dated up to the current moment are indexed.  ``using``
+        is accepted (and ignored) because newer Haystack versions call
+        ``index_queryset(using=...)``; older callers pass nothing.
+        """
+        return self.get_model().objects.filter(date__lte=datetime.datetime.now())
+
+
+def add_translatable(index_class, fields, languages=None):
+    """Adds per-language copies of some fields to a search index.
+
+    ``fields`` maps a base name to a prototype field.  For every
+    ``(lang_code, lang_name)`` pair in ``languages`` (``settings.LANGUAGES``
+    when omitted) a copy is registered on ``index_class`` as
+    ``<name>_<lang_code>``, both as a class attribute and in
+    ``index_class.fields``.
+    """
+    if languages is None:
+        languages = settings.LANGUAGES
+    for name, field in fields.items():
+        for lang_code, lang_name in languages:
+            new_field = copy(field)
+            fname = "%s_%s" % (name, lang_code)
+            # These fields are attached after class creation, so the naming
+            # normally done at declaration time has to be done by hand.
+            new_field.instance_name = fname
+            new_field.index_fieldname = fname
+            # Read the value from the model field of the same name (the
+            # model-side add_translatable in migdal/helpers.py uses the same
+            # "<name>_<lang_code>" scheme) unless the prototype set its own.
+            if getattr(new_field, 'model_attr', None) is None:
+                new_field.model_attr = fname
+            setattr(index_class, fname, new_field)
+            index_class.fields[fname] = new_field
+
+
+add_translatable(EntryIndex, {
+    'title': indexes.CharField(indexed=True, document=False),
+    'lead': indexes.CharField(indexed=True, document=False),
+    'body': indexes.CharField(indexed=True, document=False)
+    })
+
+
+# The body in the site's default language is the main document field.
+# This raises AttributeError if LANGUAGE_CODE is not one of the language
+# codes that add_translatable iterated over above.
+getattr(EntryIndex, "body_%s" % settings.LANGUAGE_CODE).document = True
diff --git a/migdal/templates/search/search.html b/migdal/templates/search/search.html
new file mode 100644 (file)
index 0000000..f38a4e6
--- /dev/null
@@ -0,0 +1,25 @@
+{% extends "base.html" %}
+{% load url from future %}
+{% load i18n %}
+{% load migdal_tags %}
+
+{# Search results: one entry_short per hit, then previous/next paging links. #}
+{% block "body" %}
+<h1>{% trans "Search results" %}</h1>
+
+{% for result in page.object_list %}
+{% entry_short result %}
+{% empty %}
+<p>{% trans "No results found." %}</p>
+{% endfor %}
+
+{% if page.has_previous or page.has_next %}{# query is URL-encoded so characters such as '&', '#' and '+' survive inside the href #}
+<div>
+  {% if page.has_previous %}<a href="?q={{ query|urlencode }}&amp;page={{ page.previous_page_number }}">{% endif %}{% trans "&laquo; Previous" %}{% if page.has_previous %}</a>{% endif %}
+  |
+  {% if page.has_next %}<a href="?q={{ query|urlencode }}&amp;page={{ page.next_page_number }}">{% endif %}{% trans "Next &raquo;" %}{% if page.has_next %}</a>{% endif %}
+</div>
+{% else %}
+{# Show some example queries to run, maybe query syntax, something else? #}
+{% endif %}
+{% endblock "body" %}
index 931cb41..3824631 100755 (executable)
@@ -8,6 +8,7 @@ INSTALLED_APPS = (
     'django.contrib.comments',
     'django_comments_xtd',
     'pipeline',
+    'haystack',
 
     'django.contrib.auth',
     'django.contrib.contenttypes',
diff --git a/prawokultury/settings.d/35-search.conf b/prawokultury/settings.d/35-search.conf
new file mode 100644 (file)
index 0000000..62b5799
--- /dev/null
@@ -0,0 +1,9 @@
+HAYSTACK_CONNECTIONS = {
+    'default': {
+        'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+        'URL': 'http://127.0.0.1:8983/solr/prawokultury'
+    },
+}
+# NOTE(review): this reads django.conf.settings from inside a settings fragment, i.e. presumably while settings are still loading; if LANGUAGE_CODE from an earlier fragment is already in scope here, use it directly - confirm against the fragment loader.
+from django.conf import settings
+HAYSTACK_DOCUMENT_FIELD = "body_%s" % settings.LANGUAGE_CODE  # same field name that migdal/search_indexes.py marks document=True
index f074158..6da0510 100644 (file)
@@ -19,10 +19,11 @@ urlpatterns = patterns('',
     url(r'^media/(?P<path>.*)$', 'django.views.static.serve', {
             'document_root': settings.MEDIA_ROOT,
         }),
+    url(r'^search/', include('haystack.urls')),
 ) + i18n_patterns('',
     url(string_concat(r'^', _('events'), r'/'), include('events.urls')),
     url(r'^comments/', include('django_comments_xtd.urls')),
 ) + migdal_urlpatterns
 
 
-urlpatterns += staticfiles_urlpatterns()
\ No newline at end of file
+urlpatterns += staticfiles_urlpatterns()
index 8d580d9..7efd852 100644 (file)
@@ -8,3 +8,6 @@ django-markupfield
 django-gravatar
 django_comments_xtd
 django-pipeline
+
+-e git+https://github.com/toastdriven/django-haystack.git@master#egg=django-haystack
+pysolr
diff --git a/scripts/make-tags b/scripts/make-tags
new file mode 100755 (executable)
index 0000000..30dc096
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Build an Emacs TAGS file at the repository root from the project's Python
+# sources, the active virtualenv's libraries (if any) and the site CSS.
+
+# Stop if we are not inside a git checkout: with an empty ROOT the commands
+# below would scan the wrong tree and try to write /TAGS.
+ROOT=$(git rev-parse --show-toplevel) || exit 1
+TAGS="${ROOT}/TAGS"
+
+# Start from an empty file and always append (-a); find may run etags
+# several times for a long file list, and a run without -a would overwrite
+# what the previous runs wrote.
+rm -f "$TAGS"
+
+# Quoted paths and "-exec ... {} +" keep file names with spaces intact.
+find "$ROOT" -name '*.py' -exec etags -a -o "$TAGS" {} +
+if [ -n "$VIRTUAL_ENV" ]; then
+  find "${VIRTUAL_ENV}/lib" -name '*.py' -exec etags -a -o "$TAGS" {} +
+else
+    echo "No Virtual env enabled, will not add it to TAGS"
+fi
+
+find "$ROOT/prawokultury/static/css" -name '*.css' -exec etags -a -o "$TAGS" {} +
+#find "$ROOT/prawokultury/static/js" -name '*.js' -exec etags -a -o "$TAGS" {} +