Merge branch 'api'
authorJan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)
committerJan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)
18 files changed:
doc/schema.xml
src/catalogue/models/book.py
src/catalogue/templates/catalogue/book_searched.html
src/catalogue/templates/catalogue/book_short.html
src/catalogue/templates/catalogue/book_text.html
src/catalogue/templates/catalogue/search_multiple_hits.html
src/catalogue/templates/catalogue/snippets/jplayer.html
src/catalogue/views.py
src/search/context_processors.py
src/search/custom.py
src/search/index.py
src/search/management/commands/reindex.py
src/search/mock_search.py
src/search/templatetags/search_tags.py
src/search/views.py
src/wolnelektury/static/js/search.js
src/wolnelektury/static/scss/main/book_box.scss
src/wolnelektury/static/scss/main/search.scss

index a202781..56172d3 100644 (file)
    <field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
    <field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
    <field name="title" type="text_pl" stored="false" indexed="true"/>
-   <field name="title_orig" type="text_general" stored="false" indexed="true"/>
+   <field name="title_orig" type="lowercase" stored="false" indexed="true"/>
 <!--   <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
    <field name="published_date" type="string" stored="true" indexed="true"/>
 
+   <field name="epochs" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="kinds" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="genres" type="lowercase" stored="false" indexed="false" multiValued="true" />
+
+   <field name="metadata" type="text_pl" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
+
    <field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
    <field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
    <field name="header_index" type="int" stored="true" indexed="true"/>
 
   <copyField source="themes" dest="themes_pl"/>
   <copyField source="tag_name" dest="tag_name_pl"/>
+  <copyField source="title" dest="title_orig"/>
+
+  <copyField source="translators" dest="metadata"/>
+  <copyField source="epochs" dest="metadata"/>
+  <copyField source="kinds" dest="metadata"/>
+  <copyField source="genres" dest="metadata"/>
 
 <!--
    <copyField source="cat" dest="text"/>
index 00921b8..140ba50 100644 (file)
@@ -21,7 +21,7 @@ from newtagging import managers
 from catalogue import constants
 from catalogue.fields import EbookField
 from catalogue.models import Tag, Fragment, BookMedia
-from catalogue.utils import create_zip, gallery_url, gallery_path
+from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
 from catalogue.models.tag import prefetched_relations
 from catalogue import app_settings
 from catalogue import tasks
@@ -55,7 +55,7 @@ class Book(models.Model):
     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
     description = models.TextField(_('description'), blank=True)
     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
-    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
+    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
     parent_number = models.IntegerField(_('parent number'), default=0)
     extra_info = jsonfield.JSONField(_('extra information'), default={})
     gazeta_link = models.CharField(blank=True, max_length=240)
@@ -122,6 +122,9 @@ class Book(models.Model):
         else:
             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
 
+    def tags_by_category(self):
+        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
+
     def author_unicode(self):
         return self.cached_author
 
@@ -234,6 +237,33 @@ class Book(models.Model):
     has_daisy_file.short_description = 'DAISY'
     has_daisy_file.boolean = True
 
+    def get_audiobooks(self):
+        ogg_files = {}
+        for m in self.media.filter(type='ogg').order_by().iterator():
+            ogg_files[m.name] = m
+
+        audiobooks = []
+        projects = set()
+        for mp3 in self.media.filter(type='mp3').iterator():
+            # ogg files are always from the same project
+            meta = mp3.extra_info
+            project = meta.get('project')
+            if not project:
+                # temporary fallback
+                project = u'CzytamySłuchając'
+
+            projects.add((project, meta.get('funded_by', '')))
+
+            media = {'mp3': mp3}
+
+            ogg = ogg_files.get(mp3.name)
+            if ogg:
+                media['ogg'] = ogg
+            audiobooks.append(media)
+
+        projects = sorted(projects)
+        return audiobooks, projects
+
     def wldocument(self, parse_dublincore=True, inherit=True):
         from catalogue.import_utils import ORMDocProvider
         from librarian.parser import WLDocument
index 26b9f61..4e91432 100644 (file)
@@ -1,11 +1,8 @@
-{% spaceless %}
-  {% load i18n %}
-  {% load inline_tag_list from catalogue_tags %}
-  {% load ssi_include from ssify %}
-
-  <div class="search-result">
-    {% ssi_include 'catalogue_book_short' pk=book.pk %}
+{% extends "catalogue/book_short.html" %}
+{% load inline_tag_list from catalogue_tags %}
 
+{% block right-column %}
+  <div class="book-right-column">
     <div class="snippets">
       {% for hit in hits %}
         {% if hit.snippet %}
               {% inline_tag_list hit.themes_hit  %}
             {% endif %}
             <a href="{{hit.fragment.get_absolute_url}}">
-              {% if hit.snippet %}
-                {{hit.snippet|safe}}
-              {% else %}
-                {{hit.fragment.text|truncatewords_html:15|safe}}
-              {% endif %}
+              {{hit.snippet|safe}}
             </a>
           </div>
         {% endif %}
       {% endfor %}
     </div>
-
-    <div style="clear: right"></div>
+    {% include 'catalogue/snippets/jplayer.html' %}
   </div>
-{% endspaceless %}
\ No newline at end of file
+{% endblock %}
index edd9f6b..70aaed3 100644 (file)
@@ -4,6 +4,8 @@
   {% load book_shelf_tags from social_tags %}
   {% load static %}
 
+  {% with ga=book.get_audiobooks %}
+  {% with audiobooks=ga.0 %}
   <div class="{% block box-class %}book-box{% if audiobooks %} audiobook-box{% endif %}{% endblock %}">
     <div class="book-box-inner">
 
@@ -11,6 +13,7 @@
       {% include "catalogue/snippets/like_button.html" %}
     {% endblock %}
 
+    {% with book.tags_by_category as tags %}
     <div class="book-left-column">
       <div class="book-box-body">
         {% block book-box-body-pre %}
           <div class="author">
             {% for tag in tags.author %}
               <a href="{{ tag.get_absolute_url }}">{{ tag.name }}</a>{% if not forloop.last %},
-            {% endif %}{% endfor %}{% for parent in parents %},
+            {% endif %}{% endfor %}{% for parent in book.parents %},
               <a href="{{ parent.get_absolute_url }}">{{ parent.title }}</a>{% endfor %}
           </div>
           <div class="title">
-            {% if main_link %}<a href="{{ main_link }}">{% endif %}{{ book.title }}{% if main_link %}</a>{% endif %}
+            <a href="{{ book.get_absolute_url }}">{{ book.title }}</a>
           </div>
           {% if book.translator %}
               <div class="author">
@@ -35,9 +38,9 @@
 
         <div class="cover-area">
           {% if book.cover_thumb %}
-            {% if main_link %}<a href="{{ main_link }}">{% endif %}
+            <a href="{{ book.get_absolute_url }}">
               <img src="{{ book.cover_thumb.url }}" alt="Cover" class="cover" />
-            {% if main_link %}</a>{% endif %}
+            </a>
           {% endif %}
           {% block cover-area-extra %}{% endblock %}
         </div>
@@ -67,7 +70,7 @@
             {% endfor %}
           </span></span>
 
-          {% if show_lang %}
+          {% if book.is_foreign %}
             <span class="category">
               <span class="mono"> {% trans "Language" %}:</span>&nbsp;<span class="book-box-tag">
                 <a>{{ book.language_name }}</a>
             </span>
           {% endif %}
 
-          {% if stage_note %}
+          {% with stage_note=book.stage_note %}
+          {% if stage_note.0 %}
             <br>
             <span class="category">
-              <a{% if stage_note_url %} href="{{ stage_note_url }}"{% endif %}>{{ stage_note }}</a>
+              <a{% if stage_note.1 %} href="{{ stage_note.1 }}"{% endif %}>{{ stage_note.0 }}</a>
             </span>
           {% endif %}
+          {% endwith %}
         </div>
       </div>
       {% book_shelf_tags book.pk %}
       {% block box-append %}
       {% endblock %}
     </div>
+    {% endwith %}
 
     {% block right-column %}
       {% if audiobooks %}
-        <div class="audiobook-right-column">
+        <div class="book-right-column">
           {% include 'catalogue/snippets/jplayer.html' %}
         </div>
       {% endif %}
     <div class="clearboth"></div>
     </div>
   </div>
+  {% endwith %}
+  {% endwith %}
 {% endspaceless %}
index 629cb5b..93e755f 100644 (file)
   </div>
 
   <div class="box" id="book-short">
-    {% ssi_include 'catalogue_book_short' pk=book.pk %}
+    {% cache 86400 catalogue_book_short book.pk %}
+      {% include 'catalogue/book_short.html' %}
+    {% endcache %}
   </div>
 {% endblock footer %}
index 5ee0f41..ce43bf9 100644 (file)
     <span class="did_you_mean">{% trans "Did you mean" %}
       <a href="{% url 'search' %}?q={{did_you_mean|urlencode}}">{{did_you_mean|lower}}</a>?</span>
   {% endif %}
-  <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje -->
   <div class="inline-tag-lists top-tag-list">
-    {% if tags.author %}
+    {% if tags %}
       <div>
-        <h2>{% trans "Authors" %}:</h2>
-        {% for tag in tags.author %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.kind %}
-      <div>
-        <h2>{% trans "Kinds" %}:</h2>
-        {% for tag in tags.kind %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.genre %}
-      <div>
-        <h2>{% trans "Genres" %}:</h2>
-        {% for tag in tags.genre %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.epoch %}
-      <div class="inline-tag-list">
-        <h2>{% trans "Epochs" %}:</h2>
-        {% for tag in tags.epoch %}
+        {% for tag in tags %}
           <a class="tag-box" href="{{ tag.get_absolute_url }}">
             {% include "catalogue/tag_box.html" %}
           </a>
     {% endif %}
   </div>
 
-  {% if results.title %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by title" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for result in results.title %}
-          <li class="Book-item">
-            {% ssi_include 'catalogue_book_short' pk=result.book.pk %}
-          </li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.author %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by authors" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for author in results.author %}
-          <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=author.book.pk %}</li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.translator %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by translators" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for translator in results.translator %}
-          <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=translator.book.pk %}</li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.content %}
-  <div class="book-list-header">
-    <div class="book-box-inner">
-      <p>{% trans "Results in text" %}</p>
-    </div>
-  </div>
   <div>
-    <ol class="work-list">
-      {% for result in results.content %}
+    <ul class="work-list">
+      {% for result in results %}
         <li class="Book-item">
-          {% book_searched result %}
+          <div class="search-result">
+            {% book_searched result %}
+          </div>
         </li>
       {% endfor %}
-    </ol>
+    </ul>
   </div>
-  {% endif %}
-
-  {% if results.other %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Other results" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for result in results.other %}
-          <li class="Book-item">
-            {% book_searched result %}
-          </li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
 {% endblock %}
index 860c2b6..e3c88c1 100644 (file)
@@ -2,7 +2,7 @@
 {% if audiobooks %}
   <div class="jp-type-playlist">
     <div id="jplayer" class="jp-jplayer" data-player="jp_container_{{ book.pk }}"
-         data-supplied="{% if have_oggs %}oga,{% endif %}mp3"></div>
+         data-supplied="oga,mp3"></div>
     <div id="jp_container_{{ book.pk }}" class="jp-audio">
       <div class="jp-type-single">
         <span class="title"></span>
index 267d9b4..a247746 100644 (file)
@@ -286,44 +286,13 @@ def book_detail(request, slug):
     }, context_instance=RequestContext(request))
 
 
-def get_audiobooks(book):
-    ogg_files = {}
-    for m in book.media.filter(type='ogg').order_by().iterator():
-        ogg_files[m.name] = m
-
-    audiobooks = []
-    have_oggs = True
-    projects = set()
-    for mp3 in book.media.filter(type='mp3').iterator():
-        # ogg files are always from the same project
-        meta = mp3.extra_info
-        project = meta.get('project')
-        if not project:
-            # temporary fallback
-            project = u'CzytamySłuchając'
-
-        projects.add((project, meta.get('funded_by', '')))
-
-        media = {'mp3': mp3}
-
-        ogg = ogg_files.get(mp3.name)
-        if ogg:
-            media['ogg'] = ogg
-        else:
-            have_oggs = False
-        audiobooks.append(media)
-
-    projects = sorted(projects)
-    return audiobooks, projects, have_oggs
-
-
 # używane w publicznym interfejsie
 def player(request, slug):
     book = get_object_or_404(Book, slug=slug)
     if not book.has_media('mp3'):
         raise Http404
 
-    audiobooks, projects, have_oggs = get_audiobooks(book)
+    audiobooks, projects = book.get_audiobooks()
 
     return render_to_response('catalogue/player.html', {
         'book': book,
@@ -672,20 +641,9 @@ def book_mini(request, pk, with_link=True):
     ))(ssi_expect(pk, int)))
 def book_short(request, pk):
     book = get_object_or_404(Book, pk=pk)
-    stage_note, stage_note_url = book.stage_note()
-    audiobooks, projects, have_oggs = get_audiobooks(book)
 
     return render(request, 'catalogue/book_short.html', {
         'book': book,
-        'has_audio': book.has_media('mp3'),
-        'main_link': book.get_absolute_url(),
-        'parents': book.parents(),
-        'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
-        'show_lang': book.language_code() != settings.LANGUAGE_CODE,
-        'stage_note': stage_note,
-        'stage_note_url': stage_note_url,
-        'audiobooks': audiobooks,
-        'have_oggs': have_oggs,
     })
 
 
@@ -698,24 +656,17 @@ def book_short(request, pk):
     ))(ssi_expect(pk, int)))
 def book_wide(request, pk):
     book = get_object_or_404(Book, pk=pk)
-    stage_note, stage_note_url = book.stage_note()
     extra_info = book.extra_info
-    audiobooks, projects, have_oggs = get_audiobooks(book)
 
     return render(request, 'catalogue/book_wide.html', {
         'book': book,
-        'has_audio': book.has_media('mp3'),
         'parents': book.parents(),
         'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
         'show_lang': book.language_code() != settings.LANGUAGE_CODE,
-        'stage_note': stage_note,
-        'stage_note_url': stage_note_url,
 
         'main_link': reverse('book_text', args=[book.slug]) if book.html_file else None,
         'extra_info': extra_info,
         'hide_about': extra_info.get('about', '').startswith('http://wiki.wolnepodreczniki.pl'),
-        'audiobooks': audiobooks,
-        'have_oggs': have_oggs,
     })
 
 
index a3f1ea9..6ad2fe9 100644 (file)
@@ -7,4 +7,4 @@ from search.forms import SearchForm
 
 
 def search_form(request):
-    return {'search_form': SearchForm(reverse('search.views.hint'), request.GET)}
+    return {'search_form': SearchForm(reverse('search.views.hint')+'?max=10', request.GET)}
index dfface9..da21e01 100644 (file)
@@ -184,5 +184,6 @@ class CustomSolrInterface(sunburnt.SolrInterface):
             off = -start
             snip = snip[:e + off] + mark[1] + snip[e + off:]
             snip = snip[:s + off] + mark[0] + snip[s + off:]
+        snip = re.sub('%s[ \t\n]+%s' % (mark[1], mark[0]), " ", snip)
 
         return snip
index 7dfe6ef..ce60978 100644 (file)
@@ -240,7 +240,8 @@ class Index(SolrIndex):
             self.remove_book(book, remove_snippets=False)
 
         book_doc = self.create_book_doc(book)
-        meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'translators', 'title'])
+        meta_fields = self.extract_metadata(book, book_info, dc_only=[
+            'source_name', 'authors', 'translators', 'title', 'epochs', 'kinds', 'genres'])
         # let's not index it - it's only used for extracting publish date
         if 'source_name' in meta_fields:
             del meta_fields['source_name']
@@ -257,8 +258,9 @@ class Index(SolrIndex):
             'published_date': meta_fields['published_date']
             }
 
-        if 'translators' in meta_fields:
-            book_fields['translators'] = meta_fields['translators']
+        for tag_name in ('translators', 'epochs', 'kinds', 'genres'):
+            if tag_name in meta_fields:
+                book_fields[tag_name] = meta_fields[tag_name]
 
         self.index_content(book, book_fields=book_fields)
 
@@ -272,7 +274,7 @@ class Index(SolrIndex):
         ]
 
     ignore_content_tags = [
-        'uwaga', 'extra',
+        'uwaga', 'extra', 'nota_red',
         'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
         'didaskalia',
         'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
@@ -365,8 +367,8 @@ class Index(SolrIndex):
         if master is None:
             return []
 
-        def walker(node, ignore_tags=()):
-            if node.tag not in ignore_tags:
+        def walker(node):
+            if node.tag not in self.ignore_content_tags:
                 yield node, None, None
                 if node.text is not None:
                     yield None, node.text, None
@@ -417,17 +419,10 @@ class Index(SolrIndex):
 
             if 'themes' in fields:
                 doc['themes'] = fields['themes']
-            doc['uid'] = "part%s%s%s" % (doc['header_index'],
-                                         doc['header_span'],
-                                         doc.get('fragment_anchor', ''))
+            doc['uid'] = "part%s-%s-%s-%s" % (
+                book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
             return doc
 
-        def give_me_utf8(s):
-            if isinstance(s, unicode):
-                return s.encode('utf-8')
-            else:
-                return s
-
         fragments = {}
         snippets = Snippets(book.id).open('w')
         try:
@@ -448,7 +443,7 @@ class Index(SolrIndex):
                     content.append(text)
                 handle_text = [all_content]
 
-                for start, text, end in walker(header, ignore_tags=self.ignore_content_tags):
+                for start, text, end in walker(header):
                     # handle footnotes
                     if start is not None and start.tag in self.footnote_tags:
                         footnote = []
@@ -515,8 +510,7 @@ class Index(SolrIndex):
 
 
 class SearchResult(object):
-    def __init__(self, doc, how_found=None, query=None, query_terms=None):
-        #        self.search = search
+    def __init__(self, doc, how_found=None, query_terms=None):
         self.boost = 1.0
         self._hits = []
         self._processed_hits = None  # processed hits
@@ -734,71 +728,23 @@ class Search(SolrIndex):
 
         return q
 
-    def search_phrase(self, searched, field='text', book=False,
-                      filters=None,
-                      snippets=False):
-        if filters is None:
-            filters = []
-        if book:
-            filters.append(self.index.Q(is_book=True))
-
-        q = self.index.query(**{field: searched})
-        q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True)
-        res = q.execute()
-        return [SearchResult(found, how_found=u'search_phrase') for found in res]
-
-    def search_some(self, searched, fields, book=True,
-                    filters=None, snippets=True, query_terms=None):
-        assert isinstance(fields, list)
-        if filters is None:
-            filters = []
+    def search_words(self, words, fields, book=True):
+        filters = []
+        for word in words:
+            word_filter = None
+            for field in fields:
+                q = self.index.Q(**{field: word})
+                if word_filter is None:
+                    word_filter = q
+                else:
+                    word_filter |= q
+            filters.append(word_filter)
         if book:
-            filters.append(self.index.Q(is_book=True))
-
-        query = self.index.Q()
-
-        for fld in fields:
-            query = self.index.Q(query | self.make_term_query(searched, fld))
-
-        query = self.index.query(query)
+            query = self.index.query(is_book=True)
+        else:
+            query = self.index.query()
         query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
-        res = query.execute()
-        return [SearchResult(found, how_found='search_some', query_terms=query_terms) for found in res]
-
-    def search_everywhere(self, searched, query_terms=None):
-        """
-        Tries to use search terms to match different fields of book (or its parts).
-        E.g. one word can be an author survey, another be a part of the title, and the rest
-        are some words from third chapter.
-        """
-        books = []
-        # content only query : themes x content
-        q = self.make_term_query(searched, 'text')
-        q_themes = self.make_term_query(searched, 'themes_pl')
-
-        query = self.index.query(q).query(q_themes).field_limit(score=True, all_fields=True)
-        res = query.execute()
-
-        for found in res:
-            books.append(SearchResult(found, how_found='search_everywhere_themesXcontent', query_terms=query_terms))
-
-        # query themes/content x author/title/tags
-        in_content = self.index.Q()
-        in_meta = self.index.Q()
-
-        for fld in ['themes_pl', 'text']:
-            in_content |= self.make_term_query(searched, field=fld)
-
-        for fld in ['tags', 'authors', 'title']:
-            in_meta |= self.make_term_query(searched, field=fld)
-
-        q = in_content & in_meta
-        res = self.index.query(q).field_limit(score=True, all_fields=True).execute()
-
-        for found in res:
-            books.append(SearchResult(found, how_found='search_everywhere', query_terms=query_terms))
-
-        return books
+        return [SearchResult(found, how_found='search_words') for found in query.execute()]
 
     def get_snippets(self, searchresult, query, field='text', num=1):
         """
@@ -821,9 +767,10 @@ class Search(SolrIndex):
                 text = snippets.get((int(position),
                                      int(length)))
                 snip = self.index.highlight(text=text, field=field, q=query)
-                snips[idx] = snip
-                if snip:
-                    num -= 1
+                if snip not in snips:
+                    snips[idx] = snip
+                    if snip:
+                        num -= 1
                 idx += 1
 
         except IOError, e:
@@ -879,13 +826,9 @@ class Search(SolrIndex):
                 if is_pdcounter:
                     if category == 'pd_author':
                         tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
-                    elif category == 'pd_book':
+                    else:  # category == 'pd_book':
                         tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
                         tag.category = 'pd_book'  # make it look more lik a tag.
-                    else:
-                        # WTF
-                        print ("Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (
-                            int(doc.get('tag_id')), category)).encode('utf-8')
                     pd_tags.append(tag)
                 else:
                     tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
@@ -915,8 +858,10 @@ class Search(SolrIndex):
         query = query.strip()
         if prefix:
             q |= self.index.Q(title=query + "*")
+            q |= self.index.Q(title_orig=query + "*")
         else:
             q |= self.make_term_query(query, field='title')
+            q |= self.make_term_query(query, field='title_orig')
         qu = self.index.query(q)
         only_books = self.index.Q(is_book=True)
         return self.search_books(qu, [only_books])
index b8cb49c..da4574f 100755 (executable)
@@ -51,6 +51,8 @@ class Command(BaseCommand):
                     help='book id instead of slugs'),
         make_option('-t', '--just-tags', action='store_true', dest='just_tags', default=False,
                     help='just reindex tags'),
+        make_option('--start', dest='start_from', default=None, help='start from this slug'),
+        make_option('--stop', dest='stop_after', default=None, help='stop after this slug'),
     )
 
     def handle(self, *args, **opts):
@@ -67,14 +69,23 @@ class Command(BaseCommand):
                     else:
                         books += Book.objects.filter(slug=a).all()
             else:
-                books = list(Book.objects.all())
-
+                books = list(Book.objects.order_by('slug'))
+            start_from = opts.get('start_from')
+            stop_after = opts.get('stop_after')
+            if start_from:
+                start_from = start_from.replace('-', '')
+            if stop_after:
+                stop_after = stop_after.replace('-', '')
             while books:
                 try:
                     b = books[0]
-                    print b.title
-                    idx.index_book(b)
-                    idx.index.commit()
+                    slug = b.slug.replace('-', '')
+                    if stop_after and slug > stop_after:
+                        break
+                    if not start_from or slug >= start_from:
+                        print b.slug
+                        idx.index_book(b)
+                        idx.index.commit()
                     books.pop(0)
                 except:
                     traceback.print_exc()
index b1c8162..3bd7b93 100644 (file)
@@ -21,12 +21,10 @@ class Search(Mock):
     index = MockIndex()
 
     @staticmethod
-    def _find_some_books(snippets=False, query_terms=None, max_results=20):
+    def _find_some_books(query_terms=None, max_results=20):
         from .index import SearchResult
 
         qs = Book.objects.order_by('?')
-        if snippets:
-            qs = qs.exclude(fragments=None)
         results = []
         for book in qs[:randint(1, max_results)]:
             doc = {
@@ -34,33 +32,13 @@ class Search(Mock):
                 'book_id': book.pk,
                 'published_date': randint(1000, 1920),
                 }
-            if snippets:
-                fragment = book.fragments.order_by('?')[0]
-                doc.update({
-                    'header_type': choice(['strofa', 'akap']),
-                    'header_index': randint(100, 200),
-                    'header_span': randint(100, 200),
-                    'fragment_anchor': fragment.anchor,
-                    'snippets_position': randint(100, 200),
-                    'snippets_length': randint(100, 200),
-                    'snippets_revision': randint(1, 100),
-                    'themes_pl': fragment.tags.filter(category='theme'),
-                })
             res = SearchResult(doc, how_found='mock', query_terms=query_terms)
-            if snippets:
-                res.snippets = [fragment.short_text]
             results.append(res)
         return results
 
-    def search_phrase(self, searched, field='text', book=False, filters=None, snippets=False):
-        return self._find_some_books(snippets)
-
-    def search_some(self, searched, fields, book=True, filters=None, snippets=True, query_terms=None):
-        return self._find_some_books(snippets, query_terms)
-
     # WTF
     def search_books(self, query, filters=None, max_results=10):
-        return self._find_some_books(snippets, max_results=max_results)
+        return self._find_some_books(max_results=max_results)
 
     def search_everywhere(self, searched, query_terms=None):
         return []
index c135b80..ea8d4ed 100644 (file)
@@ -22,17 +22,9 @@ register = template.Library()
 def book_searched(context, result):
     book = Book.objects.get(pk=result.book_id)
 
-    # snippets = []
-    # for hit in result.hits:
-    #     if hit['snippets']:
-    #         snippets.append(hit['snippets'])
-    #     elif hit['fragment']:
-    #         snippets.append(hit['fragment'].short_text)
-
     # We don't need hits which lead to sections but do not have
     # snippets.
     hits = filter(lambda (idx, h):
-                  'fragment' in h or
                   result.snippets[idx] is not None,
                   enumerate(result.hits))
     # print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
@@ -45,8 +37,8 @@ def book_searched(context, result):
             continue
         snip = result.snippets[idx]
         # fix some formattting
-        snip = re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
-                       re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)[0])[0]
+        snip = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)
+        snip = re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", snip)
 
         snip = snip.replace("\n", "<br />").replace('---', '&mdash;')
         hit['snippet'] = snip
@@ -54,5 +46,5 @@ def book_searched(context, result):
     return {
         'request': context['request'],
         'book': book,
-        'hits':  hits and zip(*hits)[1] or []
+        'hits':  zip(*hits)[1] if hits else []
     }
index 5b65a30..70a216e 100644 (file)
@@ -10,7 +10,7 @@ from django.http import HttpResponse, JsonResponse
 from django.utils.translation import ugettext as _
 
 from catalogue.utils import split_tags
-from catalogue.models import Book
+from catalogue.models import Book, Tag
 from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
 from search.index import Search, SearchResult
 from suggest.forms import PublishingSuggestForm
@@ -66,26 +66,6 @@ def hint(request):
 
     prefix = remove_query_syntax_chars(prefix)
 
-    search = Search()
-    # tagi beda ograniczac tutaj
-    # ale tagi moga byc na ksiazce i na fragmentach
-    # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
-    # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
-
-    def is_dupe(tag):
-        if isinstance(tag, PDCounterAuthor):
-            if filter(lambda t: t.slug == tag.slug and t != tag, tags):
-                return True
-        elif isinstance(tag, PDCounterBook):
-            if filter(lambda b: b.slug == tag.slug, tags):
-                return True
-        return False
-
-    def category_name(c):
-        if c.startswith('pd_'):
-            c = c[len('pd_'):]
-        return _(c)
-
     try:
         limit = int(request.GET.get('max', ''))
     except ValueError:
@@ -94,33 +74,25 @@ def hint(request):
         if limit < 1:
             limit = -1
 
-    data = []
-
-    tags = search.hint_tags(prefix, pdcounter=True)
-    tags = filter(lambda t: not is_dupe(t), tags)
-    for t in tags:
-        if not limit:
-            break
-        limit -= 1
-        data.append({
-            'label': t.name,
-            'category': category_name(t.category),
-            'id': t.id,
-            'url': t.get_absolute_url()
-            })
-    if limit:
-        books = search.hint_books(prefix)
-        for b in books:
-            if not limit:
-                break
-            limit -= 1
-            data.append({
-                'label': b.title,
+    data = [
+        {
+            'label': author.name,
+            'category': _('author'),
+            'id': author.id,
+            'url': author.get_absolute_url(),
+        }
+        for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
+    ]
+    if len(data) < limit:
+        data += [
+            {
+                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                 'category': _('book'),
                 'id': b.id,
                 'url': b.get_absolute_url()
-                })
-
+            }
+            for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
+        ]
     callback = request.GET.get('callback', None)
     if callback:
         return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
@@ -146,67 +118,42 @@ def main(request):
             'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
 
     query = remove_query_syntax_chars(query)
-    
-    search = Search()
 
-    theme_terms = search.index.analyze(text=query, field="themes_pl") \
-        + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        query = ' '.join(words[:10])
+
+    search = Search()
 
-    # change hints
     tags = search.hint_tags(query, pdcounter=True, prefix=False)
     tags = split_tags(tags)
 
-    author_results = search.search_phrase(query, 'authors', book=True)
-    translator_results = search.search_phrase(query, 'translators', book=True)
-
-    title_results = search.search_phrase(query, 'title', book=True)
-
-    # Boost main author/title results with mixed search, and save some of its results for end of list.
-    # boost author, title results
-    author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
-    author_title_rest = []
-
-    for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
-        for b2 in also_in_mixed:
-            b2.boost *= 1.1
-        if also_in_mixed is []:
-            author_title_rest.append(b)
-
-    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-    # Because the query is using only one field.
-    text_phrase = SearchResult.aggregate(
-        search.search_phrase(query, 'text', snippets=True, book=False),
-        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
-
-    everywhere = search.search_everywhere(query, query_terms=theme_terms)
-
-    def already_found(results):
-        def f(e):
-            for r in results:
-                if e.book_id == r.book_id:
-                    e.boost = 0.9
-                    results.append(e)
-                    return True
-            return False
-        return f
-    f = already_found(author_results + translator_results + title_results + text_phrase)
-    everywhere = filter(lambda x: not f(x), everywhere)
-
-    author_results = SearchResult.aggregate(author_results)
-    translator_results = SearchResult.aggregate(translator_results)
-    title_results = SearchResult.aggregate(title_results)
-
-    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-    for field, res in [('authors', author_results),
-                       ('translators', translator_results),
-                       ('title', title_results),
-                       ('text', text_phrase),
-                       ('text', everywhere)]:
-        res.sort(reverse=True)
-        for r in res:
-            search.get_snippets(r, query, field, 3)
+    results_parts = []
+
+    search_fields = []
+    fieldsets = (
+        (['authors'], True),
+        (['title'], True),
+        (['metadata'], True),
+        (['text', 'themes_pl'], False),
+    )
+    for fieldset, is_book in fieldsets:
+        search_fields += fieldset
+        results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+    results = []
+    ids_results = {}
+    for results_part in results_parts:
+        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+            book_id = result.book_id
+            if book_id in ids_results:
+                ids_results[book_id].merge(result)
+            else:
+                results.append(result)
+                ids_results[book_id] = result
+
+    for result in results:
+        search.get_snippets(result, query, num=3)
 
     suggestion = u''
 
@@ -216,26 +163,9 @@ def main(request):
         except Book.DoesNotExist:
             return False
 
-    author_results = filter(ensure_exists, author_results)
-    translator_results = filter(ensure_exists, translator_results)
-    title_results = filter(ensure_exists, title_results)
-    text_phrase = filter(ensure_exists, text_phrase)
-    everywhere = filter(ensure_exists, everywhere)
-
-    results = author_results + translator_results + title_results + text_phrase + everywhere
-    # ensure books do exists & sort them
-    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
-        res.sort(reverse=True)
-
-    # We don't want to redirect to book text, but rather display result page even with one result.
-    # if len(results) == 1:
-    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-    #     if len(fragment_hits) == 1:
-    #         #anchor = fragment_hits[0]['fragment']
-    #         #frag = Fragment.objects.get(anchor=anchor)
-    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
-    if len(results) == 0:
+    results = filter(ensure_exists, results)
+
+    if not results:
         form = PublishingSuggestForm(initial={"books": query + ", "})
         return render_to_response(
             'catalogue/search_no_hits.html',
@@ -250,15 +180,9 @@ def main(request):
     return render_to_response(
         'catalogue/search_multiple_hits.html',
         {
-            'tags': tags,
+            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
             'prefix': query,
-            'results': {
-                'author': author_results,
-                'translator': translator_results,
-                'title': title_results,
-                'content': text_phrase,
-                'other': everywhere
-            },
+            'results': results,
             'did_you_mean': suggestion
         },
         context_instance=RequestContext(request))
index 4d001e0..786a05c 100644 (file)
@@ -33,8 +33,7 @@ var __bind = function (self, fn) {
    
        render_item: function (ul, item) {
            return $("<li></li>").data('item.autocomplete', item)
-               .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span>'+
-                       '<span class="search-hint-category mono">'+item.category+'</span></a>')
+               .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span>')
                .appendTo(ul);
        }, 
 
index 44a5955..0e90611 100755 (executable)
   }
 }
 
-.audiobook-box {
+.audiobook-box, .search-result .book-box {
   .book-left-column {
     @media screen and (min-width: 1024px) {
       display: inline-block;
     }
   }
 
-  .audiobook-right-column {
+  .book-right-column {
     @media screen and (min-width: 1024px) {
       float: right;
+      @include size(padding-top, 15px);
       @include size(width, 360px);
     }
   }
 
     @media screen and (min-width: 1024px) {
       float: right;
-      margin-top: 48px;
     }
   }
 }
index d8a1e06..ad151d8 100755 (executable)
@@ -26,8 +26,7 @@
     @include size(margin-bottom, 16px);
 }
 
-
-.search-result {
+/*.search-result {
     @include size(border, 1px solid #ddd);
     @include size(box-shadow, 2px 2px 2px #ddd);
     @include size(margin, 1px);
@@ -55,7 +54,7 @@
             margin: 0;
         }
     }
-}
+}*/
 
 
 
 
 .snippets {
     @media screen and (min-width: 62.5em) {
-        @include size(width, 440px);
+        @include size(width, 360px);
         float: right;
     }
 
     .snippet-text {
-        @include size(font-size, 12px);
+        @include size(font-size, 16px);
         @include size(margin, 13px 0);
         @include size(padding, 12px);
         background: #f7f7f7;
 
 .search-hint-label {
     display: inline-block;
-    @include size(font-size, 11px);
-    @include size(width, 275px);
-    line-height: 1.636em;
-}
-
-.search-hint-category {
-    @include size(font-size, 11px);
+    @include size(font-size, 16px);
+    //@include size(width, 275px);
     line-height: 1.636em;
 }
-