Merge branch 'api'

author Jan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)
committer Jan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)

diff --git a/doc/schema.xml b/doc/schema.xml

index a202781..56172d3 100644 (file)
--- a/doc/schema.xml
+++ b/doc/schema.xml
@@ -135,10 +135,16 @@
     <field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
     <field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
     <field name="title" type="text_pl" stored="false" indexed="true"/>
-   <field name="title_orig" type="text_general" stored="false" indexed="true"/>
+   <field name="title_orig" type="lowercase" stored="false" indexed="true"/>
  <!--   <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
     <field name="published_date" type="string" stored="true" indexed="true"/>
  
+   <field name="epochs" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="kinds" type="lowercase" stored="false" indexed="false" multiValued="true" />
+   <field name="genres" type="lowercase" stored="false" indexed="false" multiValued="true" />
+
+   <field name="metadata" type="text_pl" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
+
     <field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
     <field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
     <field name="header_index" type="int" stored="true" indexed="true"/>
@@ -266,6 +272,12 @@
  
    <copyField source="themes" dest="themes_pl"/>
    <copyField source="tag_name" dest="tag_name_pl"/>
+  <copyField source="title" dest="title_orig"/>
+
+  <copyField source="translators" dest="metadata"/>
+  <copyField source="epochs" dest="metadata"/>
+  <copyField source="kinds" dest="metadata"/>
+  <copyField source="genres" dest="metadata"/>
  
  <!--
     <copyField source="cat" dest="text"/>
diff --git a/src/catalogue/models/book.py b/src/catalogue/models/book.py

index 00921b8..140ba50 100644 (file)
--- a/src/catalogue/models/book.py
+++ b/src/catalogue/models/book.py
@@ -21,7 +21,7 @@ from newtagging import managers
  from catalogue import constants
  from catalogue.fields import EbookField
  from catalogue.models import Tag, Fragment, BookMedia
-from catalogue.utils import create_zip, gallery_url, gallery_path
+from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
  from catalogue.models.tag import prefetched_relations
  from catalogue import app_settings
  from catalogue import tasks
@@ -55,7 +55,7 @@ class Book(models.Model):
      language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
      description = models.TextField(_('description'), blank=True)
      created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
-    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
+    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
      parent_number = models.IntegerField(_('parent number'), default=0)
      extra_info = jsonfield.JSONField(_('extra information'), default={})
      gazeta_link = models.CharField(blank=True, max_length=240)
@@ -122,6 +122,9 @@ class Book(models.Model):
          else:
              return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
  
+    def tags_by_category(self):
+        return split_tags(self.tags.exclude(category__in=('set', 'theme')))
+
      def author_unicode(self):
          return self.cached_author
  
@@ -234,6 +237,33 @@ class Book(models.Model):
      has_daisy_file.short_description = 'DAISY'
      has_daisy_file.boolean = True
  
+    def get_audiobooks(self):
+        ogg_files = {}
+        for m in self.media.filter(type='ogg').order_by().iterator():
+            ogg_files[m.name] = m
+
+        audiobooks = []
+        projects = set()
+        for mp3 in self.media.filter(type='mp3').iterator():
+            # ogg files are always from the same project
+            meta = mp3.extra_info
+            project = meta.get('project')
+            if not project:
+                # temporary fallback
+                project = u'CzytamySłuchając'
+
+            projects.add((project, meta.get('funded_by', '')))
+
+            media = {'mp3': mp3}
+
+            ogg = ogg_files.get(mp3.name)
+            if ogg:
+                media['ogg'] = ogg
+            audiobooks.append(media)
+
+        projects = sorted(projects)
+        return audiobooks, projects
+
      def wldocument(self, parse_dublincore=True, inherit=True):
          from catalogue.import_utils import ORMDocProvider
          from librarian.parser import WLDocument
diff --git a/src/catalogue/templates/catalogue/book_searched.html b/src/catalogue/templates/catalogue/book_searched.html

index 26b9f61..4e91432 100644 (file)
--- a/src/catalogue/templates/catalogue/book_searched.html
+++ b/src/catalogue/templates/catalogue/book_searched.html
@@ -1,11 +1,8 @@
-{% spaceless %}
-  {% load i18n %}
-  {% load inline_tag_list from catalogue_tags %}
-  {% load ssi_include from ssify %}
-
-  <div class="search-result">
-    {% ssi_include 'catalogue_book_short' pk=book.pk %}
+{% extends "catalogue/book_short.html" %}
+{% load inline_tag_list from catalogue_tags %}
  
+{% block right-column %}
+  <div class="book-right-column">
      <div class="snippets">
        {% for hit in hits %}
          {% if hit.snippet %}
@@ -18,17 +15,12 @@
                {% inline_tag_list hit.themes_hit  %}
              {% endif %}
              <a href="{{hit.fragment.get_absolute_url}}">
-              {% if hit.snippet %}
-                {{hit.snippet|safe}}
-              {% else %}
-                {{hit.fragment.text|truncatewords_html:15|safe}}
-              {% endif %}
+              {{hit.snippet|safe}}
              </a>
            </div>
          {% endif %}
        {% endfor %}
      </div>
-
-    <div style="clear: right"></div>
+    {% include 'catalogue/snippets/jplayer.html' %}
    </div>
-{% endspaceless %}
-\ No newline at end of file
+{% endblock %}
diff --git a/src/catalogue/templates/catalogue/book_short.html b/src/catalogue/templates/catalogue/book_short.html

index edd9f6b..70aaed3 100644 (file)
--- a/src/catalogue/templates/catalogue/book_short.html
+++ b/src/catalogue/templates/catalogue/book_short.html
@@ -4,6 +4,8 @@
    {% load book_shelf_tags from social_tags %}
    {% load static %}
  
+  {% with ga=book.get_audiobooks %}
+  {% with audiobooks=ga.0 %}
    <div class="{% block box-class %}book-box{% if audiobooks %} audiobook-box{% endif %}{% endblock %}">
      <div class="book-box-inner">
  
@@ -11,6 +13,7 @@
        {% include "catalogue/snippets/like_button.html" %}
      {% endblock %}
  
+    {% with book.tags_by_category as tags %}
      <div class="book-left-column">
        <div class="book-box-body">
          {% block book-box-body-pre %}
@@ -20,11 +23,11 @@
            <div class="author">
              {% for tag in tags.author %}
                <a href="{{ tag.get_absolute_url }}">{{ tag.name }}</a>{% if not forloop.last %},
-            {% endif %}{% endfor %}{% for parent in parents %},
+            {% endif %}{% endfor %}{% for parent in book.parents %},
                <a href="{{ parent.get_absolute_url }}">{{ parent.title }}</a>{% endfor %}
            </div>
            <div class="title">
-            {% if main_link %}<a href="{{ main_link }}">{% endif %}{{ book.title }}{% if main_link %}</a>{% endif %}
+            <a href="{{ book.get_absolute_url }}">{{ book.title }}</a>
            </div>
            {% if book.translator %}
                <div class="author">
@@ -35,9 +38,9 @@
  
          <div class="cover-area">
            {% if book.cover_thumb %}
-            {% if main_link %}<a href="{{ main_link }}">{% endif %}
+            <a href="{{ book.get_absolute_url }}">
                <img src="{{ book.cover_thumb.url }}" alt="Cover" class="cover" />
-            {% if main_link %}</a>{% endif %}
+            </a>
            {% endif %}
            {% block cover-area-extra %}{% endblock %}
          </div>
@@ -67,7 +70,7 @@
              {% endfor %}
            </span></span>
  
-          {% if show_lang %}
+          {% if book.is_foreign %}
              <span class="category">
                <span class="mono"> {% trans "Language" %}:</span>&nbsp;<span class="book-box-tag">
                  <a>{{ book.language_name }}</a>
@@ -75,12 +78,14 @@
              </span>
            {% endif %}
  
-          {% if stage_note %}
+          {% with stage_note=book.stage_note %}
+          {% if stage_note.0 %}
              <br>
              <span class="category">
-              <a{% if stage_note_url %} href="{{ stage_note_url }}"{% endif %}>{{ stage_note }}</a>
+              <a{% if stage_note.1 %} href="{{ stage_note.1 }}"{% endif %}>{{ stage_note.0 }}</a>
              </span>
            {% endif %}
+          {% endwith %}
          </div>
        </div>
        {% book_shelf_tags book.pk %}
@@ -122,10 +127,11 @@
        {% block box-append %}
        {% endblock %}
      </div>
+    {% endwith %}
  
      {% block right-column %}
        {% if audiobooks %}
-        <div class="audiobook-right-column">
+        <div class="book-right-column">
            {% include 'catalogue/snippets/jplayer.html' %}
          </div>
        {% endif %}
@@ -134,4 +140,6 @@
      <div class="clearboth"></div>
      </div>
    </div>
+  {% endwith %}
+  {% endwith %}
  {% endspaceless %}
diff --git a/src/catalogue/templates/catalogue/book_text.html b/src/catalogue/templates/catalogue/book_text.html

index 629cb5b..93e755f 100644 (file)
--- a/src/catalogue/templates/catalogue/book_text.html
+++ b/src/catalogue/templates/catalogue/book_text.html
@@ -110,6 +110,8 @@
    </div>
  
    <div class="box" id="book-short">
-    {% ssi_include 'catalogue_book_short' pk=book.pk %}
+    {% cache 86400 catalogue_book_short book.pk %}
+      {% include 'catalogue/book_short.html' %}
+    {% endcache %}
    </div>
  {% endblock footer %}
diff --git a/src/catalogue/templates/catalogue/search_multiple_hits.html b/src/catalogue/templates/catalogue/search_multiple_hits.html

index 5ee0f41..ce43bf9 100644 (file)
--- a/src/catalogue/templates/catalogue/search_multiple_hits.html
+++ b/src/catalogue/templates/catalogue/search_multiple_hits.html
@@ -14,42 +14,10 @@
      <span class="did_you_mean">{% trans "Did you mean" %}
        <a href="{% url 'search' %}?q={{did_you_mean|urlencode}}">{{did_you_mean|lower}}</a>?</span>
    {% endif %}
-  <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje -->
    <div class="inline-tag-lists top-tag-list">
-    {% if tags.author %}
+    {% if tags %}
        <div>
-        <h2>{% trans "Authors" %}:</h2>
-        {% for tag in tags.author %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.kind %}
-      <div>
-        <h2>{% trans "Kinds" %}:</h2>
-        {% for tag in tags.kind %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.genre %}
-      <div>
-        <h2>{% trans "Genres" %}:</h2>
-        {% for tag in tags.genre %}
-          <a class="tag-box" href="{{ tag.get_absolute_url }}">
-            {% include "catalogue/tag_box.html" %}
-          </a>
-        {% endfor %}
-      </div>
-    {% endif %}
-    {% if tags.epoch %}
-      <div class="inline-tag-list">
-        <h2>{% trans "Epochs" %}:</h2>
-        {% for tag in tags.epoch %}
+        {% for tag in tags %}
            <a class="tag-box" href="{{ tag.get_absolute_url }}">
              {% include "catalogue/tag_box.html" %}
            </a>
@@ -58,84 +26,15 @@
      {% endif %}
    </div>
  
-  {% if results.title %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by title" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for result in results.title %}
-          <li class="Book-item">
-            {% ssi_include 'catalogue_book_short' pk=result.book.pk %}
-          </li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.author %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by authors" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for author in results.author %}
-          <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=author.book.pk %}</li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.translator %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Results by translators" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for translator in results.translator %}
-          <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=translator.book.pk %}</li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
-
-  {% if results.content %}
-  <div class="book-list-header">
-    <div class="book-box-inner">
-      <p>{% trans "Results in text" %}</p>
-    </div>
-  </div>
    <div>
-    <ol class="work-list">
-      {% for result in results.content %}
+    <ul class="work-list">
+      {% for result in results %}
          <li class="Book-item">
-          {% book_searched result %}
+          <div class="search-result">
+            {% book_searched result %}
+          </div>
          </li>
        {% endfor %}
-    </ol>
+    </ul>
    </div>
-  {% endif %}
-
-  {% if results.other %}
-    <div class="book-list-header">
-      <div class="book-box-inner">
-        <p>{% trans "Other results" %}</p>
-      </div>
-    </div>
-    <div>
-      <ol class="work-list">
-        {% for result in results.other %}
-          <li class="Book-item">
-            {% book_searched result %}
-          </li>
-        {% endfor %}
-      </ol>
-    </div>
-  {% endif %}
  {% endblock %}
diff --git a/src/catalogue/templates/catalogue/snippets/jplayer.html b/src/catalogue/templates/catalogue/snippets/jplayer.html

index 860c2b6..e3c88c1 100644 (file)
--- a/src/catalogue/templates/catalogue/snippets/jplayer.html
+++ b/src/catalogue/templates/catalogue/snippets/jplayer.html
@@ -2,7 +2,7 @@
  {% if audiobooks %}
    <div class="jp-type-playlist">
      <div id="jplayer" class="jp-jplayer" data-player="jp_container_{{ book.pk }}"
-         data-supplied="{% if have_oggs %}oga,{% endif %}mp3"></div>
+         data-supplied="oga,mp3"></div>
      <div id="jp_container_{{ book.pk }}" class="jp-audio">
        <div class="jp-type-single">
          <span class="title"></span>
diff --git a/src/catalogue/views.py b/src/catalogue/views.py

index 267d9b4..a247746 100644 (file)
--- a/src/catalogue/views.py
+++ b/src/catalogue/views.py
@@ -286,44 +286,13 @@ def book_detail(request, slug):
      }, context_instance=RequestContext(request))
  
  
-def get_audiobooks(book):
-    ogg_files = {}
-    for m in book.media.filter(type='ogg').order_by().iterator():
-        ogg_files[m.name] = m
-
-    audiobooks = []
-    have_oggs = True
-    projects = set()
-    for mp3 in book.media.filter(type='mp3').iterator():
-        # ogg files are always from the same project
-        meta = mp3.extra_info
-        project = meta.get('project')
-        if not project:
-            # temporary fallback
-            project = u'CzytamySłuchając'
-
-        projects.add((project, meta.get('funded_by', '')))
-
-        media = {'mp3': mp3}
-
-        ogg = ogg_files.get(mp3.name)
-        if ogg:
-            media['ogg'] = ogg
-        else:
-            have_oggs = False
-        audiobooks.append(media)
-
-    projects = sorted(projects)
-    return audiobooks, projects, have_oggs
-
-
  # używane w publicznym interfejsie
  def player(request, slug):
      book = get_object_or_404(Book, slug=slug)
      if not book.has_media('mp3'):
          raise Http404
  
-    audiobooks, projects, have_oggs = get_audiobooks(book)
+    audiobooks, projects = book.get_audiobooks()
  
      return render_to_response('catalogue/player.html', {
          'book': book,
@@ -672,20 +641,9 @@ def book_mini(request, pk, with_link=True):
      ))(ssi_expect(pk, int)))
  def book_short(request, pk):
      book = get_object_or_404(Book, pk=pk)
-    stage_note, stage_note_url = book.stage_note()
-    audiobooks, projects, have_oggs = get_audiobooks(book)
  
      return render(request, 'catalogue/book_short.html', {
          'book': book,
-        'has_audio': book.has_media('mp3'),
-        'main_link': book.get_absolute_url(),
-        'parents': book.parents(),
-        'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
-        'show_lang': book.language_code() != settings.LANGUAGE_CODE,
-        'stage_note': stage_note,
-        'stage_note_url': stage_note_url,
-        'audiobooks': audiobooks,
-        'have_oggs': have_oggs,
      })
  
  
@@ -698,24 +656,17 @@ def book_short(request, pk):
      ))(ssi_expect(pk, int)))
  def book_wide(request, pk):
      book = get_object_or_404(Book, pk=pk)
-    stage_note, stage_note_url = book.stage_note()
      extra_info = book.extra_info
-    audiobooks, projects, have_oggs = get_audiobooks(book)
  
      return render(request, 'catalogue/book_wide.html', {
          'book': book,
-        'has_audio': book.has_media('mp3'),
          'parents': book.parents(),
          'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
          'show_lang': book.language_code() != settings.LANGUAGE_CODE,
-        'stage_note': stage_note,
-        'stage_note_url': stage_note_url,
  
          'main_link': reverse('book_text', args=[book.slug]) if book.html_file else None,
          'extra_info': extra_info,
          'hide_about': extra_info.get('about', '').startswith('http://wiki.wolnepodreczniki.pl'),
-        'audiobooks': audiobooks,
-        'have_oggs': have_oggs,
      })
  
  
diff --git a/src/search/context_processors.py b/src/search/context_processors.py

index a3f1ea9..6ad2fe9 100644 (file)
--- a/src/search/context_processors.py
+++ b/src/search/context_processors.py
@@ -7,4 +7,4 @@ from search.forms import SearchForm
  
  
  def search_form(request):
-    return {'search_form': SearchForm(reverse('search.views.hint'), request.GET)}
+    return {'search_form': SearchForm(reverse('search.views.hint')+'?max=10', request.GET)}
diff --git a/src/search/custom.py b/src/search/custom.py

index dfface9..da21e01 100644 (file)
--- a/src/search/custom.py
+++ b/src/search/custom.py
@@ -184,5 +184,6 @@ class CustomSolrInterface(sunburnt.SolrInterface):
              off = -start
              snip = snip[:e + off] + mark[1] + snip[e + off:]
              snip = snip[:s + off] + mark[0] + snip[s + off:]
+        snip = re.sub('%s[ \t\n]+%s' % (mark[1], mark[0]), " ", snip)
  
          return snip
diff --git a/src/search/index.py b/src/search/index.py

index 7dfe6ef..ce60978 100644 (file)
--- a/src/search/index.py
+++ b/src/search/index.py
@@ -240,7 +240,8 @@ class Index(SolrIndex):
              self.remove_book(book, remove_snippets=False)
  
          book_doc = self.create_book_doc(book)
-        meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'translators', 'title'])
+        meta_fields = self.extract_metadata(book, book_info, dc_only=[
+            'source_name', 'authors', 'translators', 'title', 'epochs', 'kinds', 'genres'])
          # let's not index it - it's only used for extracting publish date
          if 'source_name' in meta_fields:
              del meta_fields['source_name']
@@ -257,8 +258,9 @@ class Index(SolrIndex):
              'published_date': meta_fields['published_date']
              }
  
-        if 'translators' in meta_fields:
-            book_fields['translators'] = meta_fields['translators']
+        for tag_name in ('translators', 'epochs', 'kinds', 'genres'):
+            if tag_name in meta_fields:
+                book_fields[tag_name] = meta_fields[tag_name]
  
          self.index_content(book, book_fields=book_fields)
  
@@ -272,7 +274,7 @@ class Index(SolrIndex):
          ]
  
      ignore_content_tags = [
-        'uwaga', 'extra',
+        'uwaga', 'extra', 'nota_red',
          'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
          'didaskalia',
          'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
@@ -365,8 +367,8 @@ class Index(SolrIndex):
          if master is None:
              return []
  
-        def walker(node, ignore_tags=()):
-            if node.tag not in ignore_tags:
+        def walker(node):
+            if node.tag not in self.ignore_content_tags:
                  yield node, None, None
                  if node.text is not None:
                      yield None, node.text, None
@@ -417,17 +419,10 @@ class Index(SolrIndex):
  
              if 'themes' in fields:
                  doc['themes'] = fields['themes']
-            doc['uid'] = "part%s%s%s" % (doc['header_index'],
-                                         doc['header_span'],
-                                         doc.get('fragment_anchor', ''))
+            doc['uid'] = "part%s-%s-%s-%s" % (
+                book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
              return doc
  
-        def give_me_utf8(s):
-            if isinstance(s, unicode):
-                return s.encode('utf-8')
-            else:
-                return s
-
          fragments = {}
          snippets = Snippets(book.id).open('w')
          try:
@@ -448,7 +443,7 @@ class Index(SolrIndex):
                      content.append(text)
                  handle_text = [all_content]
  
-                for start, text, end in walker(header, ignore_tags=self.ignore_content_tags):
+                for start, text, end in walker(header):
                      # handle footnotes
                      if start is not None and start.tag in self.footnote_tags:
                          footnote = []
@@ -515,8 +510,7 @@ class Index(SolrIndex):
  
  
  class SearchResult(object):
-    def __init__(self, doc, how_found=None, query=None, query_terms=None):
-        #        self.search = search
+    def __init__(self, doc, how_found=None, query_terms=None):
          self.boost = 1.0
          self._hits = []
          self._processed_hits = None  # processed hits
@@ -734,71 +728,23 @@ class Search(SolrIndex):
  
          return q
  
-    def search_phrase(self, searched, field='text', book=False,
-                      filters=None,
-                      snippets=False):
-        if filters is None:
-            filters = []
-        if book:
-            filters.append(self.index.Q(is_book=True))
-
-        q = self.index.query(**{field: searched})
-        q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True)
-        res = q.execute()
-        return [SearchResult(found, how_found=u'search_phrase') for found in res]
-
-    def search_some(self, searched, fields, book=True,
-                    filters=None, snippets=True, query_terms=None):
-        assert isinstance(fields, list)
-        if filters is None:
-            filters = []
+    def search_words(self, words, fields, book=True):
+        filters = []
+        for word in words:
+            word_filter = None
+            for field in fields:
+                q = self.index.Q(**{field: word})
+                if word_filter is None:
+                    word_filter = q
+                else:
+                    word_filter |= q
+            filters.append(word_filter)
          if book:
-            filters.append(self.index.Q(is_book=True))
-
-        query = self.index.Q()
-
-        for fld in fields:
-            query = self.index.Q(query | self.make_term_query(searched, fld))
-
-        query = self.index.query(query)
+            query = self.index.query(is_book=True)
+        else:
+            query = self.index.query()
          query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
-        res = query.execute()
-        return [SearchResult(found, how_found='search_some', query_terms=query_terms) for found in res]
-
-    def search_everywhere(self, searched, query_terms=None):
-        """
-        Tries to use search terms to match different fields of book (or its parts).
-        E.g. one word can be an author survey, another be a part of the title, and the rest
-        are some words from third chapter.
-        """
-        books = []
-        # content only query : themes x content
-        q = self.make_term_query(searched, 'text')
-        q_themes = self.make_term_query(searched, 'themes_pl')
-
-        query = self.index.query(q).query(q_themes).field_limit(score=True, all_fields=True)
-        res = query.execute()
-
-        for found in res:
-            books.append(SearchResult(found, how_found='search_everywhere_themesXcontent', query_terms=query_terms))
-
-        # query themes/content x author/title/tags
-        in_content = self.index.Q()
-        in_meta = self.index.Q()
-
-        for fld in ['themes_pl', 'text']:
-            in_content |= self.make_term_query(searched, field=fld)
-
-        for fld in ['tags', 'authors', 'title']:
-            in_meta |= self.make_term_query(searched, field=fld)
-
-        q = in_content & in_meta
-        res = self.index.query(q).field_limit(score=True, all_fields=True).execute()
-
-        for found in res:
-            books.append(SearchResult(found, how_found='search_everywhere', query_terms=query_terms))
-
-        return books
+        return [SearchResult(found, how_found='search_words') for found in query.execute()]
  
      def get_snippets(self, searchresult, query, field='text', num=1):
          """
@@ -821,9 +767,10 @@ class Search(SolrIndex):
                  text = snippets.get((int(position),
                                       int(length)))
                  snip = self.index.highlight(text=text, field=field, q=query)
-                snips[idx] = snip
-                if snip:
-                    num -= 1
+                if snip not in snips:
+                    snips[idx] = snip
+                    if snip:
+                        num -= 1
                  idx += 1
  
          except IOError, e:
@@ -879,13 +826,9 @@ class Search(SolrIndex):
                  if is_pdcounter:
                      if category == 'pd_author':
                          tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
-                    elif category == 'pd_book':
+                    else:  # category == 'pd_book':
                          tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
                          tag.category = 'pd_book'  # make it look more lik a tag.
-                    else:
-                        # WTF
-                        print ("Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (
-                            int(doc.get('tag_id')), category)).encode('utf-8')
                      pd_tags.append(tag)
                  else:
                      tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
@@ -915,8 +858,10 @@ class Search(SolrIndex):
          query = query.strip()
          if prefix:
              q |= self.index.Q(title=query + "*")
+            q |= self.index.Q(title_orig=query + "*")
          else:
              q |= self.make_term_query(query, field='title')
+            q |= self.make_term_query(query, field='title_orig')
          qu = self.index.query(q)
          only_books = self.index.Q(is_book=True)
          return self.search_books(qu, [only_books])
diff --git a/src/search/management/commands/reindex.py b/src/search/management/commands/reindex.py

index b8cb49c..da4574f 100755 (executable)
--- a/src/search/management/commands/reindex.py
+++ b/src/search/management/commands/reindex.py
@@ -51,6 +51,8 @@ class Command(BaseCommand):
                      help='book id instead of slugs'),
          make_option('-t', '--just-tags', action='store_true', dest='just_tags', default=False,
                      help='just reindex tags'),
+        make_option('--start', dest='start_from', default=None, help='start from this slug'),
+        make_option('--stop', dest='stop_after', default=None, help='stop after this slug'),
      )
  
      def handle(self, *args, **opts):
@@ -67,14 +69,23 @@ class Command(BaseCommand):
                      else:
                          books += Book.objects.filter(slug=a).all()
              else:
-                books = list(Book.objects.all())
-
+                books = list(Book.objects.order_by('slug'))
+            start_from = opts.get('start_from')
+            stop_after = opts.get('stop_after')
+            if start_from:
+                start_from = start_from.replace('-', '')
+            if stop_after:
+                stop_after = stop_after.replace('-', '')
              while books:
                  try:
                      b = books[0]
-                    print b.title
-                    idx.index_book(b)
-                    idx.index.commit()
+                    slug = b.slug.replace('-', '')
+                    if stop_after and slug > stop_after:
+                        break
+                    if not start_from or slug >= start_from:
+                        print b.slug
+                        idx.index_book(b)
+                        idx.index.commit()
                      books.pop(0)
                  except:
                      traceback.print_exc()
diff --git a/src/search/mock_search.py b/src/search/mock_search.py

index b1c8162..3bd7b93 100644 (file)
--- a/src/search/mock_search.py
+++ b/src/search/mock_search.py
@@ -21,12 +21,10 @@ class Search(Mock):
      index = MockIndex()
  
      @staticmethod
-    def _find_some_books(snippets=False, query_terms=None, max_results=20):
+    def _find_some_books(query_terms=None, max_results=20):
          from .index import SearchResult
  
          qs = Book.objects.order_by('?')
-        if snippets:
-            qs = qs.exclude(fragments=None)
          results = []
          for book in qs[:randint(1, max_results)]:
              doc = {
@@ -34,33 +32,13 @@ class Search(Mock):
                  'book_id': book.pk,
                  'published_date': randint(1000, 1920),
                  }
-            if snippets:
-                fragment = book.fragments.order_by('?')[0]
-                doc.update({
-                    'header_type': choice(['strofa', 'akap']),
-                    'header_index': randint(100, 200),
-                    'header_span': randint(100, 200),
-                    'fragment_anchor': fragment.anchor,
-                    'snippets_position': randint(100, 200),
-                    'snippets_length': randint(100, 200),
-                    'snippets_revision': randint(1, 100),
-                    'themes_pl': fragment.tags.filter(category='theme'),
-                })
              res = SearchResult(doc, how_found='mock', query_terms=query_terms)
-            if snippets:
-                res.snippets = [fragment.short_text]
              results.append(res)
          return results
  
-    def search_phrase(self, searched, field='text', book=False, filters=None, snippets=False):
-        return self._find_some_books(snippets)
-
-    def search_some(self, searched, fields, book=True, filters=None, snippets=True, query_terms=None):
-        return self._find_some_books(snippets, query_terms)
-
      # WTF
      def search_books(self, query, filters=None, max_results=10):
-        return self._find_some_books(snippets, max_results=max_results)
+        return self._find_some_books(max_results=max_results)
  
      def search_everywhere(self, searched, query_terms=None):
          return []
diff --git a/src/search/templatetags/search_tags.py b/src/search/templatetags/search_tags.py

index c135b80..ea8d4ed 100644 (file)
--- a/src/search/templatetags/search_tags.py
+++ b/src/search/templatetags/search_tags.py
@@ -22,17 +22,9 @@ register = template.Library()
  def book_searched(context, result):
      book = Book.objects.get(pk=result.book_id)
  
-    # snippets = []
-    # for hit in result.hits:
-    #     if hit['snippets']:
-    #         snippets.append(hit['snippets'])
-    #     elif hit['fragment']:
-    #         snippets.append(hit['fragment'].short_text)
-
      # We don't need hits which lead to sections but do not have
      # snippets.
      hits = filter(lambda (idx, h):
-                  'fragment' in h or
                    result.snippets[idx] is not None,
                    enumerate(result.hits))
      # print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
@@ -45,8 +37,8 @@ def book_searched(context, result):
              continue
          snip = result.snippets[idx]
          # fix some formattting
-        snip = re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
-                       re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)[0])[0]
+        snip = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)
+        snip = re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", snip)
  
          snip = snip.replace("\n", "<br />").replace('---', '&mdash;')
          hit['snippet'] = snip
@@ -54,5 +46,5 @@ def book_searched(context, result):
      return {
          'request': context['request'],
          'book': book,
-        'hits':  hits and zip(*hits)[1] or []
+        'hits':  zip(*hits)[1] if hits else []
      }
diff --git a/src/search/views.py b/src/search/views.py

index 5b65a30..70a216e 100644 (file)
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -10,7 +10,7 @@ from django.http import HttpResponse, JsonResponse
  from django.utils.translation import ugettext as _
  
  from catalogue.utils import split_tags
-from catalogue.models import Book
+from catalogue.models import Book, Tag
  from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
  from search.index import Search, SearchResult
  from suggest.forms import PublishingSuggestForm
@@ -66,26 +66,6 @@ def hint(request):
  
      prefix = remove_query_syntax_chars(prefix)
  
-    search = Search()
-    # tagi beda ograniczac tutaj
-    # ale tagi moga byc na ksiazce i na fragmentach
-    # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
-    # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
-
-    def is_dupe(tag):
-        if isinstance(tag, PDCounterAuthor):
-            if filter(lambda t: t.slug == tag.slug and t != tag, tags):
-                return True
-        elif isinstance(tag, PDCounterBook):
-            if filter(lambda b: b.slug == tag.slug, tags):
-                return True
-        return False
-
-    def category_name(c):
-        if c.startswith('pd_'):
-            c = c[len('pd_'):]
-        return _(c)
-
      try:
          limit = int(request.GET.get('max', ''))
      except ValueError:
@@ -94,33 +74,25 @@ def hint(request):
          if limit < 1:
              limit = -1
  
-    data = []
-
-    tags = search.hint_tags(prefix, pdcounter=True)
-    tags = filter(lambda t: not is_dupe(t), tags)
-    for t in tags:
-        if not limit:
-            break
-        limit -= 1
-        data.append({
-            'label': t.name,
-            'category': category_name(t.category),
-            'id': t.id,
-            'url': t.get_absolute_url()
-            })
-    if limit:
-        books = search.hint_books(prefix)
-        for b in books:
-            if not limit:
-                break
-            limit -= 1
-            data.append({
-                'label': b.title,
+    data = [
+        {
+            'label': author.name,
+            'category': _('author'),
+            'id': author.id,
+            'url': author.get_absolute_url(),
+        }
+        for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
+    ]
+    if len(data) < limit:
+        data += [
+            {
+                'label': '<cite>%s</cite>, %s' % (b.title, b.author_unicode()),
                  'category': _('book'),
                  'id': b.id,
                  'url': b.get_absolute_url()
-                })
-
+            }
+            for b in Book.objects.filter(title__iregex='\m' + prefix)[:limit-len(data)]
+        ]
      callback = request.GET.get('callback', None)
      if callback:
          return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
@@ -146,67 +118,42 @@ def main(request):
              'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
  
      query = remove_query_syntax_chars(query)
-    
-    search = Search()
  
-    theme_terms = search.index.analyze(text=query, field="themes_pl") \
-        + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        query = ' '.join(words[:10])
+
+    search = Search()
  
-    # change hints
      tags = search.hint_tags(query, pdcounter=True, prefix=False)
      tags = split_tags(tags)
  
-    author_results = search.search_phrase(query, 'authors', book=True)
-    translator_results = search.search_phrase(query, 'translators', book=True)
-
-    title_results = search.search_phrase(query, 'title', book=True)
-
-    # Boost main author/title results with mixed search, and save some of its results for end of list.
-    # boost author, title results
-    author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
-    author_title_rest = []
-
-    for b in author_title_mixed:
-        also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
-        for b2 in also_in_mixed:
-            b2.boost *= 1.1
-        if also_in_mixed is []:
-            author_title_rest.append(b)
-
-    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-    # Because the query is using only one field.
-    text_phrase = SearchResult.aggregate(
-        search.search_phrase(query, 'text', snippets=True, book=False),
-        search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
-
-    everywhere = search.search_everywhere(query, query_terms=theme_terms)
-
-    def already_found(results):
-        def f(e):
-            for r in results:
-                if e.book_id == r.book_id:
-                    e.boost = 0.9
-                    results.append(e)
-                    return True
-            return False
-        return f
-    f = already_found(author_results + translator_results + title_results + text_phrase)
-    everywhere = filter(lambda x: not f(x), everywhere)
-
-    author_results = SearchResult.aggregate(author_results)
-    translator_results = SearchResult.aggregate(translator_results)
-    title_results = SearchResult.aggregate(title_results)
-
-    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-    for field, res in [('authors', author_results),
-                       ('translators', translator_results),
-                       ('title', title_results),
-                       ('text', text_phrase),
-                       ('text', everywhere)]:
-        res.sort(reverse=True)
-        for r in res:
-            search.get_snippets(r, query, field, 3)
+    results_parts = []
+
+    search_fields = []
+    fieldsets = (
+        (['authors'], True),
+        (['title'], True),
+        (['metadata'], True),
+        (['text', 'themes_pl'], False),
+    )
+    for fieldset, is_book in fieldsets:
+        search_fields += fieldset
+        results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+    results = []
+    ids_results = {}
+    for results_part in results_parts:
+        for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+            book_id = result.book_id
+            if book_id in ids_results:
+                ids_results[book_id].merge(result)
+            else:
+                results.append(result)
+                ids_results[book_id] = result
+
+    for result in results:
+        search.get_snippets(result, query, num=3)
  
      suggestion = u''
  
@@ -216,26 +163,9 @@ def main(request):
          except Book.DoesNotExist:
              return False
  
-    author_results = filter(ensure_exists, author_results)
-    translator_results = filter(ensure_exists, translator_results)
-    title_results = filter(ensure_exists, title_results)
-    text_phrase = filter(ensure_exists, text_phrase)
-    everywhere = filter(ensure_exists, everywhere)
-
-    results = author_results + translator_results + title_results + text_phrase + everywhere
-    # ensure books do exists & sort them
-    for res in (author_results, translator_results, title_results, text_phrase, everywhere):
-        res.sort(reverse=True)
-
-    # We don't want to redirect to book text, but rather display result page even with one result.
-    # if len(results) == 1:
-    #     fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-    #     if len(fragment_hits) == 1:
-    #         #anchor = fragment_hits[0]['fragment']
-    #         #frag = Fragment.objects.get(anchor=anchor)
-    #         return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-    #     return HttpResponseRedirect(results[0].book.get_absolute_url())
-    if len(results) == 0:
+    results = filter(ensure_exists, results)
+
+    if not results:
          form = PublishingSuggestForm(initial={"books": query + ", "})
          return render_to_response(
              'catalogue/search_no_hits.html',
@@ -250,15 +180,9 @@ def main(request):
      return render_to_response(
          'catalogue/search_multiple_hits.html',
          {
-            'tags': tags,
+            'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
              'prefix': query,
-            'results': {
-                'author': author_results,
-                'translator': translator_results,
-                'title': title_results,
-                'content': text_phrase,
-                'other': everywhere
-            },
+            'results': results,
              'did_you_mean': suggestion
          },
          context_instance=RequestContext(request))
diff --git a/src/wolnelektury/static/js/search.js b/src/wolnelektury/static/js/search.js

index 4d001e0..786a05c 100644 (file)
--- a/src/wolnelektury/static/js/search.js
+++ b/src/wolnelektury/static/js/search.js
@@ -33,8 +33,7 @@ var __bind = function (self, fn) {
     
         render_item: function (ul, item) {
             return $("<li></li>").data('item.autocomplete', item)
-               .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span>'+
-                       '<span class="search-hint-category mono">'+item.category+'</span></a>')
+               .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span>')
                 .appendTo(ul);
         }, 
  
diff --git a/src/wolnelektury/static/scss/main/book_box.scss b/src/wolnelektury/static/scss/main/book_box.scss

index 44a5955..0e90611 100755 (executable)
--- a/src/wolnelektury/static/scss/main/book_box.scss
+++ b/src/wolnelektury/static/scss/main/book_box.scss
@@ -131,7 +131,7 @@
    }
  }
  
-.audiobook-box {
+.audiobook-box, .search-result .book-box {
    .book-left-column {
      @media screen and (min-width: 1024px) {
        display: inline-block;
@@ -139,9 +139,10 @@
      }
    }
  
-  .audiobook-right-column {
+  .book-right-column {
      @media screen and (min-width: 1024px) {
        float: right;
+      @include size(padding-top, 15px);
        @include size(width, 360px);
      }
    }
@@ -151,7 +152,6 @@
  
      @media screen and (min-width: 1024px) {
        float: right;
-      margin-top: 48px;
      }
    }
  }
diff --git a/src/wolnelektury/static/scss/main/search.scss b/src/wolnelektury/static/scss/main/search.scss

index d8a1e06..ad151d8 100755 (executable)
--- a/src/wolnelektury/static/scss/main/search.scss
+++ b/src/wolnelektury/static/scss/main/search.scss
@@ -26,8 +26,7 @@
      @include size(margin-bottom, 16px);
  }
  
-
-.search-result {
+/*.search-result {
      @include size(border, 1px solid #ddd);
      @include size(box-shadow, 2px 2px 2px #ddd);
      @include size(margin, 1px);
@@ -55,7 +54,7 @@
              margin: 0;
          }
      }
-}
+}*/
  
  
  
@@ -78,12 +77,12 @@
  
  .snippets {
      @media screen and (min-width: 62.5em) {
-        @include size(width, 440px);
+        @include size(width, 360px);
          float: right;
      }
  
      .snippet-text {
-        @include size(font-size, 12px);
+        @include size(font-size, 16px);
          @include size(margin, 13px 0);
          @include size(padding, 12px);
          background: #f7f7f7;
@@ -97,13 +96,7 @@
  
  .search-hint-label {
      display: inline-block;
-    @include size(font-size, 11px);
-    @include size(width, 275px);
-    line-height: 1.636em;
-}
-
-.search-hint-category {
-    @include size(font-size, 11px);
+    @include size(font-size, 16px);
+    //@include size(width, 275px);
      line-height: 1.636em;
  }
-
author	Jan Szejko <janek37@gmail.com>
	Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)
committer	Jan Szejko <janek37@gmail.com>
	Thu, 14 Dec 2017 08:43:33 +0000 (09:43 +0100)
doc/schema.xml		patch \| blob \| history
src/catalogue/models/book.py		patch \| blob \| history
src/catalogue/templates/catalogue/book_searched.html		patch \| blob \| history
src/catalogue/templates/catalogue/book_short.html		patch \| blob \| history
src/catalogue/templates/catalogue/book_text.html		patch \| blob \| history
src/catalogue/templates/catalogue/search_multiple_hits.html		patch \| blob \| history
src/catalogue/templates/catalogue/snippets/jplayer.html		patch \| blob \| history
src/catalogue/views.py		patch \| blob \| history
src/search/context_processors.py		patch \| blob \| history
src/search/custom.py		patch \| blob \| history
src/search/index.py		patch \| blob \| history
src/search/management/commands/reindex.py		patch \| blob \| history
src/search/mock_search.py		patch \| blob \| history
src/search/templatetags/search_tags.py		patch \| blob \| history
src/search/views.py		patch \| blob \| history
src/wolnelektury/static/js/search.js		patch \| blob \| history
src/wolnelektury/static/scss/main/book_box.scss		patch \| blob \| history
src/wolnelektury/static/scss/main/search.scss		patch \| blob \| history