From: Marcin Koziej Date: Mon, 30 Jan 2012 16:10:48 +0000 (+0100) Subject: search display tags at top. X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/94924a2ca60f4f045c739cdb19d859cdcf8959af search display tags at top. --- diff --git a/apps/search/index.py b/apps/search/index.py index 6d97047a0..d63d3f8b4 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -27,6 +27,7 @@ from librarian import dcparser from librarian.parser import WLDocument from lxml import etree import catalogue.models +from pdcounter.models import Author as PDCounterAuthor from multiprocessing.pool import ThreadPool from threading import current_thread import atexit @@ -219,6 +220,15 @@ class Index(BaseIndex): doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED)) self.index.addDocument(doc) + for pdtag in PDCounterAuthor.objects.all(): + doc = Document() + doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id))) + doc.add(Field("tag_name", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) + doc.add(Field("tag_name_pl", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) + doc.add(Field("tag_category", 'pdcounter', Field.Store.NO, Field.Index.NOT_ANALYZED)) + doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED)) + self.index.addDocument(doc) + def create_book_doc(self, book): """ Create a lucene document referring book id. @@ -604,26 +614,25 @@ class SearchResult(object): stored = search.searcher.doc(scoreDocs.doc) self.book_id = int(stored.get("book_id")) - header_type = stored.get("header_type") - if not header_type: - return - - sec = (header_type, int(stored.get("header_index"))) - header_span = stored.get('header_span') - header_span = header_span is not None and int(header_span) or 1 - - fragment = stored.get("fragment_anchor") - pd = stored.get("published_date") if pd is None: pd = 0 self.published_date = int(pd) - if snippets: - snippets = snippets.replace("/\n", "\n") - hit = (sec + (header_span,), fragment, scoreDocs.score, {'how_found': how_found, 'snippets': snippets and [snippets] or []}) + header_type = stored.get("header_type") + # we have a content hit in some header of fragment + if header_type is not None: + sec = (header_type, int(stored.get("header_index"))) + header_span = stored.get('header_span') + header_span = header_span is not None and int(header_span) or 1 + + fragment = stored.get("fragment_anchor") - self._hits.append(hit) + if snippets: + snippets = snippets.replace("/\n", "\n") + hit = (sec + (header_span,), fragment, scoreDocs.score, {'how_found': how_found, 'snippets': snippets and [snippets] or []}) + + self._hits.append(hit) self.search = search self.searched = searched @@ -749,6 +758,8 @@ class SearchResult(object): def __cmp__(self, other): c = cmp(self.score, other.score) if c == 0: + if not hasattr(other,'published_date') or not hasattr(self, 'published_date'): + import pdb; pdb.set_trace() # this is inverted, because earlier date is better return cmp(other.published_date, self.published_date) else: @@ -1208,19 +1219,27 @@ class Search(IndexStore): if terms: return JArray('object')(terms, Term) - def search_tags(self, query, filter=None, max_results=40): + def search_tags(self, query, filters=None, max_results=40, pdcounter=False): """ Search for Tag objects using query. """ - tops = self.searcher.search(query, filter, max_results) + if not pdcounter: + filters = self.chain_filters([filter, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)]) + tops = self.searcher.search(query, filters, max_results) tags = [] for found in tops.scoreDocs: doc = self.searcher.doc(found.doc) - tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id")) - tags.append(tag) - print "%s (%d) -> %f" % (tag, tag.id, found.score) - + is_pdcounter = doc.get('is_pdcounter') + if is_pdcounter: + tag = PDCounterAuthor.objects.get(id=doc.get('tag_id')) + else: + tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id")) + # don't add the pdcounter tag if same tag already exists + if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)): + tags.append(tag) + # print "%s (%d) -> %f" % (tag, tag.id, found.score) + print 'returning %s' % tags return tags def search_books(self, query, filter=None, max_results=10): @@ -1234,7 +1253,7 @@ class Search(IndexStore): bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id"))) return bks - def create_prefix_phrase(self, toks, field): + def make_prefix_phrase(self, toks, field): q = MultiPhraseQuery() for i in range(len(toks)): t = Term(field, toks[i]) @@ -1260,7 +1279,7 @@ class Search(IndexStore): return only_term - def hint_tags(self, string, max_results=50): + def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True): """ Return auto-complete hints for tags using prefix search. @@ -1269,14 +1288,17 @@ class Search(IndexStore): top = BooleanQuery() for field in ['tag_name', 'tag_name_pl']: - q = self.create_prefix_phrase(toks, field) + if prefix: + q = self.make_prefix_phrase(toks, field) + else: + q = self.make_term_query(toks, field) top.add(BooleanClause(q, BooleanClause.Occur.SHOULD)) no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True) - return self.search_tags(top, no_book_cat, max_results=max_results) + return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter) - def hint_books(self, string, max_results=50): + def hint_books(self, string, max_results=50, prefix=True): """ Returns auto-complete hints for book titles Because we do not index 'pseudo' title-tags. @@ -1284,7 +1306,10 @@ class Search(IndexStore): """ toks = self.get_tokens(string, field='SIMPLE') - q = self.create_prefix_phrase(toks, 'title') + if prefix: + q = self.make_prefix_phrase(toks, 'title') + else: + q = self.make_term_query(toks, 'title') return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results) diff --git a/apps/search/views.py b/apps/search/views.py index cf008705d..623b311fb 100644 --- a/apps/search/views.py +++ b/apps/search/views.py @@ -8,7 +8,7 @@ from django.views.decorators import cache from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect from django.utils.translation import ugettext as _ -from catalogue.utils import get_random_hash +from catalogue.utils import split_tags from catalogue.models import Book, Tag, Fragment from catalogue.fields import dumps from catalogue.views import JSONResponse @@ -34,7 +34,7 @@ def did_you_mean(query, tokens): authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t)) if len(authors) > 0: continue - + if not dictionary.check(t): try: change[t] = dictionary.suggest(t)[0] @@ -69,7 +69,7 @@ def hint(request): # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie. - tags = s.hint_tags(prefix) + tags = s.hint_tags(prefix, pdcounter=True) books = s.hint_books(prefix) # TODO DODAC TU HINTY @@ -97,26 +97,28 @@ def main(request): fuzzy = False if 'q' in request.GET: - tags = request.GET.get('tags', '') + # tags = request.GET.get('tags', '') query = request.GET['q'] - book_id = request.GET.get('book', None) - book = None - if book_id is not None: - book = get_object_or_404(Book, id=book_id) + # book_id = request.GET.get('book', None) + # book = None + # if book_id is not None: + # book = get_object_or_404(Book, id=book_id) - hint = srch.hint() - try: - tag_list = Tag.get_tag_list(tags) - except: - tag_list = [] + # hint = srch.hint() + # try: + # tag_list = Tag.get_tag_list(tags) + # except: + # tag_list = [] if len(query) < 2: - return render_to_response('catalogue/search_too_short.html', {'tags': tag_list, 'prefix': query}, + return render_to_response('catalogue/search_too_short.html', {'prefix': query}, context_instance=RequestContext(request)) - hint.tags(tag_list) - if book: - hint.books(book) + # hint.tags(tag_list) + # if book: + # hint.books(book) + tags = srch.hint_tags(query, pdcounter=True, prefix=False) + tags = split_tags(tags) toks = StringReader(query) tokens_cache = {} @@ -160,7 +162,7 @@ def main(request): author_results = SearchResult.aggregate(author_results) title_results = SearchResult.aggregate(title_results) - + everywhere = SearchResult.aggregate(everywhere, author_title_rest) for res in [author_results, title_results, text_phrase, everywhere]: @@ -168,15 +170,15 @@ def main(request): for r in res: for h in r.hits: h['snippets'] = map(lambda s: - re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", + re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"", re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets']) - + suggestion = did_you_mean(query, srch.get_tokens(toks, field="SIMPLE")) print "dym? %s" % repr(suggestion).encode('utf-8') - + results = author_results + title_results + text_phrase + everywhere results.sort(reverse=True) - + if len(results) == 1: fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits) if len(fragment_hits) == 1: @@ -187,14 +189,15 @@ def main(request): elif len(results) == 0: form = PublishingSuggestForm(initial={"books": query + ", "}) return render_to_response('catalogue/search_no_hits.html', - {'tags': tag_list, + {'tags': tags, 'prefix': query, "form": form, 'did_you_mean': suggestion}, context_instance=RequestContext(request)) + print "TAGS: %s" % tags return render_to_response('catalogue/search_multiple_hits.html', - {'tags': tag_list, + {'tags': tags, 'prefix': query, 'results': { 'author': author_results, 'title': title_results, diff --git a/wolnelektury/static/css/base.css b/wolnelektury/static/css/base.css index ecb461a94..64e390e55 100755 --- a/wolnelektury/static/css/base.css +++ b/wolnelektury/static/css/base.css @@ -173,3 +173,9 @@ h2 { padding-top:3em; background: #fff; } + +/* just on search page */ +.top-tag-list { + margin-top: 2.2em; + margin-bottom: 1.6em; +} \ No newline at end of file diff --git a/wolnelektury/static/js/base.js b/wolnelektury/static/js/base.js index 8225a1f6c..34f850097 100755 --- a/wolnelektury/static/js/base.js +++ b/wolnelektury/static/js/base.js @@ -100,6 +100,8 @@ $('.open-player').click(function(event) { return false; }); + $(function(){ + $("#search").search();}); }); })(jQuery) diff --git a/wolnelektury/static/js/search.js b/wolnelektury/static/js/search.js index dc85ddee0..ee4cc51b5 100644 --- a/wolnelektury/static/js/search.js +++ b/wolnelektury/static/js/search.js @@ -38,12 +38,6 @@ var __bind = function (self, fn) { }, - - }); - - $(".search-result .see-more-snippets").click(function() { - $(this).closest('.search-result').find('.snippets').removeClass('ui-helper-hidden'); - }); }); diff --git a/wolnelektury/templates/catalogue/search_multiple_hits.html b/wolnelektury/templates/catalogue/search_multiple_hits.html index efe6d7915..70988f395 100644 --- a/wolnelektury/templates/catalogue/search_multiple_hits.html +++ b/wolnelektury/templates/catalogue/search_multiple_hits.html @@ -11,7 +11,43 @@ {% trans "Did you mean" %} {{did_you_mean|lower}}? {% endif %} +
+ {% if tags.author %} +
+
{% trans "Authors" %}:
+
+ {% inline_tag_list tags.author %} +
+
+ {% endif %} + {% if tags.kind %} +
+
{% trans "Kinds" %}:
+
+ {% inline_tag_list tags.kind %} +
+
+ {% endif %} + {% if tags.genre %} +
+
{% trans "Genres" %}:
+
+ {% inline_tag_list tags.genre %} +
+
+ {% endif %} + {% if tags.epoch %} +
+
{% trans "Epochs" %}:
+
+ {% inline_tag_list tags.epoch %} +
+
+ {% endif %} +
+ + {% if results.author %}
@@ -20,11 +56,7 @@
    - {% for author in results.author %} -
  1. - {% book_short author.book %} -
  2. - {% endfor %} + {% for author in results.author %}
  3. {% book_short author.book %}
  4. {% endfor %}
{% endif %} @@ -37,11 +69,9 @@
    - {% for result in results.title %} -
  1. + {% for result in results.title %}
  2. {% book_short result.book %} -
  3. - {% endfor %} + {% endfor %}
{% endif %} diff --git a/wolnelektury/templates/catalogue/tagged_object_list.html b/wolnelektury/templates/catalogue/tagged_object_list.html index 8d5300542..b729ca917 100644 --- a/wolnelektury/templates/catalogue/tagged_object_list.html +++ b/wolnelektury/templates/catalogue/tagged_object_list.html @@ -7,63 +7,65 @@ {% block bodyid %}tagged-object-list{% endblock %} {% block body %} -
-
+
+

{% html_title_from_tags tags %}

{% with tags|last as last_tag %} {% if last_tag.has_description %} -
- -
{{ last_tag.description|safe|truncatewords_html:40 }}
-
+
+ +
{{ last_tag.description|safe|truncatewords_html:40 }}
+
{% endif %} - +
- {% if categories.author %} -
-
{% trans "Authors" %}:
-
- {% inline_tag_list categories.author tags %} -
+ {% if categories.author %} +
+
{% trans "Authors" %}:
+
+ {% inline_tag_list categories.author tags %}
- {% endif %} - {% if categories.kind %} -
-
{% trans "Kinds" %}:
-
- {% inline_tag_list categories.kind tags %} -
+
+ {% endif %} + {% if categories.kind %} +
+
{% trans "Kinds" %}:
+
+ {% inline_tag_list categories.kind tags %}
- {% endif %} - {% if categories.genre %} -
-
{% trans "Genres" %}:
-
- {% inline_tag_list categories.genre tags %} -
+
+ {% endif %} + {% if categories.genre %} +
+
{% trans "Genres" %}:
+
+ {% inline_tag_list categories.genre tags %}
- {% endif %} - {% if categories.epoch %} -
-
{% trans "Epochs" %}:
-
- {% inline_tag_list categories.epoch tags %} -
+
+ {% endif %} + {% if categories.epoch %} +
+
{% trans "Epochs" %}:
+
+ {% inline_tag_list categories.epoch tags %}
- {% endif %} +
+ {% endif %} + + {% if categories.theme %} +
+

+ {% trans "Motifs and themes" %}

+
+ {% tag_list categories.theme tags %} +
+
+ {% endif %}
- {% if categories.theme %} -
-

- {% trans "Motifs and themes" %}

-
- {% tag_list categories.theme tags %} -
-
- {% endif %} +
{% if theme_is_set %}