book.build_mobi()
if not settings.NO_SEARCH_INDEX and search_index:
- index_book.delay(book.id, book_info)
+ book.search_index()
+ #index_book.delay(book.id, book_info)
book_descendants = list(book.children.all())
descendants_tags = set()
from datetime import datetime
from celery.task import task
import catalogue.models
+from traceback import print_exc
@task
def touch_tag(tag):
@task
def index_book(book_id, book_info=None):
- return catalogue.models.Book.objects.get(id=book_id).search_index(book_info)
+ try:
+ return catalogue.models.Book.objects.get(id=book_id).search_index(book_info)
+ except Exception, e:
+ print "Exception during index: %s" % e
+ print_exc()
+ raise e
import errno
from librarian import dcparser
from librarian.parser import WLDocument
+from lxml import etree
import catalogue.models
from multiprocessing.pool import ThreadPool
from threading import current_thread
for tag in catalogue.models.Tag.objects.all():
doc = Document()
- doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(tag.id))
+ doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
Create a lucene document referring book id.
"""
doc = Document()
- doc.add(NumericField("book_id", Field.Store.YES, True).setIntValue(book.id))
+ doc.add(NumericField("book_id", Field.Store.YES, True).setIntValue(int(book.id)))
if book.parent is not None:
- doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(book.parent.id))
+ doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(int(book.parent.id)))
return doc
def remove_book(self, book):
if header.tag in self.skip_header_tags:
continue
+ if header.tag is etree.Comment:
+ continue
- content = u' '.join([t for t in header.itertext()])
- content = fix_format(content)
-
- doc = add_part(snippets, header_index=position, header_type=header.tag, content=content)
-
- self.index.addDocument(doc)
+ # section content
+ content = []
for start, end in walker(header):
+ # handle fragments and themes.
if start is not None and start.tag == 'begin':
fid = start.attrib['id'][1:]
fragments[fid] = {'content': [], 'themes': [], 'start_section': position, 'start_header': header.tag}
- fragments[fid]['content'].append(start.tail)
+
elif start is not None and start.tag == 'motyw':
fid = start.attrib['id'][1:]
if start.text is not None:
fragments[fid]['themes'] += map(str.strip, map(give_me_utf8, start.text.split(',')))
- fragments[fid]['content'].append(start.tail)
+
elif start is not None and start.tag == 'end':
fid = start.attrib['id'][1:]
if fid not in fragments:
continue # a broken <end> node, skip it
+ # import pdb; pdb.set_trace()
frag = fragments[fid]
if frag['themes'] == []:
continue # empty themes list.
themes=frag['themes'])
self.index.addDocument(doc)
+
+ # Collect content.
elif start is not None:
for frag in fragments.values():
frag['content'].append(start.text)
+ content.append(start.text)
elif end is not None:
for frag in fragments.values():
frag['content'].append(end.tail)
+ content.append(end.tail)
+
+ # in the end, add a section text.
+ doc = add_part(snippets, header_index=position, header_type=header.tag,
+ content=fix_format(u' '.join(filter(lambda s: s is not None, content))))
+
+ self.index.addDocument(doc)
+
finally:
snippets.close()
fragment = stored.get("fragment_anchor")
+ if snippets:
+ snippets = snippets.replace("/\n", "\n")
hit = (sec + (header_span,), fragment, scoreDocs.score, {'how_found': how_found, 'snippets': snippets and [snippets] or []})
self._hits.append(hit)
frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT])
m = {'score': f[SCORE],
'fragment': frag,
+ 'section_number': f[POSITION][POSITION_INDEX] + 1,
'themes': frag.tags.filter(category='theme')
}
m.update(f[OTHER])
self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
max_results)
for found in top.scoreDocs:
- books.append(SearchResult(self.searcher, found))
+ books.append(SearchResult(self.searcher, found, how_found="search_perfect_book"))
return books
def search_book(self, searched, max_results=20, fuzzy=False, hint=None):
self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
max_results)
for found in top.scoreDocs:
- books.append(SearchResult(self.searcher, found))
+ books.append(SearchResult(self.searcher, found, how_found="search_book"))
return books
flt]),
max_results)
for found in top.scoreDocs:
- books.append(SearchResult(self.searcher, found, snippets=self.get_snippets(found, q)))
+ books.append(SearchResult(self.searcher, found, snippets=self.get_snippets(found, q), how_found='search_perfect_parts'))
return books
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
- books.append(SearchResult(self.searcher, found))
+ books.append(SearchResult(self.searcher, found, how_found='search_everywhere_themesXcontent'))
print "* %s theme x content: %s" % (searched, books[-1]._hits)
# query themes/content x author/title/tags
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
- books.append(SearchResult(self.searcher, found))
+ books.append(SearchResult(self.searcher, found, how_found='search_everywhere'))
print "* %s scatter search: %s" % (searched, books[-1]._hits)
return books
--- /dev/null
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+# import feedparser
+# import datetime
+
+from django import template
+from django.template import Node, Variable
+from django.utils.encoding import smart_str
+from django.core.urlresolvers import reverse
+# from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
+# from django.db.models import Q
+from django.conf import settings
+# from django.utils.translation import ugettext as _
+from catalogue.templatetags.catalogue_tags import book_wide
+from catalogue.models import Book
+# from catalogue.forms import SearchForm
+# from catalogue.utils import split_tags
+
+
+register = template.Library()
+
+
+@register.inclusion_tag('catalogue/book_searched.html')
+def book_searched(result):
+ book = Book.objects.get(pk=result.book_id)
+ vals = book_wide(book)
+
+ # snippets = []
+ # for hit in result.hits:
+ # if hit['snippets']:
+ # snippets.append(hit['snippets'])
+ # elif hit['fragment']:
+ # snippets.append(hit['fragment'].short_text)
+
+ # We don't need hits which lead to sections but do not have
+ # snippets.
+ vals['hits'] = filter(lambda h: 'fragment' in h or
+ h['snippets'], result.hits)
+
+ for hit in vals['hits']:
+ hit['snippets'] = map(lambda s: s.replace("\n", "<br />").replace('---', '—'), hit['snippets'])
+
+ return vals
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import sys
+sys.path.insert(0, '../apps')
+sys.path.insert(0, '../lib')
+sys.path.insert(0, '../lib/librarian')
+sys.path.insert(0, '../wolnelektury')
+sys.path.insert(0, '..')
+
+from django.core.management import setup_environ
+from wolnelektury import settings
+import sys
+import zipfile
+
+setup_environ(settings)
+
+from catalogue.models import Book
+
+
+if len(sys.argv) < 2:
+ print "Provide a zip name as first argument"
+ sys.exit(-1)
+
+zip = zipfile.ZipFile(sys.argv[1], 'w')
+for book in Book.objects.all():
+ zip.write(book.xml_file.path, "%s.xml" % book.slug)
+zip.close()
+
.mono {
font-family: "Andale Mono", "Lucida Sans Typewriter", "Courier New";
- font-weight: bold;
+/* font-weight: bold; */
}
.accent1 {
.book-wide-box {
width: 98.5em;
+
+ /** This is a fullpage box, it must be aligned with the top menu.
+ This corresponds to a .1em margin below **/
margin-left: -0.1em;
}
+/*
+ * A mini-box wraps its contents (image + label) in an <a>;
+ * other boxes have an inner box as a wrapper.
+ */
+
+.book-box-inner {
+ /* min-height rather than height, so the box can grow with its content */
+ min-height: 19.75em;
+ margin: .5em; /* NOTE(review): the ".book-mini-box a, .book-box-inner" rule that follows sets margin: .1em with equal specificity, so it overrides this value - confirm which margin is intended */
+}
+
.book-mini-box a, .book-box-inner {
display: block;
color: black;
border: 1px solid #ddd;
- height: 20em;
+/* height: 20em; */
padding: .8em 1em;
margin: .1em;
background: #fff;
margin: .1em;
overflow: hidden;
}
-.book-box-inner {
- height: 19.75em;
- margin: .5em;
-}
+
.book-wide-box .book-box-inner {
- height: 24.4em;
+ /* min, so it can grow */
+ min-height: 24.4em;
}
+/*.book-wide-box.search-result .book-box-inner, .book-wide-box.search-result blockquote {
+ height: auto !important;
+}*/
+
.book-mini-box img, .book-box img, .book-wide-box img {
width: 13.9em;
height: 19.3em;
vertical-align: center;
}
-.book-wide-box blockquote div {
+.book-wide-box blockquote div.cite-text {
padding: 0.888em;
}
+.book-wide-box blockquote p.cite-more {
+ display: inline;
+ font-size: 0.611em;
+ float: right;
+}
+
ul.inline-items, ul.inline-items li {
margin: 0;
padding: 0;
display: inline-block;
}
-.book-wide-box #other-tools {
+.book-wide-box .other-tools {
float: left;
width: 14.5em;
margin: 6em 0 0 1.5em;
}
-.book-wide-box #other-download {
+.book-wide-box .other-download {
float: left;
width: 22.5em;
- margin: 6em 1.5em 0em 1.5em
+ margin: 6em 1.5em 0em 1.5em;
}
-
-
.star {
font-size: 2.25em;
margin-right: .5em;
display: none;
}
+.snippets .snippet-text {
+ font-size: 1.2em;
+ margin: 1.083em 0em;
+}
border-right: none;
}
+#lang-menu-items {
+ z-index: 1;
+}
+
/* ======================== */
/* = Footer with sponsors = */
/* ======================== */
$(function() { // jQuery shorthand: run once the DOM is ready
$("#search").search().labelify({labelledClass: "blur"});
+
+ $(".search-result .see-more-snippets").click(function() { // reveal the snippets list of the clicked result
+ $(this).closest('.search-result').find('.snippets').removeClass('ui-helper-hidden'); // only un-hides; it is never hidden again here
+ });
});
--- /dev/null
+{% extends "catalogue/book_wide.html" %}
+{% load i18n %}
+{# Search-result variant of the wide book box: the quote area shows the first hit and an initially hidden list holds all hits. #}
+
+
+{% block box-class %}book-wide-box search-result{% endblock %}
+
+{% block quote %}
+{# First hit: prefer its search snippet (linked to the matched section), otherwise fall back to the fragment's short text. #}
+{% if hits.0.snippets %}
+    <div class="cite-text"><a href="{% url book_text book.slug %}#f{{hits.0.section_number}}">{{hits.0.snippets.0|safe}}</a></div>
+{% else %}{% if hits.0.fragment %}
+    <div class="cite-text"><a href="{{hits.0.fragment.get_absolute_url}}">{{hits.0.fragment.short_text|safe}}</a></div>
+{% endif %}{% endif %}
+
+{# Offer the full list only when there is more than one hit. #}
+{% if hits.1 %}
+    <p class="cite-more mono"><a class="see-more-snippets" href="#snippets-{{book.id}}">{% trans "See more" %}</a></p>
+{% endif %}
+{% endblock %}
+
+
+{% block box-append %}
+{# All hits for this book; stays hidden until the ui-helper-hidden class is removed by the "See more" click handler. #}
+<div class="snippets ui-helper-hidden">
+{# Empty named anchor: wrapping the hit links in it would nest <a> elements, which HTML does not allow. #}
+<a name="snippets-{{book.id}}"></a>
+{% for hit in hits %}
+    {% if hit.snippets %}
+    <div class="snippet-text"><a href="{% url book_text book.slug %}#f{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
+    {% else %}
+        {% if hit.fragment %}
+    <div class="snippet-text"><a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a></div>
+        {% endif %}
+    {% endif %}
+{% endfor %}
+</div>
+{% endblock %}
{% block right-column %}
<div class="right-column">
<blockquote id="quote" class="cite-body">
+ {% block quote %}
<div>Ten, który walczy z potworami powinien zadbać, by sam nie stał się potworem.
Gdy długo spoglądamy w otchłań, otchłań spogląda również w nas.</div>
+ {% endblock %}
</blockquote>
-
- <div id="other-tools">
+ <div class="other-tools">
<h2 class="mono">{% trans "See" %}</h2>
<ul class="inline-items">
{% if extra_info.source_url %}
{% endif %}
</ul>
</div>
- <div id="other-download">
+ <div class="other-download">
<h2 class="mono">{% trans "Download" %}</h2>
<ul class="inline-items">
<li>
{% extends "base.html" %}
{% load i18n %}
-{% load catalogue_tags pagination_tags %}
+{% load search_tags pagination_tags %}
{% block titleextra %}{% trans "Search" %}{% endblock %}
{% block bodyid %}tagged-object-list{% endblock %}
{% block body %}
- <h1>{% trans "Search" %}</h1>
-
{% if did_you_mean %}
<span class="did_you_mean">{% trans "Dod you mean" %} <a href="{% url search %}?q={{did_you_mean|urlencode}}">{{did_you_mean|lower}}</a></b>?</span>
{% endif %}
+ <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje (z book_count) -->
+
<div id="results">
- <ol>
{% for result in results %}
- <li>
- <p><a href="{{result.book.get_absolute_url}}">{{result.book.pretty_title}}</a> (id: {{result.book_id}}, score: {{result.score}})</p>
- <ul>
- {% for hit in result.hits %}
- <li>
- {% if hit.fragment %}
- <a href="{{hit.fragment.get_absolute_url}}">Idź do fragmentu</a>
- <div style="">Tagi/Motywy: {% for tag in hit.themes %}{{tag.name}} {% endfor %}</div>
- {# snippets or short html? #}
- {% if hit.snippets %}
- {% for snip in hit.snippets %}
- {{snip|safe}}<br/>
- {% endfor %}
- {% else %}
- {{hit.fragment.short_text|safe}}
- {% endif %}
-
- {% else %}
- {# it's a section #}
- <a href="{% url book_text result.book.slug %}#f{{hit.section_number}}">{{hit.header_index}}</a>
- {% if hit.snippets %}
- {% for snip in hit.snippets %}
- {{snip|safe}}<br/>
- {% endfor %}
- {% else %}
- [section matched but no snippets :-(]
- {% endif %}
- {% endif %}
- </li>
- {% endfor %}
-
- </ul>
- </li>
- {% empty %}
- <p>No results.</p>
+ {% book_searched result %}
{% endfor %}
- </ol>
</div>
-{% comment %}
- <div id="books-list">
- <p>{% trans "More than one result matching the criteria found." %}</p>
- <ul class='matches'>
- {% for match, link, type in results %}
- <li>{% trans type %}: <a href='{{ link }}'>
- {% ifequal type "book" %}
- {% book_title match %}
- {% else %}
- {{ match.name }}
- {% endifequal %}
- </a></li>
- {% endfor %}
- </ul>
- </div>
-{% endcomment %}
{% endblock %}