<field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
<field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
<field name="title" type="text_pl" stored="false" indexed="true"/>
- <field name="title_orig" type="text_general" stored="false" indexed="true"/>
+ <field name="title_orig" type="lowercase" stored="false" indexed="true"/>
<!-- <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
<field name="published_date" type="string" stored="true" indexed="true"/>
+ <!-- epochs/kinds/genres are declared neither indexed nor stored; the copyField rules further down route their values into "metadata" -->
+ <field name="epochs" type="lowercase" stored="false" indexed="false" multiValued="true" />
+ <field name="kinds" type="lowercase" stored="false" indexed="false" multiValued="true" />
+ <field name="genres" type="lowercase" stored="false" indexed="false" multiValued="true" />
+
+ <!-- catch-all search field: receives translators, epochs, kinds and genres through copyField -->
+ <field name="metadata" type="text_pl" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
+
<field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="header_index" type="int" stored="true" indexed="true"/>
<copyField source="themes" dest="themes_pl"/>
<copyField source="tag_name" dest="tag_name_pl"/>
+ <copyField source="title" dest="title_orig"/>
+
+ <copyField source="translators" dest="metadata"/>
+ <copyField source="epochs" dest="metadata"/>
+ <copyField source="kinds" dest="metadata"/>
+ <copyField source="genres" dest="metadata"/>
<!--
<copyField source="cat" dest="text"/>
from catalogue import constants
from catalogue.fields import EbookField
from catalogue.models import Tag, Fragment, BookMedia
-from catalogue.utils import create_zip, gallery_url, gallery_path
+from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
from catalogue.models.tag import prefetched_relations
from catalogue import app_settings
from catalogue import tasks
language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
description = models.TextField(_('description'), blank=True)
created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
- changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
+ changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
parent_number = models.IntegerField(_('parent number'), default=0)
extra_info = jsonfield.JSONField(_('extra information'), default={})
gazeta_link = models.CharField(blank=True, max_length=240)
else:
return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
+ def tags_by_category(self):
+ """Return split_tags() applied to this book's tags, leaving out the 'set' and 'theme' categories."""
+ # NOTE(review): templates read the result as tags.author, so split_tags presumably groups by category - confirm
+ return split_tags(self.tags.exclude(category__in=('set', 'theme')))
+
def author_unicode(self):
return self.cached_author
has_daisy_file.short_description = 'DAISY'
has_daisy_file.boolean = True
+    def get_audiobooks(self):
+        """Collect this book's audiobook media.
+
+        Returns a pair ``(audiobooks, projects)``.  ``audiobooks`` is a list
+        with one dict per mp3 media object: the mp3 sits under ``'mp3'`` and,
+        when an ogg media object of the same name exists, that one sits under
+        ``'ogg'``.  ``projects`` is the sorted list of distinct
+        ``(project, funded_by)`` pairs read from the mp3 objects' ``extra_info``.
+        """
+        oggs_by_name = dict(
+            (ogg.name, ogg)
+            for ogg in self.media.filter(type='ogg').order_by().iterator())
+
+        audiobooks = []
+        projects = set()
+        for mp3 in self.media.filter(type='mp3').iterator():
+            # ogg files are always from the same project
+            info = mp3.extra_info
+            # temporary fallback when no project is recorded
+            project = info.get('project') or u'CzytamySłuchając'
+            projects.add((project, info.get('funded_by', '')))
+
+            entry = {'mp3': mp3}
+            matching_ogg = oggs_by_name.get(mp3.name)
+            if matching_ogg:
+                entry['ogg'] = matching_ogg
+            audiobooks.append(entry)
+
+        return audiobooks, sorted(projects)
+
def wldocument(self, parse_dublincore=True, inherit=True):
from catalogue.import_utils import ORMDocProvider
from librarian.parser import WLDocument
-{% spaceless %}
- {% load i18n %}
- {% load inline_tag_list from catalogue_tags %}
- {% load ssi_include from ssify %}
-
- <div class="search-result">
- {% ssi_include 'catalogue_book_short' pk=book.pk %}
+{% extends "catalogue/book_short.html" %}
+{% load inline_tag_list from catalogue_tags %}
+{% block right-column %}
+ <div class="book-right-column">
<div class="snippets">
{% for hit in hits %}
{% if hit.snippet %}
{% inline_tag_list hit.themes_hit %}
{% endif %}
<a href="{{hit.fragment.get_absolute_url}}">
- {% if hit.snippet %}
- {{hit.snippet|safe}}
- {% else %}
- {{hit.fragment.text|truncatewords_html:15|safe}}
- {% endif %}
+ {{hit.snippet|safe}}
</a>
</div>
{% endif %}
{% endfor %}
</div>
-
- <div style="clear: right"></div>
+ {% include 'catalogue/snippets/jplayer.html' %}
</div>
-{% endspaceless %}
\ No newline at end of file
+{% endblock %}
{% load book_shelf_tags from social_tags %}
{% load static %}
+ {% with ga=book.get_audiobooks %}
+ {% with audiobooks=ga.0 %}
<div class="{% block box-class %}book-box{% if audiobooks %} audiobook-box{% endif %}{% endblock %}">
<div class="book-box-inner">
{% include "catalogue/snippets/like_button.html" %}
{% endblock %}
+ {% with book.tags_by_category as tags %}
<div class="book-left-column">
<div class="book-box-body">
{% block book-box-body-pre %}
<div class="author">
{% for tag in tags.author %}
<a href="{{ tag.get_absolute_url }}">{{ tag.name }}</a>{% if not forloop.last %},
- {% endif %}{% endfor %}{% for parent in parents %},
+ {% endif %}{% endfor %}{% for parent in book.parents %},
<a href="{{ parent.get_absolute_url }}">{{ parent.title }}</a>{% endfor %}
</div>
<div class="title">
- {% if main_link %}<a href="{{ main_link }}">{% endif %}{{ book.title }}{% if main_link %}</a>{% endif %}
+ <a href="{{ book.get_absolute_url }}">{{ book.title }}</a>
</div>
{% if book.translator %}
<div class="author">
<div class="cover-area">
{% if book.cover_thumb %}
- {% if main_link %}<a href="{{ main_link }}">{% endif %}
+ <a href="{{ book.get_absolute_url }}">
<img src="{{ book.cover_thumb.url }}" alt="Cover" class="cover" />
- {% if main_link %}</a>{% endif %}
+ </a>
{% endif %}
{% block cover-area-extra %}{% endblock %}
</div>
{% endfor %}
</span></span>
- {% if show_lang %}
+ {% if book.is_foreign %}
<span class="category">
<span class="mono"> {% trans "Language" %}:</span> <span class="book-box-tag">
<a>{{ book.language_name }}</a>
</span>
{% endif %}
- {% if stage_note %}
+ {% with stage_note=book.stage_note %}
+ {% if stage_note.0 %}
<br>
<span class="category">
- <a{% if stage_note_url %} href="{{ stage_note_url }}"{% endif %}>{{ stage_note }}</a>
+ <a{% if stage_note.1 %} href="{{ stage_note.1 }}"{% endif %}>{{ stage_note.0 }}</a>
</span>
{% endif %}
+ {% endwith %}
</div>
</div>
{% book_shelf_tags book.pk %}
{% block box-append %}
{% endblock %}
</div>
+ {% endwith %}
{% block right-column %}
{% if audiobooks %}
- <div class="audiobook-right-column">
+ <div class="book-right-column">
{% include 'catalogue/snippets/jplayer.html' %}
</div>
{% endif %}
<div class="clearboth"></div>
</div>
</div>
+ {% endwith %}
+ {% endwith %}
{% endspaceless %}
</div>
<div class="box" id="book-short">
- {% ssi_include 'catalogue_book_short' pk=book.pk %}
+ {% cache 86400 catalogue_book_short book.pk %}
+ {% include 'catalogue/book_short.html' %}
+ {% endcache %}
</div>
{% endblock footer %}
<span class="did_you_mean">{% trans "Did you mean" %}
<a href="{% url 'search' %}?q={{did_you_mean|urlencode}}">{{did_you_mean|lower}}</a>?</span>
{% endif %}
- <!-- tu pójdą trafienia w tagi: Autorzy - z description oraz motywy i rodzaje -->
<div class="inline-tag-lists top-tag-list">
- {% if tags.author %}
+ {% if tags %}
<div>
- <h2>{% trans "Authors" %}:</h2>
- {% for tag in tags.author %}
- <a class="tag-box" href="{{ tag.get_absolute_url }}">
- {% include "catalogue/tag_box.html" %}
- </a>
- {% endfor %}
- </div>
- {% endif %}
- {% if tags.kind %}
- <div>
- <h2>{% trans "Kinds" %}:</h2>
- {% for tag in tags.kind %}
- <a class="tag-box" href="{{ tag.get_absolute_url }}">
- {% include "catalogue/tag_box.html" %}
- </a>
- {% endfor %}
- </div>
- {% endif %}
- {% if tags.genre %}
- <div>
- <h2>{% trans "Genres" %}:</h2>
- {% for tag in tags.genre %}
- <a class="tag-box" href="{{ tag.get_absolute_url }}">
- {% include "catalogue/tag_box.html" %}
- </a>
- {% endfor %}
- </div>
- {% endif %}
- {% if tags.epoch %}
- <div class="inline-tag-list">
- <h2>{% trans "Epochs" %}:</h2>
- {% for tag in tags.epoch %}
+ {% for tag in tags %}
<a class="tag-box" href="{{ tag.get_absolute_url }}">
{% include "catalogue/tag_box.html" %}
</a>
{% endif %}
</div>
- {% if results.title %}
- <div class="book-list-header">
- <div class="book-box-inner">
- <p>{% trans "Results by title" %}</p>
- </div>
- </div>
- <div>
- <ol class="work-list">
- {% for result in results.title %}
- <li class="Book-item">
- {% ssi_include 'catalogue_book_short' pk=result.book.pk %}
- </li>
- {% endfor %}
- </ol>
- </div>
- {% endif %}
-
- {% if results.author %}
- <div class="book-list-header">
- <div class="book-box-inner">
- <p>{% trans "Results by authors" %}</p>
- </div>
- </div>
- <div>
- <ol class="work-list">
- {% for author in results.author %}
- <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=author.book.pk %}</li>
- {% endfor %}
- </ol>
- </div>
- {% endif %}
-
- {% if results.translator %}
- <div class="book-list-header">
- <div class="book-box-inner">
- <p>{% trans "Results by translators" %}</p>
- </div>
- </div>
- <div>
- <ol class="work-list">
- {% for translator in results.translator %}
- <li class="Book-item">{% ssi_include 'catalogue_book_short' pk=translator.book.pk %}</li>
- {% endfor %}
- </ol>
- </div>
- {% endif %}
-
- {% if results.content %}
- <div class="book-list-header">
- <div class="book-box-inner">
- <p>{% trans "Results in text" %}</p>
- </div>
- </div>
<div>
- <ol class="work-list">
- {% for result in results.content %}
+ <ul class="work-list">
+ {% for result in results %}
<li class="Book-item">
- {% book_searched result %}
+ <div class="search-result">
+ {% book_searched result %}
+ </div>
</li>
{% endfor %}
- </ol>
+ </ul>
</div>
- {% endif %}
-
- {% if results.other %}
- <div class="book-list-header">
- <div class="book-box-inner">
- <p>{% trans "Other results" %}</p>
- </div>
- </div>
- <div>
- <ol class="work-list">
- {% for result in results.other %}
- <li class="Book-item">
- {% book_searched result %}
- </li>
- {% endfor %}
- </ol>
- </div>
- {% endif %}
{% endblock %}
{% if audiobooks %}
<div class="jp-type-playlist">
<div id="jplayer" class="jp-jplayer" data-player="jp_container_{{ book.pk }}"
- data-supplied="{% if have_oggs %}oga,{% endif %}mp3"></div>
+ data-supplied="oga,mp3"></div>
<div id="jp_container_{{ book.pk }}" class="jp-audio">
<div class="jp-type-single">
<span class="title"></span>
}, context_instance=RequestContext(request))
-def get_audiobooks(book):
- ogg_files = {}
- for m in book.media.filter(type='ogg').order_by().iterator():
- ogg_files[m.name] = m
-
- audiobooks = []
- have_oggs = True
- projects = set()
- for mp3 in book.media.filter(type='mp3').iterator():
- # ogg files are always from the same project
- meta = mp3.extra_info
- project = meta.get('project')
- if not project:
- # temporary fallback
- project = u'CzytamySłuchając'
-
- projects.add((project, meta.get('funded_by', '')))
-
- media = {'mp3': mp3}
-
- ogg = ogg_files.get(mp3.name)
- if ogg:
- media['ogg'] = ogg
- else:
- have_oggs = False
- audiobooks.append(media)
-
- projects = sorted(projects)
- return audiobooks, projects, have_oggs
-
-
# używane w publicznym interfejsie
def player(request, slug):
book = get_object_or_404(Book, slug=slug)
if not book.has_media('mp3'):
raise Http404
- audiobooks, projects, have_oggs = get_audiobooks(book)
+ audiobooks, projects = book.get_audiobooks()
return render_to_response('catalogue/player.html', {
'book': book,
))(ssi_expect(pk, int)))
def book_short(request, pk):
book = get_object_or_404(Book, pk=pk)
- stage_note, stage_note_url = book.stage_note()
- audiobooks, projects, have_oggs = get_audiobooks(book)
return render(request, 'catalogue/book_short.html', {
'book': book,
- 'has_audio': book.has_media('mp3'),
- 'main_link': book.get_absolute_url(),
- 'parents': book.parents(),
- 'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
- 'show_lang': book.language_code() != settings.LANGUAGE_CODE,
- 'stage_note': stage_note,
- 'stage_note_url': stage_note_url,
- 'audiobooks': audiobooks,
- 'have_oggs': have_oggs,
})
))(ssi_expect(pk, int)))
def book_wide(request, pk):
book = get_object_or_404(Book, pk=pk)
- stage_note, stage_note_url = book.stage_note()
extra_info = book.extra_info
- audiobooks, projects, have_oggs = get_audiobooks(book)
return render(request, 'catalogue/book_wide.html', {
'book': book,
- 'has_audio': book.has_media('mp3'),
'parents': book.parents(),
'tags': split_tags(book.tags.exclude(category__in=('set', 'theme'))),
'show_lang': book.language_code() != settings.LANGUAGE_CODE,
- 'stage_note': stage_note,
- 'stage_note_url': stage_note_url,
'main_link': reverse('book_text', args=[book.slug]) if book.html_file else None,
'extra_info': extra_info,
'hide_about': extra_info.get('about', '').startswith('http://wiki.wolnepodreczniki.pl'),
- 'audiobooks': audiobooks,
- 'have_oggs': have_oggs,
})
def search_form(request):
+ """Return a dict holding the search form under the 'search_form' key.
+
+ The form is built from the URL of 'search.views.hint' with '?max=10' appended, and from request.GET.
+ """
- return {'search_form': SearchForm(reverse('search.views.hint'), request.GET)}
+ return {'search_form': SearchForm(reverse('search.views.hint')+'?max=10', request.GET)}
off = -start
snip = snip[:e + off] + mark[1] + snip[e + off:]
snip = snip[:s + off] + mark[0] + snip[s + off:]
+ snip = re.sub('%s[ \t\n]+%s' % (mark[1], mark[0]), " ", snip)
return snip
self.remove_book(book, remove_snippets=False)
book_doc = self.create_book_doc(book)
- meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'translators', 'title'])
+ meta_fields = self.extract_metadata(book, book_info, dc_only=[
+ 'source_name', 'authors', 'translators', 'title', 'epochs', 'kinds', 'genres'])
# let's not index it - it's only used for extracting publish date
if 'source_name' in meta_fields:
del meta_fields['source_name']
'published_date': meta_fields['published_date']
}
- if 'translators' in meta_fields:
- book_fields['translators'] = meta_fields['translators']
+ for tag_name in ('translators', 'epochs', 'kinds', 'genres'):
+ if tag_name in meta_fields:
+ book_fields[tag_name] = meta_fields[tag_name]
self.index_content(book, book_fields=book_fields)
]
ignore_content_tags = [
- 'uwaga', 'extra',
+ 'uwaga', 'extra', 'nota_red',
'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
'didaskalia',
'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
if master is None:
return []
- def walker(node, ignore_tags=()):
- if node.tag not in ignore_tags:
+ def walker(node):
+ if node.tag not in self.ignore_content_tags:
yield node, None, None
if node.text is not None:
yield None, node.text, None
if 'themes' in fields:
doc['themes'] = fields['themes']
- doc['uid'] = "part%s%s%s" % (doc['header_index'],
- doc['header_span'],
- doc.get('fragment_anchor', ''))
+ doc['uid'] = "part%s-%s-%s-%s" % (
+ book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
return doc
- def give_me_utf8(s):
- if isinstance(s, unicode):
- return s.encode('utf-8')
- else:
- return s
-
fragments = {}
snippets = Snippets(book.id).open('w')
try:
content.append(text)
handle_text = [all_content]
- for start, text, end in walker(header, ignore_tags=self.ignore_content_tags):
+ for start, text, end in walker(header):
# handle footnotes
if start is not None and start.tag in self.footnote_tags:
footnote = []
class SearchResult(object):
- def __init__(self, doc, how_found=None, query=None, query_terms=None):
- # self.search = search
+ def __init__(self, doc, how_found=None, query_terms=None):
self.boost = 1.0
self._hits = []
self._processed_hits = None # processed hits
return q
- def search_phrase(self, searched, field='text', book=False,
- filters=None,
- snippets=False):
- if filters is None:
- filters = []
- if book:
- filters.append(self.index.Q(is_book=True))
-
- q = self.index.query(**{field: searched})
- q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True)
- res = q.execute()
- return [SearchResult(found, how_found=u'search_phrase') for found in res]
-
- def search_some(self, searched, fields, book=True,
- filters=None, snippets=True, query_terms=None):
- assert isinstance(fields, list)
- if filters is None:
- filters = []
+ def search_words(self, words, fields, book=True):
+ """Query the index with one filter per word, each matching that word in any of ``fields``.
+
+ words: iterable of search terms; fields: index field names tried for every word;
+ book: when true the base query is restricted to is_book=True documents.
+ Returns a list of SearchResult objects created with how_found='search_words'.
+ NOTE(review): how the per-word filters are combined is decided by apply_filters (presumably AND) - confirm.
+ """
+ filters = []
+ for word in words:
+ word_filter = None
+ # OR together one Q per field for this word
+ for field in fields:
+ q = self.index.Q(**{field: word})
+ if word_filter is None:
+ word_filter = q
+ else:
+ word_filter |= q
+ # NOTE(review): with an empty ``fields`` this appends None - confirm callers never pass one
+ filters.append(word_filter)
if book:
- filters.append(self.index.Q(is_book=True))
-
- query = self.index.Q()
-
- for fld in fields:
- query = self.index.Q(query | self.make_term_query(searched, fld))
-
- query = self.index.query(query)
+ query = self.index.query(is_book=True)
+ else:
+ query = self.index.query()
query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
- res = query.execute()
- return [SearchResult(found, how_found='search_some', query_terms=query_terms) for found in res]
-
- def search_everywhere(self, searched, query_terms=None):
- """
- Tries to use search terms to match different fields of book (or its parts).
- E.g. one word can be an author survey, another be a part of the title, and the rest
- are some words from third chapter.
- """
- books = []
- # content only query : themes x content
- q = self.make_term_query(searched, 'text')
- q_themes = self.make_term_query(searched, 'themes_pl')
-
- query = self.index.query(q).query(q_themes).field_limit(score=True, all_fields=True)
- res = query.execute()
-
- for found in res:
- books.append(SearchResult(found, how_found='search_everywhere_themesXcontent', query_terms=query_terms))
-
- # query themes/content x author/title/tags
- in_content = self.index.Q()
- in_meta = self.index.Q()
-
- for fld in ['themes_pl', 'text']:
- in_content |= self.make_term_query(searched, field=fld)
-
- for fld in ['tags', 'authors', 'title']:
- in_meta |= self.make_term_query(searched, field=fld)
-
- q = in_content & in_meta
- res = self.index.query(q).field_limit(score=True, all_fields=True).execute()
-
- for found in res:
- books.append(SearchResult(found, how_found='search_everywhere', query_terms=query_terms))
-
- return books
+ return [SearchResult(found, how_found='search_words') for found in query.execute()]
def get_snippets(self, searchresult, query, field='text', num=1):
"""
text = snippets.get((int(position),
int(length)))
snip = self.index.highlight(text=text, field=field, q=query)
- snips[idx] = snip
- if snip:
- num -= 1
+ if snip not in snips:
+ snips[idx] = snip
+ if snip:
+ num -= 1
idx += 1
except IOError, e:
if is_pdcounter:
if category == 'pd_author':
tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
- elif category == 'pd_book':
+ else: # category == 'pd_book':
tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
tag.category = 'pd_book' # make it look more lik a tag.
- else:
- # WTF
- print ("Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (
- int(doc.get('tag_id')), category)).encode('utf-8')
pd_tags.append(tag)
else:
tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
query = query.strip()
if prefix:
q |= self.index.Q(title=query + "*")
+ q |= self.index.Q(title_orig=query + "*")
else:
q |= self.make_term_query(query, field='title')
+ q |= self.make_term_query(query, field='title_orig')
qu = self.index.query(q)
only_books = self.index.Q(is_book=True)
return self.search_books(qu, [only_books])
help='book id instead of slugs'),
make_option('-t', '--just-tags', action='store_true', dest='just_tags', default=False,
help='just reindex tags'),
+ make_option('--start', dest='start_from', default=None, help='start from this slug'),
+ make_option('--stop', dest='stop_after', default=None, help='stop after this slug'),
)
def handle(self, *args, **opts):
else:
books += Book.objects.filter(slug=a).all()
else:
- books = list(Book.objects.all())
-
+ books = list(Book.objects.order_by('slug'))
+ start_from = opts.get('start_from')
+ stop_after = opts.get('stop_after')
+ if start_from:
+ start_from = start_from.replace('-', '')
+ if stop_after:
+ stop_after = stop_after.replace('-', '')
while books:
try:
b = books[0]
- print b.title
- idx.index_book(b)
- idx.index.commit()
+ slug = b.slug.replace('-', '')
+ if stop_after and slug > stop_after:
+ break
+ if not start_from or slug >= start_from:
+ print b.slug
+ idx.index_book(b)
+ idx.index.commit()
books.pop(0)
except:
traceback.print_exc()
index = MockIndex()
@staticmethod
- def _find_some_books(snippets=False, query_terms=None, max_results=20):
+ def _find_some_books(query_terms=None, max_results=20):
from .index import SearchResult
qs = Book.objects.order_by('?')
- if snippets:
- qs = qs.exclude(fragments=None)
results = []
for book in qs[:randint(1, max_results)]:
doc = {
'book_id': book.pk,
'published_date': randint(1000, 1920),
}
- if snippets:
- fragment = book.fragments.order_by('?')[0]
- doc.update({
- 'header_type': choice(['strofa', 'akap']),
- 'header_index': randint(100, 200),
- 'header_span': randint(100, 200),
- 'fragment_anchor': fragment.anchor,
- 'snippets_position': randint(100, 200),
- 'snippets_length': randint(100, 200),
- 'snippets_revision': randint(1, 100),
- 'themes_pl': fragment.tags.filter(category='theme'),
- })
res = SearchResult(doc, how_found='mock', query_terms=query_terms)
- if snippets:
- res.snippets = [fragment.short_text]
results.append(res)
return results
- def search_phrase(self, searched, field='text', book=False, filters=None, snippets=False):
- return self._find_some_books(snippets)
-
- def search_some(self, searched, fields, book=True, filters=None, snippets=True, query_terms=None):
- return self._find_some_books(snippets, query_terms)
-
# WTF
def search_books(self, query, filters=None, max_results=10):
- return self._find_some_books(snippets, max_results=max_results)
+ return self._find_some_books(max_results=max_results)
def search_everywhere(self, searched, query_terms=None):
return []
def book_searched(context, result):
book = Book.objects.get(pk=result.book_id)
- # snippets = []
- # for hit in result.hits:
- # if hit['snippets']:
- # snippets.append(hit['snippets'])
- # elif hit['fragment']:
- # snippets.append(hit['fragment'].short_text)
-
# We don't need hits which lead to sections but do not have
# snippets.
hits = filter(lambda (idx, h):
- 'fragment' in h or
result.snippets[idx] is not None,
enumerate(result.hits))
# print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
continue
snip = result.snippets[idx]
# fix some formattting
- snip = re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
- re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)[0])[0]
+ snip = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)
+ snip = re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", snip)
snip = snip.replace("\n", "<br />").replace('---', '—')
hit['snippet'] = snip
return {
'request': context['request'],
'book': book,
- 'hits': hits and zip(*hits)[1] or []
+ 'hits': zip(*hits)[1] if hits else []
}
from django.utils.translation import ugettext as _
from catalogue.utils import split_tags
-from catalogue.models import Book
+from catalogue.models import Book, Tag
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
from search.index import Search, SearchResult
from suggest.forms import PublishingSuggestForm
prefix = remove_query_syntax_chars(prefix)
- search = Search()
- # tagi beda ograniczac tutaj
- # ale tagi moga byc na ksiazce i na fragmentach
- # jezeli tagi dot tylko ksiazki, to wazne zeby te nowe byly w tej samej ksiazce
- # jesli zas dotycza themes, to wazne, zeby byly w tym samym fragmencie.
-
- def is_dupe(tag):
- if isinstance(tag, PDCounterAuthor):
- if filter(lambda t: t.slug == tag.slug and t != tag, tags):
- return True
- elif isinstance(tag, PDCounterBook):
- if filter(lambda b: b.slug == tag.slug, tags):
- return True
- return False
-
- def category_name(c):
- if c.startswith('pd_'):
- c = c[len('pd_'):]
- return _(c)
-
try:
limit = int(request.GET.get('max', ''))
except ValueError:
if limit < 1:
limit = -1
- data = []
-
- tags = search.hint_tags(prefix, pdcounter=True)
- tags = filter(lambda t: not is_dupe(t), tags)
- for t in tags:
- if not limit:
- break
- limit -= 1
- data.append({
- 'label': t.name,
- 'category': category_name(t.category),
- 'id': t.id,
- 'url': t.get_absolute_url()
- })
- if limit:
- books = search.hint_books(prefix)
- for b in books:
- if not limit:
- break
- limit -= 1
- data.append({
- 'label': b.title,
+    import re
+    from django.utils.html import escape
+
+    # ``limit`` is reset to -1 above whenever it is below 1.  The loop this code
+    # replaces counted down from it, so -1 never reached zero and meant "no cap";
+    # comparing len(data) against -1 would silently drop every book hint.
+    max_hints = limit if limit > 0 else None
+    # ``prefix`` is derived from the request: backslash-escape regex metacharacters
+    # so they match literally instead of breaking the pattern.  re.escape() is
+    # avoided because on Python 2 it also escapes non-ASCII letters.
+    # NOTE(review): confirm remove_query_syntax_chars() does not already do this.
+    word_start = r'\m' + re.sub(r'([.^$*+?()\[\]{}|\\])', r'\\\1', prefix)
+    # authors stay capped at 10 as before, but never exceed the requested maximum
+    author_cap = 10 if max_hints is None else min(10, max_hints)
+    data = [
+        {
+            'label': author.name,
+            'category': _('author'),
+            'id': author.id,
+            'url': author.get_absolute_url(),
+        }
+        for author in Tag.objects.filter(category='author', name__iregex=word_start)[:author_cap]
+    ]
+    if max_hints is None or len(data) < max_hints:
+        # fill the remaining slots with books; None leaves the slice open-ended
+        books_cap = None if max_hints is None else max_hints - len(data)
+        data += [
+            {
+                # the client inserts ``label`` as HTML, so escape the text parts
+                'label': '<cite>%s</cite>, %s' % (escape(b.title), escape(b.author_unicode())),
                 'category': _('book'),
                 'id': b.id,
                 'url': b.get_absolute_url()
-            })
-
+            }
+            for b in Book.objects.filter(title__iregex=word_start)[:books_cap]
+        ]
callback = request.GET.get('callback', None)
if callback:
return HttpResponse("%s(%s);" % (callback, json.dumps(data)),
'catalogue/search_too_long.html', {'prefix': query}, context_instance=RequestContext(request))
query = remove_query_syntax_chars(query)
-
- search = Search()
- theme_terms = search.index.analyze(text=query, field="themes_pl") \
- + search.index.analyze(text=query, field="themes")
+    words = query.split()
+    if len(words) > 10:
+        # Cap the list itself, not only the joined string: ``words`` is what
+        # search_words() receives further down, so an uncapped list would still
+        # produce one filter per word of an arbitrarily long query.
+        words = words[:10]
+        query = ' '.join(words)
+
+ search = Search()
- # change hints
tags = search.hint_tags(query, pdcounter=True, prefix=False)
tags = split_tags(tags)
- author_results = search.search_phrase(query, 'authors', book=True)
- translator_results = search.search_phrase(query, 'translators', book=True)
-
- title_results = search.search_phrase(query, 'title', book=True)
-
- # Boost main author/title results with mixed search, and save some of its results for end of list.
- # boost author, title results
- author_title_mixed = search.search_some(query, ['authors', 'translators', 'title', 'tags'], query_terms=theme_terms)
- author_title_rest = []
-
- for b in author_title_mixed:
- also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + translator_results + title_results)
- for b2 in also_in_mixed:
- b2.boost *= 1.1
- if also_in_mixed is []:
- author_title_rest.append(b)
-
- # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
- # Because the query is using only one field.
- text_phrase = SearchResult.aggregate(
- search.search_phrase(query, 'text', snippets=True, book=False),
- search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
-
- everywhere = search.search_everywhere(query, query_terms=theme_terms)
-
- def already_found(results):
- def f(e):
- for r in results:
- if e.book_id == r.book_id:
- e.boost = 0.9
- results.append(e)
- return True
- return False
- return f
- f = already_found(author_results + translator_results + title_results + text_phrase)
- everywhere = filter(lambda x: not f(x), everywhere)
-
- author_results = SearchResult.aggregate(author_results)
- translator_results = SearchResult.aggregate(translator_results)
- title_results = SearchResult.aggregate(title_results)
-
- everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
- for field, res in [('authors', author_results),
- ('translators', translator_results),
- ('title', title_results),
- ('text', text_phrase),
- ('text', everywhere)]:
- res.sort(reverse=True)
- for r in res:
- search.get_snippets(r, query, field, 3)
+ results_parts = []
+
+ search_fields = []
+ fieldsets = (
+ (['authors'], True),
+ (['title'], True),
+ (['metadata'], True),
+ (['text', 'themes_pl'], False),
+ )
+ for fieldset, is_book in fieldsets:
+ search_fields += fieldset
+ results_parts.append(search.search_words(words, search_fields, book=is_book))
+
+ results = []
+ ids_results = {}
+ for results_part in results_parts:
+ for result in sorted(SearchResult.aggregate(results_part), reverse=True):
+ book_id = result.book_id
+ if book_id in ids_results:
+ ids_results[book_id].merge(result)
+ else:
+ results.append(result)
+ ids_results[book_id] = result
+
+ for result in results:
+ search.get_snippets(result, query, num=3)
suggestion = u''
except Book.DoesNotExist:
return False
- author_results = filter(ensure_exists, author_results)
- translator_results = filter(ensure_exists, translator_results)
- title_results = filter(ensure_exists, title_results)
- text_phrase = filter(ensure_exists, text_phrase)
- everywhere = filter(ensure_exists, everywhere)
-
- results = author_results + translator_results + title_results + text_phrase + everywhere
- # ensure books do exists & sort them
- for res in (author_results, translator_results, title_results, text_phrase, everywhere):
- res.sort(reverse=True)
-
- # We don't want to redirect to book text, but rather display result page even with one result.
- # if len(results) == 1:
- # fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
- # if len(fragment_hits) == 1:
- # #anchor = fragment_hits[0]['fragment']
- # #frag = Fragment.objects.get(anchor=anchor)
- # return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
- # return HttpResponseRedirect(results[0].book.get_absolute_url())
- if len(results) == 0:
+ results = filter(ensure_exists, results)
+
+ if not results:
form = PublishingSuggestForm(initial={"books": query + ", "})
return render_to_response(
'catalogue/search_no_hits.html',
return render_to_response(
'catalogue/search_multiple_hits.html',
{
- 'tags': tags,
+ 'tags': tags['author'] + tags['kind'] + tags['genre'] + tags['epoch'] + tags['theme'],
'prefix': query,
- 'results': {
- 'author': author_results,
- 'translator': translator_results,
- 'title': title_results,
- 'content': text_phrase,
- 'other': everywhere
- },
+ 'results': results,
'did_you_mean': suggestion
},
context_instance=RequestContext(request))
render_item: function (ul, item) {
return $("<li></li>").data('item.autocomplete', item)
- .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span>'+
- '<span class="search-hint-category mono">'+item.category+'</span></a>')
+ // close the <a> explicitly: the removed category span used to carry the closing tag
+ .append('<a href="'+this.options.host+item.url+'"><span class="search-hint-label">'+item.label+'</span></a>')
.appendTo(ul);
},
}
}
-.audiobook-box {
+.audiobook-box, .search-result .book-box {
.book-left-column {
@media screen and (min-width: 1024px) {
display: inline-block;
}
}
- .audiobook-right-column {
+ .book-right-column {
@media screen and (min-width: 1024px) {
float: right;
+ @include size(padding-top, 15px);
@include size(width, 360px);
}
}
@media screen and (min-width: 1024px) {
float: right;
- margin-top: 48px;
}
}
}
@include size(margin-bottom, 16px);
}
-
-.search-result {
+/*.search-result {
@include size(border, 1px solid #ddd);
@include size(box-shadow, 2px 2px 2px #ddd);
@include size(margin, 1px);
margin: 0;
}
}
-}
+}*/
.snippets {
@media screen and (min-width: 62.5em) {
- @include size(width, 440px);
+ @include size(width, 360px);
float: right;
}
.snippet-text {
- @include size(font-size, 12px);
+ @include size(font-size, 16px);
@include size(margin, 13px 0);
@include size(padding, 12px);
background: #f7f7f7;
.search-hint-label {
display: inline-block;
- @include size(font-size, 11px);
- @include size(width, 275px);
- line-height: 1.636em;
-}
-
-.search-hint-category {
- @include size(font-size, 11px);
+ @include size(font-size, 16px);
+ //@include size(width, 275px);
line-height: 1.636em;
}
-