merge search into pretty branch
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 21 Dec 2011 09:03:56 +0000 (10:03 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 21 Dec 2011 09:03:56 +0000 (10:03 +0100)
1  2 
apps/catalogue/forms.py
apps/catalogue/management/commands/importbooks.py
apps/catalogue/models.py
apps/catalogue/urls.py
apps/opds/views.py
lib/librarian
requirements.txt
wolnelektury/settings.py
wolnelektury/static/js/catalogue.js
wolnelektury/templates/catalogue/search_multiple_hits.html
wolnelektury/urls.py

diff --combined apps/catalogue/forms.py
@@@ -3,11 -3,12 +3,11 @@@
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
  from django import forms
 -from django.core.files.base import ContentFile
  from django.utils.translation import ugettext_lazy as _
  from slughifi import slughifi
  
  from catalogue.models import Tag, Book
- from catalogue.fields import JQueryAutoCompleteField
+ from catalogue.fields import JQueryAutoCompleteSearchField
  from catalogue import utils
  
  
@@@ -16,8 -17,6 +16,8 @@@ class BookImportForm(forms.Form)
      book_xml = forms.CharField(required=False)
  
      def clean(self):
 +        from django.core.files.base import ContentFile
 +
          if not self.cleaned_data['book_xml_file']:
              if self.cleaned_data['book_xml']:
                  self.cleaned_data['book_xml_file'] = \
  
  
  class SearchForm(forms.Form):
-     q = JQueryAutoCompleteField('/katalog/tags/', {'minChars': 2, 'selectFirst': True, 'cacheLength': 50, 'matchContains': "word"})
+     q = JQueryAutoCompleteSearchField('/newsearch/hint/') # {'minChars': 2, 'selectFirst': True, 'cacheLength': 50, 'matchContains': "word"})
      tags = forms.CharField(widget=forms.HiddenInput, required=False)
  
+     book = forms.IntegerField(widget=forms.HiddenInput, min_value=0, required=False)
      def __init__(self, *args, **kwargs):
          tags = kwargs.pop('tags', [])
+         book = kwargs.pop('book', None)
          super(SearchForm, self).__init__(*args, **kwargs)
-         self.fields['q'].widget.attrs['title'] = _('title, author, theme/topic, epoch, kind, genre')
+         self.fields['q'].widget.attrs['title'] = _('title, author, theme/topic, epoch, kind, genre, phrase')
            #self.fields['q'].widget.attrs['style'] = ''
          self.fields['tags'].initial = '/'.join(tag.url_chunk for tag in Tag.get_tag_list(tags))
+         if book is not None:
+             self.fields['book'].initial = book.id
  
  
  class UserSetsForm(forms.Form):
@@@ -56,7 -60,7 +61,7 @@@ class ObjectSetsForm(forms.Form)
          self.fields['set_ids'] = forms.MultipleChoiceField(
              label=_('Shelves'),
              required=False,
 -            choices=[(tag.id, "%s (%s)" % (tag.name, tag.get_count())) for tag in Tag.objects.filter(category='set', user=user)],
 +            choices=[(tag.id, "%s (%s)" % (tag.name, tag.book_count)) for tag in Tag.objects.filter(category='set', user=user)],
              initial=[tag.id for tag in obj.tags.filter(category='set', user=user)],
              widget=forms.CheckboxSelectMultiple
          )
@@@ -78,62 -82,21 +83,62 @@@ class NewSetForm(forms.Form)
          return new_set
  
  
 -FORMATS = (
 -    ('mp3', 'MP3'),
 -    ('ogg', 'OGG'),
 -    ('pdf', 'PDF'),
 -    ('odt', 'ODT'),
 -    ('txt', 'TXT'),
 -    ('epub', 'EPUB'),
 -    ('daisy', 'DAISY'),
 -    ('mobi', 'MOBI'),
 -)
 +FORMATS = [(f, f.upper()) for f in Book.ebook_formats]
  
  
  class DownloadFormatsForm(forms.Form):
 -    formats = forms.MultipleChoiceField(required=False, choices=FORMATS, widget=forms.CheckboxSelectMultiple)
 +    formats = forms.MultipleChoiceField(required=False, choices=FORMATS,
 +            widget=forms.CheckboxSelectMultiple)
  
      def __init__(self, *args, **kwargs):
           super(DownloadFormatsForm, self).__init__(*args, **kwargs)
  
 +
 +PDF_PAGE_SIZES = (
 +    ('a4paper', _('A4')),
 +    ('a5paper', _('A5')),
 +)
 +
 +
 +PDF_LEADINGS = (
 +    ('', _('Normal leading')),
 +    ('onehalfleading', _('One and a half leading')),
 +    ('doubleleading', _('Double leading')),
 +    )
 +
 +PDF_FONT_SIZES = (
 +    ('11pt', _('Default')),
 +    ('13pt', _('Big'))
 +    )
 +
 +
 +class CustomPDFForm(forms.Form):
 +    nofootnotes = forms.BooleanField(required=False, label=_("Don't show footnotes"))
 +    nothemes = forms.BooleanField(required=False, label=_("Don't disply themes"))
 +    nowlfont = forms.BooleanField(required=False, label=_("Don't use our custom font"))
 +    ##    pagesize = forms.ChoiceField(PDF_PAGE_SIZES, required=True, label=_("Paper size"))
 +    leading = forms.ChoiceField(PDF_LEADINGS, required=False, label=_("Leading"))
 +    fontsize = forms.ChoiceField(PDF_FONT_SIZES, required=True, label=_("Font size"))
 +
 +    @property
 +    def customizations(self):
 +        c = []
 +        if self.cleaned_data['nofootnotes']:
 +            c.append('nofootnotes')
 +            
 +        if self.cleaned_data['nothemes']:
 +            c.append('nothemes')
 +            
 +        if self.cleaned_data['nowlfont']:
 +            c.append('nowlfont')
 +        
 +            ##  c.append(self.cleaned_data['pagesize'])
 +        c.append(self.cleaned_data['fontsize'])
 +
 +        if self.cleaned_data['leading']:
 +            c.append(self.cleaned_data['leading'])
 +
 +        c.sort()
 +
 +        return c
 +
diff --combined apps/catalogue/management/commands/importbooks.py
@@@ -12,7 -12,6 +12,7 @@@ from django.core.management.color impor
  from django.core.files import File
  
  from catalogue.models import Book
 +from picture.models import Picture
  
  
  class Command(BaseCommand):
              help='Don\'t build TXT file'),
          make_option('-P', '--no-build-pdf', action='store_false', dest='build_pdf', default=True,
              help='Don\'t build PDF file'),
+         make_option('-S', '--no-search-index', action='store_false', dest='search_index', default=True,
+             help='Don\'t build PDF file'),
          make_option('-w', '--wait-until', dest='wait_until', metavar='TIME',
              help='Wait until specified time (Y-M-D h:m:s)'),
-         
 +        make_option('-p', '--picture', action='store_true', dest='import_picture', default=False,
 +            help='Import pictures'),
      )
      help = 'Imports books from the specified directories.'
      args = 'directory [directory ...]'
  
-                                   build_epub=options.get('build_epub'),
-                                   build_txt=options.get('build_txt'),
-                                   build_pdf=options.get('build_pdf'),
-                                   build_mobi=options.get('build_mobi'))
 +    def import_book(self, file_path, options):
 +        verbose = options.get('verbose')
 +        file_base, ext = os.path.splitext(file_path)
 +        book = Book.from_xml_file(file_path, overwrite=options.get('force'),
++                                                    build_epub=options.get('build_epub'),
++                                                    build_txt=options.get('build_txt'),
++                                                    build_pdf=options.get('build_pdf'),
++                                                    build_mobi=options.get('build_mobi'),
++                                                    search_index=options.get('search_index'))
 +        fileid = book.fileid()
 +        for ebook_format in Book.ebook_formats:
 +            if os.path.isfile(file_base + '.' + ebook_format):
 +                getattr(book, '%s_file' % ebook_format).save(
 +                    '%s.%s' % (fileid, ebook_format), 
 +                    File(file(file_base + '.' + ebook_format)))
 +                if verbose:
 +                    print "Importing %s.%s" % (file_base, ebook_format)
 +
 +        book.save()
 +
 +    def import_picture(self, file_path, options):
 +        picture = Picture.from_xml_file(file_path, overwrite=options.get('force'))
 +        return picture
 +
      def handle(self, *directories, **options):
          from django.db import transaction
  
          verbose = options.get('verbose')
          force = options.get('force')
          show_traceback = options.get('traceback', False)
 +        import_picture = options.get('import_picture')
  
          wait_until = None
          if options.get('wait_until'):
              wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
              if verbose > 0:
                  print "Will wait until %s; it's %f seconds from now" % (
 -                    time.strftime('%Y-%m-%d %H:%M:%S', 
 +                    time.strftime('%Y-%m-%d %H:%M:%S',
                      time.localtime(wait_until)), wait_until - time.time())
  
          # Start transaction management.
  
                      # Import book files
                      try:
 -                        book = Book.from_xml_file(file_path, overwrite=force, 
 -                                                  build_epub=options.get('build_epub'),
 -                                                  build_txt=options.get('build_txt'),
 -                                                  build_pdf=options.get('build_pdf'),
 -                                                  build_mobi=options.get('build_mobi'),
 -                                                  search_index=options.get('search_index'))
 +                        if import_picture:
 +                            self.import_picture(file_path, options)
 +                        else:
 +                            self.import_book(file_path, options)
                          files_imported += 1
  
 -                        if os.path.isfile(file_base + '.pdf'):
 -                            book.pdf_file.save('%s.pdf' % book.slug, File(file(file_base + '.pdf')))
 -                            if verbose:
 -                                print "Importing %s.pdf" % file_base
 -                        if os.path.isfile(file_base + '.mobi'):
 -                            book.mobi_file.save('%s.mobi' % book.slug, File(file(file_base + '.mobi')))
 -                            if verbose:
 -                                print "Importing %s.mobi" % file_base
 -                        if os.path.isfile(file_base + '.epub'):
 -                            book.epub_file.save('%s.epub' % book.slug, File(file(file_base + '.epub')))
 -                            if verbose:
 -                                print "Importing %s.epub" % file_base
 -                        if os.path.isfile(file_base + '.txt'):
 -                            book.txt_file.save('%s.txt' % book.slug, File(file(file_base + '.txt')))
 -                            if verbose:
 -                                print "Importing %s.txt" % file_base
 -
 -                        book.save()
 -
 -                    except Book.AlreadyExists, msg:
 -                        print self.style.ERROR('%s: Book already imported. Skipping. To overwrite use --force.' %
 +                    except (Book.AlreadyExists, Picture.AlreadyExists):
 +                        print self.style.ERROR('%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                              file_path)
                          files_skipped += 1
  
diff --combined apps/catalogue/models.py
@@@ -2,17 -2,16 +2,17 @@@
  # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
 -from datetime import datetime
 +from collections import namedtuple
  
  from django.db import models
  from django.db.models import permalink, Q
  import django.dispatch
  from django.core.cache import cache
 +from django.core.files.storage import DefaultStorage
  from django.utils.translation import ugettext_lazy as _
  from django.contrib.auth.models import User
 -from django.core.files import File
  from django.template.loader import render_to_string
 +from django.utils.datastructures import SortedDict
  from django.utils.safestring import mark_safe
  from django.utils.translation import get_language
  from django.core.urlresolvers import reverse
@@@ -23,14 -22,17 +23,16 @@@ from django.conf import setting
  from newtagging.models import TagBase, tags_updated
  from newtagging import managers
  from catalogue.fields import JSONField, OverwritingFileField
 -from catalogue.utils import ExistingFile, ORMDocProvider, create_zip, remove_zip
 +from catalogue.utils import create_zip, split_tags
 +from catalogue.tasks import touch_tag
 +from shutil import copy
 +from glob import glob
 +import re
 +from os import path
  
 -from librarian import dcparser, html, epub, NoDublinCore
 -import mutagen
 -from mutagen import id3
 -from slughifi import slughifi
 -from sortify import sortify
 -from os import unlink
  
+ import search
  TAG_CATEGORIES = (
      ('author', _('author')),
      ('epoch', _('epoch')),
      ('book', _('book')),
  )
  
 -MEDIA_FORMATS = (
 -    ('odt', _('ODT file')),
 -    ('mp3', _('MP3 file')),
 -    ('ogg', _('OGG file')),
 -    ('daisy', _('DAISY file')), 
 -)
 -
  # not quite, but Django wants you to set a timeout
  CACHE_FOREVER = 2419200  # 28 days
  
@@@ -61,6 -70,7 +63,6 @@@ class Tag(TagBase)
      category = models.CharField(_('category'), max_length=50, blank=False, null=False,
          db_index=True, choices=TAG_CATEGORIES)
      description = models.TextField(_('description'), blank=True)
 -    main_page = models.BooleanField(_('main page'), default=False, db_index=True, help_text=_('Show tag on main page'))
  
      user = models.ForeignKey(User, blank=True, null=True)
      book_count = models.IntegerField(_('book count'), blank=True, null=True)
      has_description.boolean = True
  
      def get_count(self):
 -        """ returns global book count for book tags, fragment count for themes """
 -
 -        if self.book_count is None:
 -            if self.category == 'book':
 -                # never used
 -                objects = Book.objects.none()
 -            elif self.category == 'theme':
 -                objects = Fragment.tagged.with_all((self,))
 -            else:
 -                objects = Book.tagged.with_all((self,)).order_by()
 -                if self.category != 'set':
 -                    # eliminate descendants
 -                    l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
 -                    descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 -                    if descendants_keys:
 -                        objects = objects.exclude(pk__in=descendants_keys)
 -            self.book_count = objects.count()
 -            self.save()
 -        return self.book_count
 +        """Returns global book count for book tags, fragment count for themes."""
 +
 +        if self.category == 'book':
 +            # never used
 +            objects = Book.objects.none()
 +        elif self.category == 'theme':
 +            objects = Fragment.tagged.with_all((self,))
 +        else:
 +            objects = Book.tagged.with_all((self,)).order_by()
 +            if self.category != 'set':
 +                # eliminate descendants
 +                l_tags = Tag.objects.filter(slug__in=[book.book_tag_slug() for book in objects])
 +                descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 +                if descendants_keys:
 +                    objects = objects.exclude(pk__in=descendants_keys)
 +        return objects.count()
  
      @staticmethod
      def get_tag_list(tags):
      def url_chunk(self):
          return '/'.join((Tag.categories_dict[self.category], self.slug))
  
 +    @staticmethod
 +    def tags_from_info(info):
 +        from slughifi import slughifi
 +        from sortify import sortify
 +        meta_tags = []
 +        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
 +        for field_name, category in categories:
 +            try:
 +                tag_names = getattr(info, field_name)
 +            except:
 +                try:
 +                    tag_names = [getattr(info, category)]
 +                except:
 +                    # For instance, Pictures do not have 'genre' field.
 +                    continue
 +            for tag_name in tag_names:
 +                tag_sort_key = tag_name
 +                if category == 'author':
 +                    tag_sort_key = tag_name.last_name
 +                    tag_name = tag_name.readable()
 +                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
 +                if created:
 +                    tag.name = tag_name
 +                    tag.sort_key = sortify(tag_sort_key.lower())
 +                    tag.save()
 +                meta_tags.append(tag)
 +        return meta_tags
 +
 +
 +
 +def get_dynamic_path(media, filename, ext=None, maxlen=100):
 +    from slughifi import slughifi
 +
 +    # how to put related book's slug here?
 +    if not ext:
 +        # BookMedia case
 +        ext = media.formats[media.type].ext
 +    if media is None or not media.name:
 +        name = slughifi(filename.split(".")[0])
 +    else:
 +        name = slughifi(media.name)
 +    return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
 +
  
  # TODO: why is this hard-coded ?
  def book_upload_path(ext=None, maxlen=100):
 -    def get_dynamic_path(media, filename, ext=ext):
 -        # how to put related book's slug here?
 -        if not ext:
 -            if media.type == 'daisy':
 -                ext = 'daisy.zip'
 -            else:
 -                ext = media.type
 -        if not media.name:
 -            name = slughifi(filename.split(".")[0])
 -        else:
 -            name = slughifi(media.name)
 -        return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
 -    return get_dynamic_path
 +    return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
 +
 +
 +def get_customized_pdf_path(book, customizations):
 +    """
 +    Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 +    """
 +    customizations.sort()
 +    h = hash(tuple(customizations))
 +
 +    pdf_name = '%s-custom-%s' % (book.fileid(), h)
 +    pdf_file = get_dynamic_path(None, pdf_name, ext='pdf')
 +
 +    return pdf_file
 +
 +
 +def get_existing_customized_pdf(book):
 +    """
 +    Returns a list of paths to generated customized pdf of a book
 +    """
 +    pdf_glob = '%s-custom-' % (book.fileid(),)
 +    pdf_glob = get_dynamic_path(None, pdf_glob, ext='pdf')
 +    pdf_glob = re.sub(r"[.]([a-z0-9]+)$", "*.\\1", pdf_glob)
 +    return glob(path.join(settings.MEDIA_ROOT, pdf_glob))
  
  
  class BookMedia(models.Model):
 -    type        = models.CharField(_('type'), choices=MEDIA_FORMATS, max_length="100")
 +    FileFormat = namedtuple("FileFormat", "name ext")
 +    formats = SortedDict([
 +        ('mp3', FileFormat(name='MP3', ext='mp3')),
 +        ('ogg', FileFormat(name='Ogg Vorbis', ext='ogg')),
 +        ('daisy', FileFormat(name='DAISY', ext='daisy.zip')),
 +    ])
 +    format_choices = [(k, _('%s file') % t.name)
 +            for k, t in formats.items()]
 +
 +    type        = models.CharField(_('type'), choices=format_choices, max_length="100")
      name        = models.CharField(_('name'), max_length="100")
      file        = OverwritingFileField(_('file'), upload_to=book_upload_path())
      uploaded_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
          verbose_name_plural = _('book media')
  
      def save(self, *args, **kwargs):
 +        from slughifi import slughifi
 +        from catalogue.utils import ExistingFile, remove_zip
 +
          try:
              old = BookMedia.objects.get(pk=self.pk)
          except BookMedia.DoesNotExist, e:
          super(BookMedia, self).save(*args, **kwargs)
  
          # remove the zip package for book with modified media
 -        remove_zip(self.book.slug)
 +        remove_zip(self.book.fileid())
  
          extra_info = self.get_extra_info_value()
          extra_info.update(self.read_meta())
          """
              Reads some metadata from the audiobook.
          """
 +        import mutagen
 +        from mutagen import id3
  
          artist_name = director_name = project = funded_by = ''
          if self.type == 'mp3':
          """
              Reads source file SHA1 from audiobok metadata.
          """
 +        import mutagen
 +        from mutagen import id3
  
          if filetype == 'mp3':
              try:
  class Book(models.Model):
      title         = models.CharField(_('title'), max_length=120)
      sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
 -    slug          = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
 +    slug          = models.SlugField(_('slug'), max_length=120, db_index=True)
 +    language = models.CharField(_('language code'), max_length=3, db_index=True,
 +                    default=settings.CATALOGUE_DEFAULT_LANGUAGE)
      description   = models.TextField(_('description'), blank=True)
      created_at    = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
      changed_at    = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
      wiki_link     = models.CharField(blank=True, max_length=240)
      # files generated during publication
  
 -    file_types = ['epub', 'html', 'mobi', 'pdf', 'txt', 'xml']
 -    
 +    cover = models.FileField(_('cover'), upload_to=book_upload_path('png'),
 +                null=True, blank=True)
 +    ebook_formats = ['pdf', 'epub', 'mobi', 'txt']
 +    formats = ebook_formats + ['html', 'xml']
 +
      parent        = models.ForeignKey('self', blank=True, null=True, related_name='children')
      objects  = models.Manager()
      tagged   = managers.ModelTaggedItemManager(Tag)
      tags     = managers.TagDescriptor(Tag)
  
      html_built = django.dispatch.Signal()
 +    published = django.dispatch.Signal()
 +
 +    URLID_RE = r'[a-z0-9-]+(?:/[a-z]{3})?'
 +    FILEID_RE = r'[a-z0-9-]+(?:_[a-z]{3})?'
  
      class AlreadyExists(Exception):
          pass
  
      class Meta:
 +        unique_together = [['slug', 'language']]
          ordering = ('sort_key',)
          verbose_name = _('book')
          verbose_name_plural = _('books')
      def __unicode__(self):
          return self.title
  
 +    def urlid(self, sep='/'):
 +        stem = self.slug
 +        if self.language != settings.CATALOGUE_DEFAULT_LANGUAGE:
 +            stem += sep + self.language
 +        return stem
 +
 +    def fileid(self):
 +        return self.urlid('_')
 +
 +    @staticmethod
 +    def split_urlid(urlid, sep='/', default_lang=settings.CATALOGUE_DEFAULT_LANGUAGE):
 +        """Splits a URL book id into slug and language code.
 +        
 +        Returns a dictionary usable i.e. for object lookup, or None.
 +
 +        >>> Book.split_urlid("a-slug/pol", default_lang="eng")
 +        {'slug': 'a-slug', 'language': 'pol'}
 +        >>> Book.split_urlid("a-slug", default_lang="eng")
 +        {'slug': 'a-slug', 'language': 'eng'}
 +        >>> Book.split_urlid("a-slug_pol", "_", default_lang="eng")
 +        {'slug': 'a-slug', 'language': 'pol'}
 +        >>> Book.split_urlid("a-slug/eng", default_lang="eng")
 +
 +        """
 +        parts = urlid.rsplit(sep, 1)
 +        if len(parts) == 2:
 +            if parts[1] == default_lang:
 +                return None
 +            return {'slug': parts[0], 'language': parts[1]}
 +        else:
 +            return {'slug': urlid, 'language': default_lang}
 +
 +    @classmethod
 +    def split_fileid(cls, fileid):
 +        return cls.split_urlid(fileid, '_')
 +
      def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
 +        from sortify import sortify
 +
          self.sort_key = sortify(self.title)
  
          ret = super(Book, self).save(force_insert, force_update)
  
      @permalink
      def get_absolute_url(self):
 -        return ('catalogue.views.book_detail', [self.slug])
 +        return ('catalogue.views.book_detail', [self.urlid()])
  
      @property
      def name(self):
          return self.title
  
      def book_tag_slug(self):
 -        return ('l-' + self.slug)[:120]
 +        stem = 'l-' + self.slug
 +        if self.language != settings.CATALOGUE_DEFAULT_LANGUAGE:
 +            return stem[:116] + ' ' + self.language
 +        else:
 +            return stem[:120]
  
      def book_tag(self):
          slug = self.book_tag_slug()
          return book_tag
  
      def has_media(self, type):
 -        if type in Book.file_types:
 +        if type in Book.formats:
              return bool(getattr(self, "%s_file" % type))
          else:
              return self.media.filter(type=type).exists()
  
      def get_media(self, type):
          if self.has_media(type):
 -            if type in Book.file_types:
 +            if type in Book.formats:
                  return getattr(self, "%s_file" % type)
              else:                                             
                  return self.media.filter(type=type)
          cache_key = "Book.short_html/%d/%s"
          for lang, langname in settings.LANGUAGES:
              cache.delete(cache_key % (self.id, lang))
 +        cache.delete(cache_key = "Book.mini_box/%d" % (self.id, ))
          # Fragment.short_html relies on book's tags, so reset it here too
          for fragm in self.fragments.all():
              fragm.reset_short_html()
          if short_html is not None:
              return mark_safe(short_html)
          else:
 -            tags = self.tags.filter(~Q(category__in=('set', 'theme', 'book')))
 -            tags = [mark_safe(u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)) for tag in tags]
 +            tags = self.tags.filter(category__in=('author', 'kind', 'genre', 'epoch'))
 +            tags = split_tags(tags)
  
              formats = []
              # files generated during publication
 -            if self.has_media("html"):
 -                formats.append(u'<a href="%s">%s</a>' % (reverse('book_text', kwargs={'slug': self.slug}), _('Read online')))
 -            if self.has_media("pdf"):
 -                formats.append(u'<a href="%s">PDF</a>' % self.get_media('pdf').url)
 -            if self.has_media("mobi"):
 -                formats.append(u'<a href="%s">MOBI</a>' % self.get_media('mobi').url)
 -            if self.root_ancestor.has_media("epub"):
 -                formats.append(u'<a href="%s">EPUB</a>' % self.root_ancestor.get_media('epub').url)
 -            if self.has_media("txt"):
 -                formats.append(u'<a href="%s">TXT</a>' % self.get_media('txt').url)
 -            # other files
 -            for m in self.media.order_by('type'):
 -                formats.append(u'<a href="%s">%s</a>' % (m.file.url, m.type.upper()))
 +            for ebook_format in self.ebook_formats:
 +                if self.has_media(ebook_format):
 +                    formats.append(u'<a href="%s">%s</a>' % (
 +                        "", #self.get_media(ebook_format).url,
 +                        ebook_format.upper()
 +                    ))
  
              formats = [mark_safe(format) for format in formats]
  
                  cache.set(cache_key, short_html, CACHE_FOREVER)
              return mark_safe(short_html)
  
 -    @property
 -    def root_ancestor(self):
 -        """ returns the oldest ancestor """
 +    def mini_box(self):
 +        if self.id:
 +            cache_key = "Book.mini_box/%d" % (self.id, )
 +            short_html = cache.get(cache_key)
 +        else:
 +            short_html = None
  
 -        if not hasattr(self, '_root_ancestor'):
 -            book = self
 -            while book.parent:
 -                book = book.parent
 -            self._root_ancestor = book
 -        return self._root_ancestor
 +        if short_html is None:
 +            authors = self.tags.filter(category='author')
  
 +            short_html = unicode(render_to_string('catalogue/book_mini_box.html',
 +                {'book': self, 'authors': authors, 'STATIC_URL': settings.STATIC_URL}))
 +
 +            if self.id:
 +                cache.set(cache_key, short_html, CACHE_FOREVER)
 +        return mark_safe(short_html)
  
      def has_description(self):
          return len(self.description) > 0
      has_description.boolean = True
  
      # ugly ugly ugly
 -    def has_odt_file(self):
 -        return bool(self.has_media("odt"))
 -    has_odt_file.short_description = 'ODT'
 -    has_odt_file.boolean = True
 -
      def has_mp3_file(self):
          return bool(self.has_media("mp3"))
      has_mp3_file.short_description = 'MP3'
      has_daisy_file.short_description = 'DAISY'
      has_daisy_file.boolean = True
  
 -    def build_pdf(self):
 -        """ (Re)builds the pdf file.
 +    def wldocument(self, parse_dublincore=True):
 +        from catalogue.utils import ORMDocProvider
 +        from librarian.parser import WLDocument
  
 +        return WLDocument.from_file(self.xml_file.path,
 +                provider=ORMDocProvider(self),
 +                parse_dublincore=parse_dublincore)
 +
 +    def build_cover(self, book_info=None):
 +        """(Re)builds the cover image."""
 +        from StringIO import StringIO
 +        from django.core.files.base import ContentFile
 +        from librarian.cover import WLCover
 +
 +        if book_info is None:
 +            book_info = self.wldocument().book_info
 +
 +        cover = WLCover(book_info).image()
 +        imgstr = StringIO()
 +        cover.save(imgstr, 'png')
 +        self.cover.save(None, ContentFile(imgstr.getvalue()))
 +
 +    def build_pdf(self, customizations=None, file_name=None):
 +        """ (Re)builds the pdf file.
 +        customizations - customizations which are passed to LaTeX class file.
 +        file_name - save the pdf file under a different name and DO NOT save it in db.
          """
 -        from librarian import pdf
 -        from tempfile import NamedTemporaryFile
 -        import os
 +        from os import unlink
 +        from django.core.files import File
 +        from catalogue.utils import remove_zip
  
 -        try:
 -            pdf_file = NamedTemporaryFile(delete=False)
 -            pdf.transform(ORMDocProvider(self),
 -                      file_path=str(self.xml_file.path),
 -                      output_file=pdf_file,
 -                      )
 +        pdf = self.wldocument().as_pdf(customizations=customizations)
  
 -            self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name)))
 -        finally:
 -            unlink(pdf_file.name)
 +        if file_name is None:
 +            # we'd like to be sure not to overwrite changes happening while
 +            # (timely) pdf generation is taking place (async celery scenario)
 +            current_self = Book.objects.get(id=self.id)
 +            current_self.pdf_file.save('%s.pdf' % self.fileid(),
 +                    File(open(pdf.get_filename())))
 +            self.pdf_file = current_self.pdf_file
 +
 +            # remove cached downloadables
 +            remove_zip(settings.ALL_PDF_ZIP)
  
 -        # remove zip with all pdf files
 -        remove_zip(settings.ALL_PDF_ZIP)
 +            for customized_pdf in get_existing_customized_pdf(self):
 +                unlink(customized_pdf)
 +        else:
 +            print "saving %s" % file_name
 +            print "to: %s" % DefaultStorage().path(file_name)
 +            DefaultStorage().save(file_name, File(open(pdf.get_filename())))
  
      def build_mobi(self):
          """ (Re)builds the MOBI file.
  
          """
 -        from librarian import mobi
 -        from tempfile import NamedTemporaryFile
 -        import os
 +        from django.core.files import File
 +        from catalogue.utils import remove_zip
  
 -        try:
 -            mobi_file = NamedTemporaryFile(suffix='.mobi', delete=False)
 -            mobi.transform(ORMDocProvider(self), verbose=1,
 -                      file_path=str(self.xml_file.path),
 -                      output_file=mobi_file.name,
 -                      )
++        # Render the MOBI; the returned object gives the output path through
++        # get_filename().
 +        mobi = self.wldocument().as_mobi()
  
 -            self.mobi_file.save('%s.mobi' % self.slug, File(open(mobi_file.name)))
 -        finally:
 -            unlink(mobi_file.name)
++        # Stored as '<fileid>.mobi' on this instance's mobi_file field.
++        # NOTE(review): the opened file is never closed explicitly -- confirm.
 +        self.mobi_file.save('%s.mobi' % self.fileid(), File(open(mobi.get_filename())))
  
          # remove zip with all mobi files
          remove_zip(settings.ALL_MOBI_ZIP)
  
 -    def build_epub(self, remove_descendants=True):
 -        """ (Re)builds the epub file.
 -            If book has a parent, does nothing.
 -            Unless remove_descendants is False, descendants' epubs are removed.
 -        """
 -        from StringIO import StringIO
 -        from hashlib import sha1
 -        from django.core.files.base import ContentFile
 +    def build_epub(self):
 +        """(Re)builds the epub file."""
 +        from django.core.files import File
 +        from catalogue.utils import remove_zip
  
 -        if self.parent:
 -            # don't need an epub
 -            return
++        # Render the EPUB; the returned object gives the output path through
++        # get_filename().
 +        epub = self.wldocument().as_epub()
  
 -        epub_file = StringIO()
 -        try:
 -            epub.transform(ORMDocProvider(self), self.slug, output_file=epub_file)
 -            self.epub_file.save('%s.epub' % self.slug, ContentFile(epub_file.getvalue()))
 -            FileRecord(slug=self.slug, type='epub', sha1=sha1(epub_file.getvalue()).hexdigest()).save()
 -        except NoDublinCore:
 -            pass
 -
 -        book_descendants = list(self.children.all())
 -        while len(book_descendants) > 0:
 -            child_book = book_descendants.pop(0)
 -            if remove_descendants and child_book.has_epub_file():
 -                child_book.epub_file.delete()
 -            # save anyway, to refresh short_html
 -            child_book.save()
 -            book_descendants += list(child_book.children.all())
++        # Stored as '<fileid>.epub' on this instance's epub_file field.
++        # NOTE(review): the opened file is never closed explicitly -- confirm.
 +        self.epub_file.save('%s.epub' % self.fileid(),
 +                File(open(epub.get_filename())))
  
          # remove zip package with all epub files
          remove_zip(settings.ALL_EPUB_ZIP)
  
      def build_txt(self):
++        """(Re)builds the plain-text file and saves it as '<fileid>.txt'."""
 -        from StringIO import StringIO
          from django.core.files.base import ContentFile
 -        from librarian import text
  
 -        out = StringIO()
 -        text.transform(open(self.xml_file.path), out)
 -        self.txt_file.save('%s.txt' % self.slug, ContentFile(out.getvalue()))
++        # The rendered text is taken as a string, so no temporary file is
++        # opened here.
 +        text = self.wldocument().as_text()
 +        self.txt_file.save('%s.txt' % self.fileid(), ContentFile(text.get_string()))
  
  
      def build_html(self):
 -        from tempfile import NamedTemporaryFile
          from markupstring import MarkupString
 +        from django.core.files.base import ContentFile
 +        from slughifi import slughifi
 +        from librarian import html
  
          meta_tags = list(self.tags.filter(
              category__in=('author', 'epoch', 'genre', 'kind')))
          book_tag = self.book_tag()
  
 -        html_file = NamedTemporaryFile()
 -        if html.transform(self.xml_file.path, html_file, parse_dublincore=False):
 -            self.html_file.save('%s.html' % self.slug, File(html_file))
 +        html_output = self.wldocument(parse_dublincore=False).as_html()
 +        if html_output:
 +            self.html_file.save('%s.html' % self.fileid(),
 +                    ContentFile(html_output.get_string()))
  
              # get ancestor l-tags for adding to new fragments
              ancestor_tags = []
          def pretty_file_name(book):
              return "%s/%s.%s" % (
                  b.get_extra_info_value()['author'],
 -                b.slug,
 +                b.fileid(),
                  format_)
  
          field_name = "%s_file" % format_
      def zip_audiobooks(self):
++        """Zip this book's mp3 media files and return the task's result.
++
++        Each entry handed to create_zip is a (None, path) pair; the archive
++        is named after self.fileid().  The call blocks on result.wait().
++        """
          bm = BookMedia.objects.filter(book=self, type='mp3')
          paths = map(lambda bm: (None, bm.file.path), bm)
 -        result = create_zip.delay(paths, self.slug)
 +        result = create_zip.delay(paths, self.fileid())
          return result.wait()
  
+     def search_index(self):
++        """Add this book to the search index.
++
++        The index class is picked from settings.SEARCH_INDEX_PARALLEL:
++        falsy -> search.Index(); an int -> search.ReusableIndex(threads=4);
++        any other truthy value -> search.ReusableIndex().  The index is
++        closed even when indexing raises.
++        """
+         if settings.SEARCH_INDEX_PARALLEL:
++            # Was `instance(...)`, an undefined name that raised NameError as
++            # soon as the setting was truthy.
++            # NOTE(review): the thread count stays hard-coded at 4 whatever
++            # the int value is, and True also counts as an int -- confirm.
++            if isinstance(settings.SEARCH_INDEX_PARALLEL, int):
+                 idx = search.ReusableIndex(threads=4)
+             else:
+                 idx = search.ReusableIndex()
+         else:
+             idx = search.Index()
+ 
+         idx.open()
+         try:
+             idx.index_book(self)
+         finally:
+             idx.close()
      @classmethod
      def from_xml_file(cls, xml_file, **kwargs):
 +        from django.core.files import File
 +        from librarian import dcparser
 +
          # use librarian to parse meta-data
          book_info = dcparser.parse(xml_file)
  
  
      @classmethod
      def from_text_and_meta(cls, raw_file, book_info, overwrite=False,
-             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True):
+             build_epub=True, build_txt=True, build_pdf=True, build_mobi=True,
+             search_index=True):
          import re
 +        from sortify import sortify
  
          # check for parts before we do anything
          children = []
          if hasattr(book_info, 'parts'):
              for part_url in book_info.parts:
 -                base, slug = part_url.rsplit('/', 1)
                  try:
 -                    children.append(Book.objects.get(slug=slug))
 +                    children.append(Book.objects.get(
 +                        slug=part_url.slug, language=part_url.language))
                  except Book.DoesNotExist, e:
 -                    raise Book.DoesNotExist(_('Book with slug = "%s" does not exist.') % slug)
 +                    raise Book.DoesNotExist(_('Book "%s/%s" does not exist.') %
 +                            (part_url.slug, part_url.language))
  
  
          # Read book metadata
 -        book_base, book_slug = book_info.url.rsplit('/', 1)
 +        book_slug = book_info.url.slug
 +        language = book_info.language
          if re.search(r'[^a-zA-Z0-9-]', book_slug):
              raise ValueError('Invalid characters in slug')
 -        book, created = Book.objects.get_or_create(slug=book_slug)
 +        book, created = Book.objects.get_or_create(slug=book_slug, language=language)
  
          if created:
              book_shelves = []
          else:
              if not overwrite:
 -                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
 +                raise Book.AlreadyExists(_('Book %s/%s already exists') % (
 +                        book_slug, language))
              # Save shelves for this book
              book_shelves = list(book.tags.filter(category='set'))
  
          book.set_extra_info_value(book_info.to_dict())
          book.save()
  
 -        meta_tags = []
 -        categories = (('kinds', 'kind'), ('genres', 'genre'), ('authors', 'author'), ('epochs', 'epoch'))
 -        for field_name, category in categories:
 -            try:
 -                tag_names = getattr(book_info, field_name)
 -            except:
 -                tag_names = [getattr(book_info, category)]
 -            for tag_name in tag_names:
 -                tag_sort_key = tag_name
 -                if category == 'author':
 -                    tag_sort_key = tag_name.last_name
 -                    tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
 -                tag, created = Tag.objects.get_or_create(slug=slughifi(tag_name), category=category)
 -                if created:
 -                    tag.name = tag_name
 -                    tag.sort_key = sortify(tag_sort_key.lower())
 -                    tag.save()
 -                meta_tags.append(tag)
 +        meta_tags = Tag.tags_from_info(book_info)
  
          book.tags = set(meta_tags + book_shelves)
  
              if not settings.NO_BUILD_TXT and build_txt:
                  book.build_txt()
  
 +        book.build_cover(book_info)
 +
          if not settings.NO_BUILD_EPUB and build_epub:
 -            book.root_ancestor.build_epub()
 +            book.build_epub()
  
          if not settings.NO_BUILD_PDF and build_pdf:
 -            book.root_ancestor.build_pdf()
 +            book.build_pdf()
  
          if not settings.NO_BUILD_MOBI and build_mobi:
              book.build_mobi()
  
+         if not settings.NO_SEARCH_INDEX and search_index:
+             book.search_index()
          book_descendants = list(book.children.all())
 +        descendants_tags = set()
          # add l-tag to descendants and their fragments
 -        # delete unnecessary EPUB files
          while len(book_descendants) > 0:
              child_book = book_descendants.pop(0)
 +            descendants_tags.update(child_book.tags)
              child_book.tags = list(child_book.tags) + [book_tag]
              child_book.save()
              for fragment in child_book.fragments.all():
                  fragment.tags = set(list(fragment.tags) + [book_tag])
              book_descendants += list(child_book.children.all())
  
 +        for tag in descendants_tags:
 +            touch_tag.delay(tag)
 +
          book.save()
  
          # refresh cache
          book.reset_tag_counter()
          book.reset_theme_counter()
  
 +        cls.published.send(sender=book)
          return book
  
      def reset_tag_counter(self):
  
          return objects
  
 +    @classmethod
 +    def book_list(cls, filter=None):
 +        """Generates a hierarchical listing of all books.
 +
 +        Books are optionally filtered with a test function.
 +
 +        """
 +
 +        books_by_parent = {}
 +        books = cls.objects.all().order_by('parent_number', 'sort_key').only(
 +                'title', 'parent', 'slug', 'language')
 +        if filter:
 +            books = books.filter(filter).distinct()
 +            book_ids = set((book.pk for book in books))
 +            for book in books:
 +                parent = book.parent_id
 +                if parent not in book_ids:
 +                    parent = None
 +                books_by_parent.setdefault(parent, []).append(book)
 +        else:
 +            for book in books:
 +                books_by_parent.setdefault(book.parent_id, []).append(book)
 +
 +        orphans = []
 +        books_by_author = SortedDict()
 +        for tag in Tag.objects.filter(category='author'):
 +            books_by_author[tag] = []
 +
 +        for book in books_by_parent.get(None,()):
 +            authors = list(book.tags.filter(category='author'))
 +            if authors:
 +                for author in authors:
 +                    books_by_author[author].append(book)
 +            else:
 +                orphans.append(book)
 +
 +        return books_by_author, orphans, books_by_parent
 +
 +    _audiences_pl = {
 +        "SP1": (1, u"szkoła podstawowa"),
 +        "SP2": (1, u"szkoła podstawowa"),
 +        "P": (1, u"szkoła podstawowa"),
 +        "G": (2, u"gimnazjum"),
 +        "L": (3, u"liceum"),
 +        "LP": (3, u"liceum"),
 +    }
 +    def audiences_pl(self):
 +        audiences = self.get_extra_info_value().get('audiences', [])
 +        audiences = sorted(set([self._audiences_pl[a] for a in audiences]))
 +        return [a[1] for a in audiences]
 +
  
  def _has_factory(ftype):
      has = lambda self: bool(getattr(self, "%s_file" % ftype))
  
      
  # add the file fields
 -for t in Book.file_types:
 +for t in Book.formats:
      field_name = "%s_file" % t
      models.FileField(_("%s file" % t.upper()),
              upload_to=book_upload_path(t),
@@@ -1016,7 -883,7 +1037,7 @@@ class Fragment(models.Model)
          verbose_name_plural = _('fragments')
  
      def get_absolute_url(self):
++        """Return the book's HTML URL followed by '#m<anchor>' for this fragment."""
 -        return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)
 +        return '%s#m%s' % (self.book.get_html_url(), self.anchor)
  
      def reset_short_html(self):
          if self.id is None:
              return mark_safe(short_html)
  
  
 -class FileRecord(models.Model):
 -    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
 -    type = models.CharField(_('type'), max_length=20, db_index=True)
 -    sha1 = models.CharField(_('sha-1 hash'), max_length=40)
 -    time = models.DateTimeField(_('time'), auto_now_add=True)
 -
 -    class Meta:
 -        ordering = ('-time','-slug', '-type')
 -        verbose_name = _('file record')
 -        verbose_name_plural = _('file records')
 -
 -    def __unicode__(self):
 -        return "%s %s.%s" % (self.sha1,  self.slug, self.type)
 -
  ###########
  #
  # SIGNALS
  def _tags_updated_handler(sender, affected_tags, **kwargs):
      # reset tag global counter
      # we want Tag.changed_at updated for API to know the tag was touched
 -    Tag.objects.filter(pk__in=[tag.pk for tag in affected_tags]).update(book_count=None, changed_at=datetime.now())
 +    for tag in affected_tags:
 +        touch_tag.delay(tag)
  
      # if book tags changed, reset book tag counter
      if isinstance(sender, Book) and \
diff --combined apps/catalogue/urls.py
@@@ -4,46 -4,41 +4,46 @@@
  #
  from django.conf.urls.defaults import *
  from catalogue.feeds import AudiobookFeed
 -
 +from catalogue.models import Book
 +from picture.models import Picture
  
  urlpatterns = patterns('catalogue.views',
 -    url(r'^$', 'main_page', name='main_page'),
 +    url(r'^$', 'catalogue', name='catalogue'),
      url(r'^polki/(?P<shelf>[a-zA-Z0-9-]+)/formaty/$', 'shelf_book_formats', name='shelf_book_formats'),
 -    url(r'^polki/(?P<shelf>[a-zA-Z0-9-]+)/(?P<book>[a-zA-Z0-9-0-]+)/usun$', 'remove_from_shelf', name='remove_from_shelf'),
 +    url(r'^polki/(?P<shelf>[a-zA-Z0-9-]+)/(?P<book>%s)/usun$' % Book.URLID_RE, 'remove_from_shelf', name='remove_from_shelf'),
      url(r'^polki/$', 'user_shelves', name='user_shelves'),
      url(r'^polki/(?P<slug>[a-zA-Z0-9-]+)/usun/$', 'delete_shelf', name='delete_shelf'),
      url(r'^polki/(?P<slug>[a-zA-Z0-9-]+)\.zip$', 'download_shelf', name='download_shelf'),
      url(r'^lektury/', 'book_list', name='book_list'),
      url(r'^audiobooki/$', 'audiobook_list', name='audiobook_list'),
      url(r'^daisy/$', 'daisy_list', name='daisy_list'),
 -    url(r'^lektura/(?P<slug>[a-zA-Z0-9-]+)/polki/', 'book_sets', name='book_shelves'),
 +    url(r'^lektura/(?P<book>%s)/polki/' % Book.URLID_RE, 'book_sets', name='book_shelves'),
      url(r'^polki/nowa/$', 'new_set', name='new_set'),
      url(r'^tags/$', 'tags_starting_with', name='hint'),
      url(r'^jtags/$', 'json_tags_starting_with', name='jhint'),
-     url(r'^szukaj/$', 'search', name='search'),
+     url(r'^szukaj/$', 'search', name='old_search'),
  
      # zip
      #url(r'^zip/pdf\.zip$', 'download_zip', {'format': 'pdf', 'slug': None}, 'download_zip_pdf'),
      #url(r'^zip/epub\.zip$', 'download_zip', {'format': 'epub', 'slug': None}, 'download_zip_epub'),
      #url(r'^zip/mobi\.zip$', 'download_zip', {'format': 'mobi', 'slug': None}, 'download_zip_mobi'),
 -    #url(r'^zip/audiobook/(?P<slug>[a-zA-Z0-9-]+)\.zip', 'download_zip', {'format': 'audiobook'}, 'download_zip_audiobook'),
 -
 -    # tools
 -    url(r'^zegar/$', 'clock', name='clock'),
 +    #url(r'^zip/audiobook/(?P<book>%s)\.zip' % Book.FILEID_RE, 'download_zip', {'format': 'audiobook'}, 'download_zip_audiobook'),
  
      # Public interface. Do not change this URLs.
 -    url(r'^lektura/(?P<slug>[a-zA-Z0-9-]+)\.html$', 'book_text', name='book_text'),
 -    url(r'^lektura/(?P<slug>[a-zA-Z0-9-]+)/$', 'book_detail', name='book_detail'),
 -    url(r'^lektura/(?P<book_slug>[a-zA-Z0-9-]+)/motyw/(?P<theme_slug>[a-zA-Z0-9-]+)/$',
 +    url(r'^lektura/(?P<book>%s)\.html$' % Book.FILEID_RE, 'book_text', name='book_text'),
 +    url(r'^lektura/(?P<book>%s)/$' % Book.URLID_RE, 'book_detail', name='book_detail'),
 +    url(r'^lektura/(?P<book>%s)/motyw/(?P<theme_slug>[a-zA-Z0-9-]+)/$' % Book.URLID_RE,
          'book_fragments', name='book_fragments'),
  
++    # Catch-all: any path made of letters, digits, '-' and '/' that ends
++    # with a slash.  Patterns are tried in order, so everything listed after
++    # this line is only reached by paths this one does not match.
      url(r'^(?P<tags>[a-zA-Z0-9-/]*)/$', 'tagged_object_list', name='tagged_object_list'),
  
      url(r'^audiobooki/(?P<type>mp3|ogg|daisy|all).xml$', AudiobookFeed(), name='audiobook_feed'),
 -)
 +
++    # NOTE(review): the '.' before 'pdf' is unescaped and the pattern has no
++    # trailing '$' -- confirm this looseness is intended.
 +    url(r'^custompdf/(?P<book_fileid>%s).pdf' % Book.FILEID_RE, 'download_custom_pdf'),
 +
 +) + patterns('picture.views',
 +        # pictures - currently pictures are coupled with catalogue, hence the url is here
++        # NOTE(review): these come after the tagged_object_list catch-all,
++        # which also matches 'obraz/' and 'obraz/<slug>/' when the trailing
++        # slash is present and is tried first -- confirm the ordering.
 +        url(r'^obraz/?$', 'picture_list'),
 +        url(r'^obraz/(?P<picture>%s)/?$' % Picture.URLID_RE, 'picture_detail')
 +    )
  
diff --combined apps/opds/views.py
@@@ -5,6 -5,7 +5,7 @@@
  from base64 import b64encode
  import os.path
  from urlparse import urljoin
+ from urllib2 import unquote
  
  from django.contrib.syndication.views import Feed
  from django.core.urlresolvers import reverse
@@@ -16,7 -17,11 +17,11 @@@ from django.contrib.sites.models impor
  
  from basicauth import logged_in_or_basicauth, factory_decorator
  from catalogue.models import Book, Tag
- from catalogue.views import books_starting_with
+ from search import MultiSearch, SearchResult, JVM
+ from lucene import Term, QueryWrapperFilter, TermQuery
+ import re
  
  from stats.utils import piwik_track
  
@@@ -229,7 -234,7 +234,7 @@@ class ByCategoryFeed(Feed)
          return feed['title']
  
      def items(self, feed):
++        """Return the tags of the feed's category, leaving out those with book_count 0."""
++        # NOTE(review): tags whose book_count is unset (NULL) are presumably
++        # still returned by exclude(book_count=0) -- confirm that is intended.
 -        return (tag for tag in Tag.objects.filter(category=feed['category']) if tag.get_count() > 0)
 +        return Tag.objects.filter(category=feed['category']).exclude(book_count=0)
  
      def item_title(self, item):
          return item.name
@@@ -280,7 -285,7 +285,7 @@@ class UserFeed(Feed)
          return u"Półki użytkownika %s" % user.username
  
      def items(self, user):
++        """Return the user's 'set' tags, leaving out those with book_count 0."""
++        # NOTE(review): tags whose book_count is unset (NULL) are presumably
++        # still returned by exclude(book_count=0) -- confirm that is intended.
 -        return (tag for tag in Tag.objects.filter(category='set', user=user) if tag.get_count() > 0)
 +        return Tag.objects.filter(category='set', user=user).exclude(book_count=0)
  
      def item_title(self, item):
          return item.name
@@@ -316,20 -321,124 +321,124 @@@ class UserSetFeed(AcquisitionFeed)
  # no class decorators in python 2.5
  #UserSetFeed = factory_decorator(logged_in_or_basicauth())(UserSetFeed)
  
  @piwik_track
  class SearchFeed(AcquisitionFeed):
      description = u"Wyniki wyszukiwania na stronie WolneLektury.pl"
      title = u"Wyniki wyszukiwania"
+     INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
      
      def get_object(self, request):
-         return request.GET.get('q', '')
+         """
+         For OPDS 1.1 We should handle a query for search terms
+         and criteria provided either as opensearch or 'inline' query.
+         OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
+         atom:title. Inline query provides author, title, categories (treated as book tags),
+         description (treated as content search terms).
+         
+         if search terms are provided, we shall search for books
+         according to Hint information (from author & contributror & title).
+         but if search terms are empty, we should do a different search
+         (perhaps for is_book=True)
+         """
+         JVM.attachCurrentThread()
+         query = request.GET.get('q', '')
+         inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
+         if inline_criteria:
+             def get_criteria(criteria, name, position):
+                 e = filter(lambda el: el[0][0:len(name)] == name, criteria)
+                 print e
+                 if not e:
+                     return None
+                 c = e[0][position]
+                 print c
+                 if c[0] == '"' and c[-1] == '"':
+                     c = c[1:-1]
+                     c = c.replace('+', ' ')
+                 return c
+             #import pdb; pdb.set_trace()
+             author = get_criteria(inline_criteria, 'author', 1)
+             title = get_criteria(inline_criteria, 'title', 2)
+             translator = None
+             categories = get_criteria(inline_criteria, 'categories', 3)
+             query = get_criteria(inline_criteria, 'description', 4)
+         else:
+             author = request.GET.get('author', '')
+             title = request.GET.get('title', '')
+             translator = request.GET.get('translator', '')
+             categories = None
+             fuzzy = False
+         srch = MultiSearch()
+         hint = srch.hint()
+         # Scenario 1: full search terms provided.
+         # Use auxiliarry information to narrow it and make it better.
+         if query:
+             filters = []
+             if author:
+                 print "narrow to author %s" % author
+                 hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))
+             if translator:
+                 print "filter by translator %s" % translator
+                 filters.append(QueryWrapperFilter(
+                     srch.make_phrase(srch.get_tokens(translator, field='translators'),
+                                      field='translators')))
+             if categories:
+                 filters.append(QueryWrapperFilter(
+                     srch.make_phrase(srch.get_tokens(categories, field="tag_name_pl"),
+                                      field='tag_name_pl')))
+             flt = srch.chain_filters(filters)
+             if title:
+                 print "hint by book title %s" % title
+                 q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
+                 hint.books(*srch.search_books(q, filter=flt))
+             toks = srch.get_tokens(query)
+             print "tokens: %s" % toks
+             #            import pdb; pdb.set_trace()
+             results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
+                 srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
+                 srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
+             results.sort(reverse=True)
+             return [r.book for r in results]
+         else:
+             # Scenario 2: since we no longer have to figure out what the query term means to the user,
+             # we can just use filters and not the Hint class.
+             filters = []
+             fields = {
+                 'author': author,
+                 'translators': translator,
+                 'title': title
+                 }
+             for fld, q in fields.items():
+                 if q:
+                     filters.append(QueryWrapperFilter(
+                         srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))
+             flt = srch.chain_filters(filters)
+             books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
+             return books
  
      def get_link(self, query):
-         return "%s?q=%s" % (reverse('search'), query) 
+         return "%s?q=%s" % (reverse('search'), query)
  
-     def items(self, query):
+     def items(self, books):
          try:
-             return books_starting_with(query)
+             return books
          except ValueError:
              # too short a query
              return []
diff --combined lib/librarian
@@@ -1,1 -1,1 +1,1 @@@
- Subproject commit 0718c9d23a5791aa51bc11bba6a011fe9a8a616d
 -Subproject commit d7ba2c607dacf7a6136b83a1588b5adf2278ad46
++Subproject commit 5b407667ca47cf4d9752821fd49e5611737146d2
diff --combined requirements.txt
@@@ -17,7 -17,7 +17,7 @@@ Feedparser>=4.
  # PIL 
  PIL>=1.1.6
  mutagen>=1.17
 -sorl-thumbnail>=3.2,<10
++sorl-thumbnail>=11.09,<12
  
  # home-brewed & dependencies
  lxml>=2.2.2
@@@ -28,3 -28,6 +28,6 @@@
  # celery tasks
  django-celery
  django-kombu
+ # spell checking
+ pyenchant
diff --combined wolnelektury/settings.py
@@@ -60,6 -60,7 +60,7 @@@ USE_I18N = Tru
  # Example: "/home/media/media.lawrence.com/"
  MEDIA_ROOT = path.join(PROJECT_DIR, '../media')
  STATIC_ROOT = path.join(PROJECT_DIR, 'static')
+ SEARCH_INDEX = path.join(MEDIA_ROOT, 'search')
  
  # URL that handles the media served from MEDIA_ROOT. Make sure to use a
  # trailing slash if there is a path component (optional in other cases).
@@@ -110,7 -111,7 +111,7 @@@ TEMPLATE_DIRS = 
      path.join(PROJECT_DIR, 'templates'),
  ]
  
 -LOGIN_URL = '/uzytkownicy/login/'
 +LOGIN_URL = '/uzytkownicy/zaloguj/'
  
  LOGIN_REDIRECT_URL = '/'
  
@@@ -137,7 -138,6 +138,7 @@@ INSTALLED_APPS = 
      'modeltranslation',
  
      # our
 +    'ajaxable',
      'api',
      'catalogue',
      'chunks',
      'newtagging',
      'opds',
      'pdcounter',
 +    'reporting',
      'sponsors',
      'stats',
      'suggest',
 -
 -    #
 -    'django_nose',
 +    'picture',
+     'search',
  ]
  
  #CACHE_BACKEND = 'locmem:///?max_entries=3000'
 -CACHE_BACKEND = 'memcached://127.0.0.1:11211/'
 +#CACHE_BACKEND = 'memcached://127.0.0.1:11211/'
  CACHE_MIDDLEWARE_ANONYMOUS_ONLY=True
  
  # CSS and JavaScript file groups
  COMPRESS_CSS = {
      'all': {
 -        'source_filenames': ('css/master.css', 'css/jquery.countdown.css',
 -                             'css/master.plain.css', 'css/sponsors.css', 'css/facelist_2-0.css', 'css/ui-lightness/jquery-ui-1.8.16.custom.css'),
 +        #'source_filenames': ('css/master.css', 'css/jquery.autocomplete.css', 'css/master.plain.css', 'css/facelist_2-0.css',),
 +        'source_filenames': [
 +            'css/jquery.countdown.css', 
 +
 +            'css/base.css',
 +            'css/header.css',
 +            'css/main_page.css',
 +            'css/dialogs.css',
 +            'css/book_box.css',
 +            'css/catalogue.css',
 +            'css/sponsors.css',
 +        ],
          'output_filename': 'css/all.min?.css',
      },
      'book': {
  }
  
  COMPRESS_JS = {
 -    'jquery': {
 -        'source_filenames': ('js/jquery.js', 'js/jquery-ui-1.8.16.custom.min.js'),
 -        'output_filename': 'js/jquery.min.js',
 -    },
 -    'all': {
 -        'source_filenames': ('js/jquery.form.js',
 +    'base': {
 +        'source_filenames': (
 +            'js/jquery.cycle.min.js',
 +            'js/jquery.jqmodal.js',
 +            'js/jquery.form.js',
              'js/jquery.countdown.js', 'js/jquery.countdown-pl.js',
              'js/jquery.countdown-de.js', 'js/jquery.countdown-uk.js',
              'js/jquery.countdown-es.js', 'js/jquery.countdown-lt.js',
              'js/jquery.countdown-ru.js', 'js/jquery.countdown-fr.js',
 -            'js/jquery.cycle.min.js',
 -            'js/jquery.jqmodal.js', 'js/jquery.labelify.js', 'js/catalogue.js',
 +
 +            'js/locale.js',
 +            'js/dialogs.js',
 +            'js/sponsors.js',
 +            'js/pdcounter.js',
 +
 +            #~ 'js/jquery.autocomplete.js',
 +            #~ 'js/jquery.labelify.js', 'js/catalogue.js',
              ),
 -        'output_filename': 'js/all?.min.js',
 -    },
 -    'book': {
 -        'source_filenames': ('js/jquery.eventdelegation.js', 'js/jquery.scrollto.js', 'js/jquery.highlightfade.js', 'js/book.js',),
 -        'output_filename': 'js/book?.min.js',
 +        'output_filename': 'js/base?.min.js',
      },
 -    'book_ie': {
 -        'source_filenames': ('js/ierange-m2.js',),
 -        'output_filename': 'js/book_ie?.min.js',
 -    }
 +    #~ 'book': {
 +        #~ 'source_filenames': ('js/jquery.eventdelegation.js', 'js/jquery.scrollto.js', 'js/jquery.highlightfade.js', 'js/book.js',),
 +        #~ 'source_filenames': [],
 +        #~ 'output_filename': 'js/book?.min.js',
 +    #~ },
 +    #~ 'book_ie': {
 +        #~ 'source_filenames': ('js/ierange-m2.js',),
 +        #~ 'source_filenames': [],
 +        #~ 'output_filename': 'js/book_ie?.min.js',
 +    #~ }
  
  }
  
@@@ -248,14 -233,14 +250,16 @@@ MAX_TAG_LIST = 
  NO_BUILD_EPUB = False
  NO_BUILD_TXT = False
  NO_BUILD_PDF = False
- NO_BUILD_MOBI = False
+ NO_BUILD_MOBI = True
+ NO_SEARCH_INDEX = False
+ SEARCH_INDEX_PARALLEL = False
  
  ALL_EPUB_ZIP = 'wolnelektury_pl_epub'
  ALL_PDF_ZIP = 'wolnelektury_pl_pdf'
  ALL_MOBI_ZIP = 'wolnelektury_pl_mobi'
  
 +CATALOGUE_DEFAULT_LANGUAGE = 'pol'
 +
  PAGINATION_INVALID_PAGE_RAISES_404 = True
  
  import djcelery
@@@ -269,6 -254,7 +273,7 @@@ BROKER_PASSWORD = "guest
  BROKER_VHOST = "/"
  
  
  # Load localsettings, if they exist
  try:
      from localsettings import *
@@@ -90,9 -90,20 +90,20 @@@ function changeBannerText() 
      }
  }
  
- function autocomplete_result_handler(event, item) {
-     $(event.target).closest('form').submit();
+ function autocomplete_format_item(ul, item) {
++    // Render one suggestion as <li><a href=url>label (category)</a></li>,
++    // attach the item under 'item.autocomplete' and append it to `ul`.
++    // NOTE(review): url, label and category are concatenated into markup
++    // unescaped -- confirm the hint source is trusted.
++    var link = '<a href="'+item.url+'">'+item.label+ ' ('+item.category+')</a>';
++    var entry = $("<li></li>");
++    entry.data('item.autocomplete', item);
++    entry.append(link);
++    return entry.appendTo(ul);
  }
+ function autocomplete_result_handler(event, ui) {
++    // Items without a url fall back to submitting the enclosing form;
++    // otherwise navigate straight to the picked item's url.
++    if (ui.item.url == undefined) {
++        $(event.target).closest('form').submit();
++        return;
++    }
++    location.href = ui.item.url;
+ }
  function serverTime() {
      var time = null;
      $.ajax({url: '/katalog/zegar/',
                  }
              }); 
          }*/       
 -
 +      $("#custom-pdf-link").toggle(
 +          function(ev) { $(".custom-pdf").show(); return false; },
 +          function(ev) { $(".custom-pdf").hide(); return false; }
 +      );
      });
  })(jQuery)
  
@@@ -2,13 -2,42 +2,40 @@@
  {% load i18n %}
  {% load catalogue_tags pagination_tags %}
  
 -{% block title %}{% trans "Searching in" %} WolneLektury.pl{% endblock %}
 +{% block titleextra %}{% trans "Search" %}{% endblock %}
  
  {% block bodyid %}tagged-object-list{% endblock %}
  
  {% block body %}
 -    <form action="{% url search %}" method="get" accept-charset="utf-8" id="search-form">
 -        <p>{{ form.q }} <input type="submit" value="{% trans "Search" %}" /> <strong>{% trans "or" %}</strong> <a href="{% url main_page %}">{% trans "return to main page" %}</a></p>
 -    </form>
 +    <h1>{% trans "Search" %}</h1>
  
+     <div id="results">
+       <ol>
+       {% for result in results %}
+       <li>
+       <p><a href="{{result.book.get_absolute_url}}">{{result.book.pretty_title}}</a> (id: {{result.book_id}}, score: {{result.score}})</p>
+       <ul>
+         {% for hit in result.process_hits %}
+         <li>
+           {% if hit.fragment %}
+           <div style="">Tagi/Motywy: {% for tag in hit.themes %}{{tag.name}} {% endfor %}</div>
+           {% endif %}
+           {% for snip in hit.snippets %}
+             {{snip|safe}}<br/>
+           {% endfor %}
+         </li>
+         {% endfor %}
+       </ul>
+       </li>
+       {% empty %}
+       <p>No results.</p>
+       {% endfor %}
+       </ol>
+     </div>
+ {% comment %}
      <div id="books-list">
          <p>{% trans "More than one result matching the criteria found." %}</p>
          <ul class='matches'>
          {% endfor %}
          </ul>
      </div>
+ {% endcomment %}
  
- {% endblock %}
 -    <div id="set-window">
 -        <div class="header"><a href="#" class="jqmClose">{% trans "Close" %}</a></div>
 -        <div class="target">
 -            <p><img src="{{ STATIC_URL }}img/indicator.gif" alt="*"/> {% trans "Loading" %}</p>
 -        </div>
 -    </div>
+ {% endblock %}
diff --combined wolnelektury/urls.py
@@@ -4,62 -4,58 +4,64 @@@ import o
  from django.conf.urls.defaults import *
  from django.conf import settings
  from django.contrib import admin
 +import views
  
 -from catalogue.forms import SearchForm
  
 -from infopages.models import InfoPage
 +admin.autodiscover()
  
 +urlpatterns = patterns('wolnelektury.views',
 +    url(r'^$', 'main_page', name='main_page'),
  
 -admin.autodiscover()
 +    url(r'^zegar/$', 'clock', name='clock'),
 +
 +    # Authentication
 +    url(r'^uzytkownicy/zaloguj/$', views.LoginFormView(), name='login'),
 +    url(r'^uzytkownicy/utworz/$', views.RegisterFormView(), name='register'),
 +    url(r'^uzytkownicy/wyloguj/$', 'logout_then_redirect', name='logout'),
 +)
  
 -urlpatterns = patterns('',
 +urlpatterns += patterns('',
      url(r'^katalog/', include('catalogue.urls')),
      url(r'^materialy/', include('lessons.urls')),
      url(r'^opds/', include('opds.urls')),
      url(r'^sugestia/', include('suggest.urls')),
      url(r'^lesmianator/', include('lesmianator.urls')),
      url(r'^przypisy/', include('dictionary.urls')),
 -    url(r'^statystyka/', include('stats.urls')),
 -
 -    # Static pages
 -    url(r'^mozesz-nam-pomoc/$', 'infopages.views.infopage', {'slug': 'help_us'}, name='help_us'),
 -    url(r'^o-projekcie/$', 'infopages.views.infopage', {'slug': 'about_us'}, name='about_us'),
 -    url(r'^widget/$', 'infopages.views.infopage', {'slug': 'widget'}, name='widget'),
 +    url(r'^raporty/', include('reporting.urls')),
 +    url(r'^info/', include('infopages.urls')),
  
      # Admin panel
      url(r'^admin/catalogue/book/import$', 'catalogue.views.import_book', name='import_book'),
      url(r'^admin/doc/', include('django.contrib.admindocs.urls')),
      url(r'^admin/', include(admin.site.urls)),
  
 -    # Authentication
 -    url(r'^uzytkownicy/zaloguj/$', 'catalogue.views.login', name='login'),
 -    url(r'^uzytkownicy/wyloguj/$', 'catalogue.views.logout_then_redirect', name='logout'),
 -    url(r'^uzytkownicy/utworz/$', 'catalogue.views.register', name='register'),
 -    url(r'^uzytkownicy/login/$', 'django.contrib.auth.views.login', name='simple_login'),
 -
      # API
      (r'^api/', include('api.urls')),
  
+     url(r'^newsearch/', include('search.urls')),
      # Static files
      url(r'^%s(?P<path>.*)$' % settings.MEDIA_URL[1:], 'django.views.static.serve',
          {'document_root': settings.MEDIA_ROOT, 'show_indexes': True}),
      url(r'^%s(?P<path>.*)$' % settings.STATIC_URL[1:], 'django.views.static.serve',
          {'document_root': settings.STATIC_ROOT, 'show_indexes': True}),
 -    url(r'^$', 'django.views.generic.simple.redirect_to', {'url': 'katalog/'}),
      url(r'^i18n/', include('django.conf.urls.i18n')),
  )
  
  urlpatterns += patterns('django.views.generic.simple',
      # old static pages - redirected
 -    (r'^1procent/$', 'redirect_to', {'url': 'http://nowoczesnapolska.org.pl/wesprzyj_nas/'}),
 -    (r'^wolontariat/$', 'redirect_to', {'url': '/mozesz-nam-pomoc/'}),
 -    (r'^epub/$', 'redirect_to', {'url': '/katalog/lektury/'}),
 +    url(r'^1procent/$', 'redirect_to',
 +        {'url': 'http://nowoczesnapolska.org.pl/wesprzyj_nas/'}),
 +    url(r'^epub/$', 'redirect_to',
 +        {'url': '/katalog/lektury/'}),
 +    url(r'^mozesz-nam-pomoc/$', 'redirect_to',
 +        {'url': '/info/mozesz-nam-pomoc'}),
 +    url(r'^o-projekcie/$', 'redirect_to',
 +        {'url': '/info/o-projekcie'}),
 +    url(r'^widget/$', 'redirect_to',
 +        {'url': '/info/widget'}),
 +    url(r'^wolontariat/$', 'redirect_to',
 +        {'url': '/info/mozesz-nam-pomoc/'}),
  )