# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.

from functools import reduce, total_ordering
from itertools import chain
import logging
import operator
import os
import re

from django.conf import settings
from librarian import dcparser
from librarian.parser import WLDocument
from lxml import etree
import scorched
import catalogue.models
import picture.models
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
from wolnelektury.utils import makedirs
from . import custom

log = logging.getLogger('search')

if os.path.isfile(settings.SOLR_STOPWORDS):
    stopwords = set(
        line.strip()
        for line in open(settings.SOLR_STOPWORDS) if not line.startswith('#'))
else:
    stopwords = set()
 
class SolrIndex(object):
    def __init__(self, mode=None):
        self.index = custom.CustomSolrInterface(settings.SOLR, mode=mode)
 
class Snippets(object):
    """
    This class manages snippet files for an indexed object (book).
    The snippets are concatenated together, and their positions and
    lengths are kept in Lucene index fields.
    """
    SNIPPET_DIR = "snippets"

    def __init__(self, book_id, revision=None):
        makedirs(os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR))
        self.book_id = book_id
        self.revision = revision
        self.file = None
        self.position = None

    @property
    def path(self):
        if self.revision:
            fn = "%d.%d" % (self.book_id, self.revision)
        else:
            fn = "%d" % self.book_id

        return os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR, fn)

    def open(self, mode='r'):
        """
        Open the snippet file. Call .close() afterwards.
        """
        if 'b' not in mode:
            mode += 'b'

        if 'w' in mode:
            # when overwriting, start a new revision instead of truncating
            # a file that running searches may still be reading
            if os.path.exists(self.path):
                self.revision = 1
                while True:
                    if not os.path.exists(self.path):
                        break
                    self.revision += 1

        self.file = open(self.path, mode)
        self.position = 0
        return self

    def add(self, snippet):
        """
        Append a snippet (unicode) to the snippet file.
        Return a (position, length) tuple.
        """
        txt = snippet.encode('utf-8')
        l = len(txt)
        self.file.write(txt)
        pos = (self.position, l)
        self.position += l
        return pos

    def get(self, pos):
        """
        Given a tuple of (position, length), return a unicode string
        of the snippet stored there.
        """
        self.file.seek(pos[0], 0)
        txt = self.file.read(pos[1]).decode('utf-8')
        return txt

    def close(self):
        """Close snippet file"""
        if self.file:
            self.file.close()

    def remove(self):
        # remove the current snippet file and any revisions of it
        self.revision = None
        try:
            os.unlink(self.path)
            self.revision = 0
            while True:
                self.revision += 1
                os.unlink(self.path)
        except OSError:
            pass
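
    # A minimal usage sketch (illustrative; assumes settings.SEARCH_INDEX is
    # writable and a book with id 1 exists):
    #
    #   snippets = Snippets(1).open('w')
    #   pos = snippets.add(u"Litwo! Ojczyzno moja!")  # -> (position, length)
    #   snippets.close()
    #   snippets = Snippets(1).open('r')
    #   assert snippets.get(pos) == u"Litwo! Ojczyzno moja!"
    #   snippets.close()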
 
class Index(SolrIndex):
    """
    Class indexing books.
    """
    def __init__(self):
        super(Index, self).__init__(mode='rw')

    def delete_query(self, *queries):
        """
        index.delete(queries=...) doesn't work, so let's reimplement it
        using deletion of a list of uids.
        """
        uids = set()
        for q in queries:
            if isinstance(q, scorched.search.LuceneQuery):
                q = self.index.query(q)
            q.field_limiter.update(['uid'])
            st = 0
            rows = 100
            while True:
                ids = q.paginate(start=st, rows=rows).execute()
                if not len(ids):
                    break
                for res in ids:
                    uids.add(res['uid'])
                st += rows
        if uids:
            self.index.delete(uids)
            return True
        else:
            return False
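
    # Example (illustrative): delete all indexed parts of one book --
    #   index = Index()
    #   index.delete_query(index.index.Q(book_id=1))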
 
    def index_tags(self, *tags, **kw):
        """
        Re-index global tag list.
        Removes all tags from the index, then indexes them again.
        Indexed fields include: id, name (with and without Polish stems), category.
        """
        log.debug("Indexing tags")
        remove_only = kw.get('remove_only', False)
        # first, remove tags from index.
        if tags:
            tag_qs = []
            for tag in tags:
                q_id = self.index.Q(tag_id=tag.id)

                if isinstance(tag, PDCounterAuthor):
                    q_cat = self.index.Q(tag_category='pd_author')
                elif isinstance(tag, PDCounterBook):
                    q_cat = self.index.Q(tag_category='pd_book')
                else:
                    q_cat = self.index.Q(tag_category=tag.category)

                q_id_cat = self.index.Q(q_id & q_cat)
                tag_qs.append(q_id_cat)
            self.delete_query(*tag_qs)
        else:  # all
            q = self.index.Q(tag_id__any=True)
            self.delete_query(q)

        if not remove_only:
            # then add them [all or just one passed]
            if not tags:
                tags = chain(
                    catalogue.models.Tag.objects.exclude(category='set'),
                    PDCounterAuthor.objects.all(),
                    PDCounterBook.objects.all())

            for tag in tags:
                if isinstance(tag, PDCounterAuthor):
                    doc = {
                        "tag_id": int(tag.id),
                        "tag_name": tag.name,
                        "tag_name_pl": tag.name,
                        "tag_category": 'pd_author',
                        "is_pdcounter": True,
                        "uid": "tag%d_pd_a" % tag.id
                    }
                elif isinstance(tag, PDCounterBook):
                    doc = {
                        "tag_id": int(tag.id),
                        "tag_name": tag.title,
                        "tag_name_pl": tag.title,
                        "tag_category": 'pd_book',
                        "is_pdcounter": True,
                        "uid": "tag%d_pd_b" % tag.id
                    }
                else:
                    doc = {
                        "tag_id": int(tag.id),
                        "tag_name": tag.name,
                        "tag_name_pl": tag.name,
                        "tag_category": tag.category,
                        "is_pdcounter": False,
                        "uid": "tag%d" % tag.id
                    }
                self.index.add(doc)
 
    def create_book_doc(self, book):
        """
        Create a Lucene document referring to the book id.
        """
        doc = {'book_id': int(book.id)}
        if book.parent is not None:
            doc['parent_id'] = int(book.parent.id)
        return doc
 
    def remove_book(self, book_or_id, remove_snippets=True):
        """Removes a book from search index.
        book - Book instance."""
        if isinstance(book_or_id, catalogue.models.Book):
            book_id = book_or_id.id
        else:
            book_id = book_or_id

        self.delete_query(self.index.Q(book_id=book_id))

        if remove_snippets:
            snippets = Snippets(book_id)
            snippets.remove()
 
    def index_book(self, book, book_info=None, overwrite=True):
        """
        Indexes the book.
        Creates a Lucene document for extracted metadata
        and calls self.index_content() to index the contents of the book.
        """
        if overwrite:
            # we don't remove snippets, since they might be still needed by
            # threads using a not-reopened index
            self.remove_book(book, remove_snippets=False)

        book_doc = self.create_book_doc(book)
        meta_fields = self.extract_metadata(book, book_info, dc_only=[
            'source_name', 'authors', 'translators', 'title', 'epochs', 'kinds', 'genres'])
        # let's not index source_name - it's only used for extracting the publish date
        if 'source_name' in meta_fields:
            del meta_fields['source_name']

        for n, f in meta_fields.items():
            book_doc[n] = f

        book_doc['uid'] = "book%s" % book_doc['book_id']
        self.index.add(book_doc)
        book_fields = {
            'title': meta_fields['title'],
            'authors': meta_fields['authors'],
            'published_date': meta_fields['published_date']
        }

        for tag_name in ('translators', 'epochs', 'kinds', 'genres'):
            if tag_name in meta_fields:
                book_fields[tag_name] = meta_fields[tag_name]

        self.index_content(book, book_fields=book_fields)
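
    # Example (illustrative; assumes the slug exists and that the scorched
    # interface's commit() is used to make changes visible):
    #   index = Index()
    #   index.index_book(catalogue.models.Book.objects.get(slug='pan-tadeusz'))
    #   index.index.commit()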
 
    master_tags = [
        'opowiadanie', 'powiesc',
        'dramat_wierszowany_l',
        'dramat_wierszowany_lp',
        'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
    ]

    ignore_content_tags = [
        'uwaga', 'extra', 'nota_red', 'abstrakt',
        'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
        'didaskalia',
        'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
    ]

    footnote_tags = ['pa', 'pt', 'pr', 'pe']

    skip_header_tags = ['autor_utworu', 'nazwa_utworu', 'dzielo_nadrzedne',
                        '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF']

    published_date_re = re.compile(r"([0-9]+)[\]. ]*$")
 
    def extract_metadata(self, book, book_info=None, dc_only=None):
        """
        Extract metadata from the book and return a map of fields keyed by field name.
        """
        fields = {}

        if book_info is None:
            book_info = dcparser.parse(open(book.xml_file.path))

        fields['slug'] = book.slug
        fields['is_book'] = True

        for field in dcparser.BookInfo.FIELDS:
            if dc_only and field.name not in dc_only:
                continue
            if hasattr(book_info, field.name):
                if not getattr(book_info, field.name):
                    continue
                # since no type information is available, we use validator
                type_indicator = field.validator
                if type_indicator == dcparser.as_unicode:
                    s = getattr(book_info, field.name)
                    if field.multiple:
                        s = ', '.join(s)
                    fields[field.name] = s
                elif type_indicator == dcparser.as_person:
                    p = getattr(book_info, field.name)
                    if isinstance(p, dcparser.Person):
                        persons = str(p)
                    else:
                        persons = ', '.join(map(str, p))
                    fields[field.name] = persons
                elif type_indicator == dcparser.as_date:
                    dt = getattr(book_info, field.name)
                    fields[field.name] = dt

        # the published date is extracted from the end of source_name,
        # e.g. "Czytelnik, Warszawa 1987" -> "1987"
        pd = None
        if hasattr(book_info, 'source_name') and book_info.source_name:
            match = self.published_date_re.search(book_info.source_name)
            if match is not None:
                pd = str(match.groups()[0])
        if not pd:
            pd = ""
        fields["published_date"] = pd

        return fields
 
    # def add_gaps(self, fields, fieldname):
    #     """
    #     Interposes a list of fields with gap-fields, which are indexed spaces and returns it.
    #     This allows for doing phrase queries which do not overlap the gaps (when slop is 0).
    #     """
    #     def gap():
    #         while True:
    #             yield Field(fieldname, ' ', Field.Store.NO, Field.Index.NOT_ANALYZED)
    #     return reduce(lambda a, b: a + b, zip(fields, gap()))[0:-1]
 
    def get_master(self, root):
        """
        Returns the first master tag from an etree.
        """
        for master in root.iter():
            if master.tag in self.master_tags:
                return master
 
    def index_content(self, book, book_fields):
        """
        Walks the book XML and extracts content from it.
        Adds parts for each header tag and for each fragment.
        """
        wld = WLDocument.from_file(book.xml_file.path, parse_dublincore=False)
        root = wld.edoc.getroot()

        master = self.get_master(root)
        if master is None:
            return []

        def walker(node):
            if node.tag not in self.ignore_content_tags:
                yield node, None, None
                if node.text is not None:
                    yield None, node.text, None
                for child in list(node):
                    for b, t, e in walker(child):
                        yield b, t, e
                yield None, None, node

            if node.tail is not None:
                yield None, node.tail, None
            return

        def fix_format(text):
            # separator = [u" ", u"\t", u".", u";", u","]
            if isinstance(text, list):
                # need to join it first
                text = filter(lambda s: s is not None, text)
                text = u' '.join(text)
                # for i in range(len(text)):
                #     if i > 0:
                #         if text[i][0] not in separator\
                #             and text[i - 1][-1] not in separator:
                #          text.insert(i, u" ")

            # strip verse-end markers
            return re.sub("(?m)/$", "", text)

        def add_part(snippets, **fields):
            doc = self.create_book_doc(book)
            for n, v in book_fields.items():
                doc[n] = v

            doc['header_index'] = fields["header_index"]
            doc['header_span'] = 'header_span' in fields and fields['header_span'] or 1
            doc['header_type'] = fields['header_type']

            doc['text'] = fields['text']

            # snippets
            snip_pos = snippets.add(fields["text"])

            doc['snippets_position'] = snip_pos[0]
            doc['snippets_length'] = snip_pos[1]
            if snippets.revision:
                doc["snippets_revision"] = snippets.revision

            if 'fragment_anchor' in fields:
                doc["fragment_anchor"] = fields['fragment_anchor']

            if 'themes' in fields:
                doc['themes'] = fields['themes']
            doc['uid'] = "part%s-%s-%s-%s" % (
                book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
            return doc

        fragments = {}
        snippets = Snippets(book.id).open('w')
        try:
            for header, position in zip(list(master), range(len(master))):

                if header.tag in self.skip_header_tags:
                    continue
                if header.tag is etree.Comment:
                    continue

                # section content
                content = []
                footnote = []

                def all_content(text):
                    for frag in fragments.values():
                        frag['text'].append(text)
                    content.append(text)

                handle_text = [all_content]

                for start, text, end in walker(header):
                    # handle footnotes
                    if start is not None and start.tag in self.footnote_tags:
                        footnote = []

                        def collect_footnote(t):
                            footnote.append(t)

                        handle_text.append(collect_footnote)
                    elif end is not None and footnote and end.tag in self.footnote_tags:
                        handle_text.pop()
                        doc = add_part(snippets, header_index=position, header_type=header.tag,
                                       text=u''.join(footnote),
                                       is_footnote=True)
                        self.index.add(doc)
                        footnote = []

                    # handle fragments and themes.
                    if start is not None and start.tag == 'begin':
                        fid = start.attrib['id'][1:]
                        fragments[fid] = {
                            'text': [], 'themes': [], 'start_section': position, 'start_header': header.tag}

                    # themes for this fragment
                    elif start is not None and start.tag == 'motyw':
                        fid = start.attrib['id'][1:]
                        handle_text.append(lambda text: None)
                        if start.text is not None:
                            fragments[fid]['themes'] += map(str.strip, map(str, (start.text.split(','))))
                    elif end is not None and end.tag == 'motyw':
                        handle_text.pop()

                    elif start is not None and start.tag == 'end':
                        fid = start.attrib['id'][1:]
                        if fid not in fragments:
                            continue  # a broken <end> node, skip it
                        frag = fragments[fid]
                        if not frag['themes']:
                            continue  # empty themes list.
                        del fragments[fid]

                        doc = add_part(snippets,
                                       header_type=frag['start_header'],
                                       header_index=frag['start_section'],
                                       header_span=position - frag['start_section'] + 1,
                                       fragment_anchor=fid,
                                       text=fix_format(frag['text']),
                                       themes=frag['themes'])
                        self.index.add(doc)

                    # collect content
                    if text is not None and handle_text:
                        hdl = handle_text[-1]
                        hdl(text)

                # in the end, add a section text.
                doc = add_part(snippets, header_index=position,
                               header_type=header.tag, text=fix_format(content))
                self.index.add(doc)
        finally:
            snippets.close()
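
    # A "part" document produced above looks roughly like this (illustrative
    # values only):
    #   {'book_id': 1, 'title': '...', 'authors': '...', 'published_date': '1834',
    #    'header_index': 0, 'header_span': 1, 'header_type': 'naglowek_rozdzial',
    #    'text': '...', 'snippets_position': 0, 'snippets_length': 42,
    #    'uid': 'part1-0-1-'}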
 
    def remove_picture(self, picture_or_id):
        """Removes a picture from search index."""
        if isinstance(picture_or_id, picture.models.Picture):
            picture_id = picture_or_id.id
        else:
            picture_id = picture_or_id
        self.delete_query(self.index.Q(picture_id=picture_id))
 
    def index_picture(self, picture, picture_info=None, overwrite=True):
        """
        Indexes the picture.
        Creates a Lucene document for extracted metadata
        and calls self.index_area() to index the contents of the picture.
        """
        if overwrite:
            # we don't remove snippets, since they might be still needed by
            # threads using a not-reopened index
            self.remove_picture(picture)

        picture_doc = {'picture_id': int(picture.id)}
        meta_fields = self.extract_metadata(picture, picture_info, dc_only=[
            'authors', 'title', 'epochs', 'kinds', 'genres'])

        picture_doc.update(meta_fields)

        picture_doc['uid'] = "picture%s" % picture_doc['picture_id']
        self.index.add(picture_doc)
        del picture_doc['is_book']
        for area in picture.areas.all():
            self.index_area(area, picture_fields=picture_doc)
 
    def index_area(self, area, picture_fields):
        """
        Indexes themes and objects on the area.
        """
        doc = dict(picture_fields)
        doc['area_id'] = area.id
        doc['themes'] = list(area.tags.filter(category__in=('theme', 'thing')).values_list('name', flat=True))
        doc['uid'] = 'area%s' % area.id
        self.index.add(doc)
 
@total_ordering
class SearchResult(object):
    def __init__(self, doc, how_found=None, query_terms=None):
        self.boost = 1.0
        self._hits = []
        self._processed_hits = None  # processed hits
        self.snippets = []
        self.query_terms = query_terms
        self._book = None

        if 'score' in doc:
            self._score = doc['score']
        else:
            self._score = 0

        self.book_id = int(doc["book_id"])

        try:
            self.published_date = int(doc.get("published_date"))
        except ValueError:
            self.published_date = 0

        # content hits
        header_type = doc.get("header_type", None)
        # we have a content hit in some header of fragment
        if header_type is not None:
            sec = (header_type, int(doc["header_index"]))
            header_span = doc['header_span']
            header_span = header_span is not None and int(header_span) or 1
            fragment = doc.get("fragment_anchor", None)
            snippets_pos = (doc['snippets_position'], doc['snippets_length'])
            snippets_rev = doc.get('snippets_revision', None)

            hit = (sec + (header_span,), fragment, self._score, {
                'how_found': how_found,
                'snippets_pos': snippets_pos,
                'snippets_revision': snippets_rev,
                'themes': doc.get('themes', []),
                'themes_pl': doc.get('themes_pl', [])
            })

            self._hits.append(hit)

    @classmethod
    def from_book(cls, book, how_found=None, query_terms=None):
        doc = {
            'score': book.popularity.count,
            'book_id': book.id,
            'published_date': 0,
        }
        result = cls(doc, how_found=how_found, query_terms=query_terms)
        result._book = book
        return result

    def __str__(self):
        return u"<SR id=%d %d(%d) hits score=%f %d snippets>" % \
            (self.book_id, len(self._hits),
             len(self._processed_hits) if self._processed_hits else -1,
             self._score, len(self.snippets))

    def __bytes__(self):
        return str(self).encode('utf-8')

    @property
    def score(self):
        return self._score * self.boost

    def merge(self, other):
        if self.book_id != other.book_id:
            raise ValueError(
                "this search result is for book %d; tried to merge with %d" % (self.book_id, other.book_id))
        self._hits += other._hits
        self._score += max(other._score, 0)
        return self

    def get_book(self):
        if self._book is not None:
            return self._book
        self._book = catalogue.models.Book.objects.get(id=self.book_id)
        return self._book

    book = property(get_book)

    # indices into the hit tuple: (position, fragment, score, other)
    POSITION = 0
    FRAGMENT = 1
    POSITION_INDEX = 1
    POSITION_SPAN = 2
    SCORE = 2
    OTHER = 3

    @property
    def hits(self):
        if self._processed_hits is not None:
            return self._processed_hits

        # to sections and fragments
        frags = list(filter(lambda r: r[self.FRAGMENT] is not None, self._hits))

        sect = [hit for hit in self._hits if hit[self.FRAGMENT] is None]

        # sections not covered by fragments
        sect = filter(lambda s: 0 == len(list(filter(
            lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] <
                      f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags))), sect)

        def remove_duplicates(lst, keyfn, compare):
            els = {}
            for e in lst:
                eif = keyfn(e)
                if eif in els:
                    if compare(els[eif], e) >= 1:
                        continue
                els[eif] = e
            return els.values()

        def compare_score(a, b):
            # three-way comparison of hit scores (cmp() is gone in Python 3)
            return (a[self.SCORE] > b[self.SCORE]) - (a[self.SCORE] < b[self.SCORE])

        # remove fragments with duplicated fid's and duplicated snippets
        frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], compare_score)
        # frags = remove_duplicates(frags, lambda f: f[OTHER]['snippet_pos'] and f[OTHER]['snippet_pos'] or f[FRAGMENT],
        #                           lambda a, b: cmp(a[SCORE], b[SCORE]))

        # remove duplicate sections
        sections = {}

        for s in sect:
            si = s[self.POSITION][self.POSITION_INDEX]
            # skip existing
            if si in sections:
                if sections[si]['score'] >= s[self.SCORE]:
                    continue

            m = {'score': s[self.SCORE],
                 'section_number': s[self.POSITION][self.POSITION_INDEX] + 1,
                 }
            m.update(s[self.OTHER])
            sections[si] = m

        hits = list(sections.values())

        for f in frags:
            try:
                frag = catalogue.models.Fragment.objects.get(anchor=f[self.FRAGMENT], book__id=self.book_id)
            except catalogue.models.Fragment.DoesNotExist:
                # stay silent, this shouldn't happen
                continue

            # Figure out if we were searching for a token matching some word in theme name.
            themes = frag.tags.filter(category='theme')
            themes_hit = set()
            if self.query_terms is not None:
                for i in range(0, len(f[self.OTHER]['themes'])):
                    tms = re.split(r' +', f[self.OTHER]['themes'][i]) + f[self.OTHER]['themes_pl'][i].split(' ')
                    tms = list(map(str.lower, tms))
                    for qt in self.query_terms:
                        if qt in tms:
                            themes_hit.add(f[self.OTHER]['themes'][i])
                            break

            def theme_by_name(n):
                th = [t for t in themes if t.name == n]
                if th:
                    return th[0]
                else:
                    return None
            themes_hit = list(filter(lambda a: a is not None, map(theme_by_name, themes_hit)))

            m = {'score': f[self.SCORE],
                 'fragment': frag,
                 'section_number': f[self.POSITION][self.POSITION_INDEX] + 1,
                 'themes': themes,
                 'themes_hit': themes_hit
                 }
            m.update(f[self.OTHER])
            hits.append(m)

        hits.sort(key=lambda h: h['score'], reverse=True)

        self._processed_hits = hits

        return hits

    @staticmethod
    def aggregate(*result_lists):
        books = {}
        for rl in result_lists:
            for r in rl:
                if r.book_id in books:
                    books[r.book_id].merge(r)
                else:
                    books[r.book_id] = r
        return books.values()

    def __lt__(self, other):
        return (-self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) > \
               (-other.score, other.published_date, other.book.sort_key_author, other.book.sort_key)

    def __eq__(self, other):
        return (self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) == \
               (other.score, other.published_date, other.book.sort_key_author, other.book.sort_key)

    def __len__(self):
        return len(self.hits)

    def snippet_pos(self, idx=0):
        return self.hits[idx]['snippets_pos']

    def snippet_revision(self, idx=0):
        try:
            return self.hits[idx]['snippets_revision']
        except (IndexError, KeyError):
            return None
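
# SearchResult defines only __eq__ and __lt__; @total_ordering fills in the
# remaining comparisons, so sorted() yields best-first ordering (note the
# negated scores in __lt__). A sketch, with r1 and r2 standing for
# hypothetical result lists from separate queries:
#
#   for result in sorted(SearchResult.aggregate(r1, r2)):
#       print(result.book, result.score)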
 
@total_ordering
class PictureResult(object):
    def __init__(self, doc, how_found=None, query_terms=None):
        self.boost = 1.0
        self.query_terms = query_terms
        self._picture = None
        self._hits = []
        self._processed_hits = None

        if 'score' in doc:
            self._score = doc['score']
        else:
            self._score = 0

        self.picture_id = int(doc["picture_id"])

        if doc.get('area_id'):
            hit = (self._score, {
                'how_found': how_found,
                'area_id': doc['area_id'],
                'themes': doc.get('themes', []),
                'themes_pl': doc.get('themes_pl', []),
            })

            self._hits.append(hit)

    def __str__(self):
        return u"<PR id=%d score=%f >" % (self.picture_id, self._score)

    def __repr__(self):
        return str(self)

    @property
    def score(self):
        return self._score * self.boost

    def merge(self, other):
        if self.picture_id != other.picture_id:
            raise ValueError(
                "this search result is for picture %d; tried to merge with %d" % (self.picture_id, other.picture_id))
        self._hits += other._hits
        self._score += max(other._score, 0)
        return self

    # indices into the hit tuple: (score, other)
    SCORE = 0
    OTHER = 1

    @property
    def hits(self):
        if self._processed_hits is not None:
            return self._processed_hits

        hits = []
        for hit in self._hits:
            try:
                area = picture.models.PictureArea.objects.get(id=hit[self.OTHER]['area_id'])
            except picture.models.PictureArea.DoesNotExist:
                # stay silent, this shouldn't happen
                continue

            # Figure out if we were searching for a token matching some word in theme name.
            themes_hit = set()
            if self.query_terms is not None:
                for i in range(0, len(hit[self.OTHER]['themes'])):
                    tms = re.split(r' +', hit[self.OTHER]['themes'][i]) + hit[self.OTHER]['themes_pl'][i].split(' ')
                    tms = list(map(str.lower, tms))
                    for qt in self.query_terms:
                        if qt in tms:
                            themes_hit.add(hit[self.OTHER]['themes'][i])
                            break

            m = {
                'score': hit[self.SCORE],
                'area': area,
                'themes_hit': themes_hit,
            }
            m.update(hit[self.OTHER])
            hits.append(m)

        hits.sort(key=lambda h: h['score'], reverse=True)

        self._processed_hits = hits
        return hits

    def get_picture(self):
        if self._picture is None:
            self._picture = picture.models.Picture.objects.get(id=self.picture_id)
        return self._picture

    picture = property(get_picture)

    @staticmethod
    def aggregate(*result_lists):
        books = {}
        for rl in result_lists:
            for r in rl:
                if r.picture_id in books:
                    books[r.picture_id].merge(r)
                else:
                    books[r.picture_id] = r
        return books.values()

    def __lt__(self, other):
        return self.score < other.score

    def __eq__(self, other):
        return self.score == other.score
 
class Search(SolrIndex):
    """
    Search facilities.
    """
    def __init__(self, default_field="text"):
        super(Search, self).__init__(mode='r')

    def make_term_query(self, query, field='text', modal=operator.or_):
        """
        Returns term queries joined by a boolean query.
        modal - applies to the boolean query.
        """
        if query is None:
            query = ''
        q = self.index.Q()
        q = reduce(modal, map(lambda s: self.index.Q(**{field: s}), query.split(' ')), q)

        return q
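
    # Example (illustrative): with the default modal, 'pan tadeusz' becomes
    # Q(title='pan') | Q(title='tadeusz'):
    #   q = Search().make_term_query('pan tadeusz', field='title')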
 
    def search_by_author(self, words):
        from catalogue.models import Book
        books = Book.objects.filter(parent=None).order_by('-popularity__count')
        for word in words:
            books = books.filter(cached_author__iregex=r'\m%s\M' % word).select_related('popularity__count')
        return [SearchResult.from_book(book, how_found='search_by_author', query_terms=words) for book in books[:30]]

    def search_words(self, words, fields, required=None, book=True, picture=False):
        if book and not picture and fields == ['authors']:
            return self.search_by_author(words)

        filters = []
        for word in words:
            if book or picture or (word not in stopwords):
                word_filter = None
                for field in fields:
                    q = self.index.Q(**{field: word})
                    if word_filter is None:
                        word_filter = q
                    else:
                        word_filter |= q
                filters.append(word_filter)
        if required:
            required_filter = None
            for field in required:
                for word in words:
                    if book or picture or (word not in stopwords):
                        q = self.index.Q(**{field: word})
                        if required_filter is None:
                            required_filter = q
                        else:
                            required_filter |= q
            filters.append(required_filter)

        if not filters:
            return []

        params = {}
        if book:
            params['is_book'] = True
        if picture:
            params['picture_id__gt'] = 0
        else:
            params['book_id__gt'] = 0
        query = self.index.query(**params)
        query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
        result_class = PictureResult if picture else SearchResult
        return [result_class(found, how_found='search_words', query_terms=words) for found in query.execute()]
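
    # Example (illustrative): search the stemmed and non-stemmed text fields
    # and fold the raw hits into per-book results:
    #   search = Search()
    #   hits = search.search_words(['lato'], fields=['text', 'text_nonstem'])
    #   results = sorted(SearchResult.aggregate(hits))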
 
    def get_snippets(self, searchresult, query, field='text', num=1):
        """
        Returns a snippet for a found scoreDoc.
        """
        maxnum = len(searchresult)
        if num is None or num < 0 or num > maxnum:
            num = maxnum
        book_id = searchresult.book_id
        revision = searchresult.snippet_revision()
        snippets = Snippets(book_id, revision=revision)
        snips = [None] * maxnum
        try:
            snippets.open()
            idx = 0
            while idx < maxnum and num > 0:
                position, length = searchresult.snippet_pos(idx)
                if position is None or length is None:
                    # no stored snippet for this hit
                    idx += 1
                    continue
                text = snippets.get((int(position),
                                     int(length)))
                snip = self.index.highlight(text=text, field=field, q=query)
                if not snip and field == 'text':
                    snip = self.index.highlight(text=text, field='text_nonstem', q=query)
                if snip not in snips:
                    snips[idx] = snip
                    if snip:
                        num -= 1
                idx += 1
        except IOError as e:
            book = catalogue.models.Book.objects.filter(id=book_id)
            if not book:
                log.error("Book does not exist for book id = %d" % book_id)
            elif not book.get().children.exists():
                log.error("Cannot open snippet file for book id = %d [rev=%s], %s" % (book_id, revision, e))
            return []
        finally:
            snippets.close()

        # remove verse end markers..
        snips = [s.replace("/\n", "\n") if s else s for s in snips]

        searchresult.snippets = snips

        return snips
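
    # Example (illustrative, continuing the search_words() sketch above):
    #   best = sorted(SearchResult.aggregate(hits))[0]
    #   print(search.get_snippets(best, 'lato', num=3))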
 
    @staticmethod
    def apply_filters(query, filters):
        """
        Apply filters to a query.
        """
        if filters is None:
            filters = []
        filters = filter(lambda x: x is not None, filters)
        for f in filters:
            query = query.query(f)
        return query
 
if getattr(settings, 'SEARCH_MOCK', False):
    from .mock_search import Search