+ self._score = 0
+
+ self.book_id = int(doc["book_id"])
+
+ try:
+ self.published_date = int(doc.get("published_date"))
+ except ValueError:
+ self.published_date = 0
+
+ # content hits
+ header_type = doc.get("header_type", None)
+ # we have a content hit in some header of fragment
+ if header_type is not None:
+ sec = (header_type, int(doc["header_index"]))
+ header_span = doc['header_span']
+ header_span = header_span is not None and int(header_span) or 1
+ fragment = doc.get("fragment_anchor", None)
+ snippets_pos = (doc['snippets_position'], doc['snippets_length'])
+ snippets_rev = doc.get('snippets_revision', None)
+
+ hit = (sec + (header_span,), fragment, self._score, {
+ 'how_found': how_found,
+ 'snippets_pos': snippets_pos,
+ 'snippets_revision': snippets_rev,
+ 'themes': doc.get('themes', []),
+ 'themes_pl': doc.get('themes_pl', [])
+ })
+
+ self._hits.append(hit)
+
def __unicode__(self):
    """Return a short unicode summary of this result, for debugging.

    The summary lists the book id, the number of raw hits, the number
    of processed hits in parentheses (-1 while they have not been
    computed yet), the raw score and ``len(self.snippets)``.
    """
    # An explicit None test is required here: the old ``x and len(x) or -1``
    # idiom also produced -1 for a processed-but-empty list, because the
    # empty list itself is falsy.
    if self._processed_hits is None:
        processed_count = -1
    else:
        processed_count = len(self._processed_hits)
    return u"<SR id=%d %d(%d) hits score=%f %d snippets>" % (
        self.book_id, len(self._hits), processed_count,
        self._score, len(self.snippets))
+
def __str__(self):
    """Return the unicode summary of this result encoded as UTF-8."""
    summary = unicode(self)
    return summary.encode('utf-8')
+
@property
def score(self):
    """Effective score: the raw ``_score`` multiplied by ``boost``."""
    raw_score, boost_factor = self._score, self.boost
    return raw_score * boost_factor
+
def merge(self, other):
    """Fold another search result for the same book into this one.

    The raw hits of ``other`` are appended to this result's raw hits.
    When ``other`` has the higher ``score`` (raw score times boost),
    this result takes over ``other``'s raw ``_score``.

    Returns ``self``.
    Raises ValueError when the two results refer to different books.
    """
    if self.book_id != other.book_id:
        raise ValueError(
            "this search result is for book %d; tried to merge with %d"
            % (self.book_id, other.book_id))
    self._hits += other._hits
    # The ``hits`` property returns _processed_hits whenever it is not
    # None, so a value computed before this merge would hide the hits
    # just added. Reset it so the next access reprocesses them.
    self._processed_hits = None
    if other.score > self.score:
        self._score = other._score
    return self
+
def get_book(self):
    """Return the Book with id ``book_id``, loading it at most once.

    The first call looks the book up through
    ``catalogue.models.Book.objects`` and stores it on ``_book``;
    later calls return the stored object.
    """
    if not hasattr(self, '_book'):
        self._book = catalogue.models.Book.objects.get(id=self.book_id)
    return self._book

# Attribute-style access to the lazily loaded book.
book = property(get_book)
+
# Indexes into a raw hit tuple, which is built as
# (position, fragment, score, other):
#   POSITION - (header_type, header_index, header_span) tuple
#   FRAGMENT - fragment anchor, or None when the hit has no fragment
#   SCORE    - score recorded for the hit
#   OTHER    - dict with the remaining hit details (snippets, themes, ...)
POSITION, FRAGMENT, SCORE, OTHER = 0, 1, 2, 3

# Indexes into the POSITION tuple itself: the header index and the
# header span (index 0 holds the header type).
POSITION_INDEX, POSITION_SPAN = 1, 2
+
+ @property
+ def hits(self):
+ if self._processed_hits is not None:
+ return self._processed_hits
+
+ # to sections and fragments
+ frags = filter(lambda r: r[self.FRAGMENT] is not None, self._hits)
+
+ sect = filter(lambda r: r[self.FRAGMENT] is None, self._hits)
+
+ # sections not covered by fragments
+ sect = filter(lambda s: 0 == len(filter(
+ lambda f: s[self.POSITION][self.POSITION_INDEX] >= f[self.POSITION][self.POSITION_INDEX]
+ and s[self.POSITION][self.POSITION_INDEX] < f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN],
+ frags)), sect)
+
+ hits = []
+
+ def remove_duplicates(lst, keyfn, compare):
+ els = {}
+ for e in lst:
+ eif = keyfn(e)
+ if eif in els:
+ if compare(els[eif], e) >= 1:
+ continue
+ els[eif] = e
+ return els.values()
+
+ # remove fragments with duplicated fid's and duplicated snippets
+ frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: cmp(a[self.SCORE], b[self.SCORE]))
+ # frags = remove_duplicates(frags, lambda f: f[OTHER]['snippet_pos'] and f[OTHER]['snippet_pos'] or f[FRAGMENT],
+ # lambda a, b: cmp(a[SCORE], b[SCORE]))
+
+ # remove duplicate sections
+ sections = {}
+
+ for s in sect:
+ si = s[self.POSITION][self.POSITION_INDEX]
+ # skip existing
+ if si in sections:
+ if sections[si]['score'] >= s[self.SCORE]:
+ continue
+
+ m = {'score': s[self.SCORE],
+ 'section_number': s[self.POSITION][self.POSITION_INDEX] + 1,
+ }
+ m.update(s[self.OTHER])
+ sections[si] = m