X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/a169ba7caa0d9d83fddcbad5ed05f536e0ca1b9e..0be9e0edd734fe1b7f9f9fbb7103a88adfce4427:/src/search/index.py diff --git a/src/search/index.py b/src/search/index.py index bd31a2acf..2d84cb4d7 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -390,16 +390,16 @@ class Index(SolrIndex): return def fix_format(text): - # separator = [u" ", u"\t", u".", u";", u","] + # separator = [" ", "\t", ".", ";", ","] if isinstance(text, list): # need to join it first text = filter(lambda s: s is not None, content) - text = u' '.join(text) + text = ' '.join(text) # for i in range(len(text)): # if i > 0: # if text[i][0] not in separator\ # and text[i - 1][-1] not in separator: - # text.insert(i, u" ") + # text.insert(i, " ") return re.sub("(?m)/$", "", text) @@ -463,7 +463,7 @@ class Index(SolrIndex): elif end is not None and footnote is not [] and end.tag in self.footnote_tags: handle_text.pop() doc = add_part(snippets, header_index=position, header_type=header.tag, - text=u''.join(footnote), + text=''.join(footnote), is_footnote=True) self.index.add(doc) footnote = [] @@ -613,7 +613,7 @@ class SearchResult(object): return result def __str__(self): - return u"" % \ + return "" % \ (self.book_id, len(self._hits), len(self._processed_hits) if self._processed_hits else -1, self._score, len(self.snippets)) @@ -665,20 +665,18 @@ class SearchResult(object): lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] < f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags))), sect) - def remove_duplicates(lst, keyfn, compare): + def remove_duplicates(lst, keyfn, larger): els = {} for e in lst: eif = keyfn(e) if eif in els: - if compare(els[eif], e) >= 1: + if larger(els[eif], e): continue els[eif] = e return els.values() # remove fragments with duplicated fid's and duplicated snippets - frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: cmp(a[self.SCORE], b[self.SCORE])) - # frags = remove_duplicates(frags, lambda f: f[OTHER]['snippet_pos'] and f[OTHER]['snippet_pos'] or f[FRAGMENT], - # lambda a, b: cmp(a[SCORE], b[SCORE])) + frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: a[self.SCORE] > b[self.SCORE]) # remove duplicate sections sections = {} @@ -802,7 +800,7 @@ class PictureResult(object): self._hits.append(hit) def __str__(self): - return u"" % (self.picture_id, self._score) + return "" % (self.picture_id, self._score) def __repr__(self): return str(self)