return
def fix_format(text):
+ """Join a list of text pieces (skipping None) with spaces, then strip a '/' found at the end of any line."""
- # separator = [u" ", u"\t", u".", u";", u","]
+ # separator = [" ", "\t", ".", ";", ","]
if isinstance(text, list):
# need to join it first
- text = filter(lambda s: s is not None, content)
- text = u' '.join(text)
+ # Filter the argument itself, not the enclosing scope's `content`.
+ text = ' '.join(s for s in text if s is not None)
# for i in range(len(text)):
# if i > 0:
# if text[i][0] not in separator\
# and text[i - 1][-1] not in separator:
- # text.insert(i, u" ")
+ # text.insert(i, " ")
return re.sub("(?m)/$", "", text)
elif end is not None and footnote is not [] and end.tag in self.footnote_tags:
handle_text.pop()
doc = add_part(snippets, header_index=position, header_type=header.tag,
- text=u''.join(footnote),
+ text=''.join(footnote),
is_footnote=True)
self.index.add(doc)
footnote = []
return result
def __str__(self):
- return u"<SR id=%d %d(%d) hits score=%f %d snippets>" % \
+ return "<SR id=%d %d(%d) hits score=%f %d snippets>" % \
(self.book_id, len(self._hits),
len(self._processed_hits) if self._processed_hits else -1,
self._score, len(self.snippets))
lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] <
f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags))), sect)
- def remove_duplicates(lst, keyfn, compare):
+ def remove_duplicates(lst, keyfn, larger):
+ """Return one element per keyfn(e); a later element replaces the stored one unless larger(stored, later) is true."""
els = {}
for e in lst:
eif = keyfn(e)
if eif in els:
- if compare(els[eif], e) >= 1:
+ if larger(els[eif], e):
continue
els[eif] = e
- return els.values()
+ # dict.values() is a view on Python 3; return a real list as Python 2 did.
+ return list(els.values())
# remove fragments with duplicated fid's and duplicated snippets
- frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: cmp(a[self.SCORE], b[self.SCORE]))
- # frags = remove_duplicates(frags, lambda f: f[OTHER]['snippet_pos'] and f[OTHER]['snippet_pos'] or f[FRAGMENT],
- # lambda a, b: cmp(a[SCORE], b[SCORE]))
+ frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: a[self.SCORE] > b[self.SCORE])
# remove duplicate sections
sections = {}
self._hits.append(hit)
def __str__(self):
- return u"<PR id=%d score=%f >" % (self.picture_id, self._score)
+ return "<PR id=%d score=%f >" % (self.picture_id, self._score)
def __repr__(self):
return str(self)