X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/568be7ec391479b5a253137fc08fa0ffa17b8249..8658102277980ea8e8e53a953e7c2f5fedf8de16:/apps/search/index.py diff --git a/apps/search/index.py b/apps/search/index.py index 9b7efa2b6..6e2140ac1 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -117,13 +117,11 @@ class Snippets(object): self.file.write(txt) pos = (self.position, l) self.position += l - print "Snip<%s>%s" %(pos, txt) return pos def get(self, pos): self.file.seek(pos[0], 0) txt = self.file.read(pos[1]).decode('utf-8') - print "got from snippets %d bytes from %s:" % (len(txt), pos) return txt def close(self): @@ -317,6 +315,13 @@ class Index(IndexStore): return doc + def give_me_utf8(s): + if isinstance(s, unicode): + return s.encode('utf-8') + else: + return s + + fragments = {} snippets = Snippets(book.id).open('w') try: @@ -339,13 +344,16 @@ class Index(IndexStore): fragments[fid]['content'].append(start.tail) elif start is not None and start.tag == 'motyw': fid = start.attrib['id'][1:] - fragments[fid]['themes'].append(start.text) + if start.text is not None: + fragments[fid]['themes'] += map(str.strip, map(give_me_utf8, start.text.split(','))) fragments[fid]['content'].append(start.tail) elif start is not None and start.tag == 'end': fid = start.attrib['id'][1:] if fid not in fragments: continue # a broken node, skip it frag = fragments[fid] + if frag['themes'] == []: + continue # empty themes list. del fragments[fid] def jstr(l): @@ -890,7 +898,6 @@ class MultiSearch(Search): # highlighter.getBestTextFragments(tokenStream, text, False, 10) # import pdb; pdb.set_trace() snip = highlighter.getBestFragments(tokenStream, text, 3, "...") - print('snips: %s' % snip) return [snip]