From: Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Date: Thu, 1 Dec 2011 15:13:54 +0000 (+0100)
Subject: be prepared for mixed unicode/str input from lxml
X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/5ce9801414bc64793eaf1663b1cfce1b2c4bf857

be prepared for mixed unicode/str input from lxml
---

diff --git a/apps/search/index.py b/apps/search/index.py
index 46fac4832..6e2140ac1 100644
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -117,13 +117,11 @@ class Snippets(object):
         self.file.write(txt)
         pos = (self.position, l)
         self.position += l
-        print "Snip<%s>%s</s>" %(pos, txt)
         return pos
 
     def get(self, pos):
         self.file.seek(pos[0], 0)
         txt = self.file.read(pos[1]).decode('utf-8')
-        print "got from snippets %d bytes from %s:" % (len(txt), pos)
         return txt
 
     def close(self):
@@ -317,6 +315,13 @@ class Index(IndexStore):
 
             return doc
 
+        def give_me_utf8(s):
+            if isinstance(s, unicode):
+                return s.encode('utf-8')
+            else:
+                return s
+
+
         fragments = {}
         snippets = Snippets(book.id).open('w')
         try:
@@ -340,7 +345,7 @@ class Index(IndexStore):
                     elif start is not None and start.tag == 'motyw':
                         fid = start.attrib['id'][1:]
                         if start.text is not None:
-                            fragments[fid]['themes'] += map(unicode.strip, start.text.split(','))
+                            fragments[fid]['themes'] += map(str.strip, map(give_me_utf8, start.text.split(',')))
                         fragments[fid]['content'].append(start.tail)
                     elif start is not None and start.tag == 'end':
                         fid = start.attrib['id'][1:]
@@ -893,7 +898,6 @@ class MultiSearch(Search):
         #  highlighter.getBestTextFragments(tokenStream, text, False, 10)
         #        import pdb; pdb.set_trace()
         snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
-        print('snips: %s' % snip)
 
         return [snip]