+ themes = self.add_gaps(themes, 'themes')
+ themes_pl = self.add_gaps(themes_pl, 'themes_pl')
+
+ for t in themes:
+ doc.add(t)
+ for t in themes_pl:
+ doc.add(t)
+
+ return doc
+
def give_me_utf8(s):
    """Return *s* encoded as UTF-8 bytes when it is a text string.

    Values that are not text strings (byte strings, ``None``, numbers, ...)
    are returned unchanged.

    The text type is looked up at call time so the helper works on both
    Python 2 (``unicode``) and Python 3 (``str``); referencing ``unicode``
    unconditionally raises ``NameError`` on Python 3.
    """
    try:
        text_type = unicode  # Python 2 text type
    except NameError:
        text_type = str  # Python 3: ``str`` is the text type
    if isinstance(s, text_type):
        return s.encode('utf-8')
    return s
+
+ fragments = {}
+ snippets = Snippets(book.id).open('w')
+ try:
+ for header, position in zip(list(master), range(len(master))):
+
+ if header.tag in self.skip_header_tags:
+ continue
+ if header.tag is etree.Comment:
+ continue
+
+ # section content
+ content = []
+ footnote = None
+
+ for start, end in walker(header, ignore_tags=self.ignore_content_tags):
+ # handle footnotes
+ # if start is not None and start.tag in self.footnote_tags:
+ # footnote = ' '.join(start.itertext())
+ # elif end is not None and footnote is not None and end.tag in self.footnote_tags:
+ # doc = add_part(snippets, header_index=position, header_type=header.tag,
+ # content=footnote)
+
+ # self.index.addDocument(doc)
+
+ # footnote = None
+
+ # handle fragments and themes.
+ if start is not None and start.tag == 'begin':
+ fid = start.attrib['id'][1:]
+ fragments[fid] = {'content': [], 'themes': [], 'start_section': position, 'start_header': header.tag}
+
+ elif start is not None and start.tag == 'motyw':
+ fid = start.attrib['id'][1:]
+ if start.text is not None:
+ fragments[fid]['themes'] += map(str.strip, map(give_me_utf8, start.text.split(',')))
+
+ elif start is not None and start.tag == 'end':
+ fid = start.attrib['id'][1:]
+ if fid not in fragments:
+ continue # a broken <end> node, skip it
+ # import pdb; pdb.set_trace()
+ frag = fragments[fid]
+ if frag['themes'] == []:
+ continue # empty themes list.
+ del fragments[fid]
+
+ doc = add_part(snippets,
+ header_type=frag['start_header'],
+ header_index=frag['start_section'],
+ header_span=position - frag['start_section'] + 1,
+ fragment_anchor=fid,
+ content=fix_format(frag['content']),
+ themes=frag['themes'])
+
+ self.index.addDocument(doc)
+
+ # Collect content.
+ elif start is not None:
+ for frag in fragments.values():
+ frag['content'].append(start.text)
+ content.append(start.text)
+ elif end is not None:
+ for frag in fragments.values():
+ frag['content'].append(end.tail)
+ content.append(end.tail)
+
+ # in the end, add a section text.
+ doc = add_part(snippets, header_index=position, header_type=header.tag,
+ content=fix_format(content))
+
+ self.index.addDocument(doc)
+
+ finally:
+ snippets.close()