- doc.add(Field("content",
- u' '.join(filter(lambda s: s is not None, frag['content'])),
- Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS))
- doc.add(Field("themes",
- u' '.join(filter(lambda s: s is not None, frag['themes'])),
- Field.Store.NO, Field.Index.ANALYZED))
-
- fragment_docs.append(doc)
- elif start is not None:
- for frag in fragments.values():
- frag['content'].append(start.text)
- elif end is not None:
- for frag in fragments.values():
- frag['content'].append(end.tail)
-
- return header_docs + fragment_docs
+
+ if 'themes' in fields:
+ themes, themes_pl = zip(*[
+ (Field("themes", theme, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS),
+ Field("themes_pl", theme, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS))
+ for theme in fields['themes']])
+
+ themes = self.add_gaps(themes, 'themes')
+ themes_pl = self.add_gaps(themes_pl, 'themes_pl')
+
+ for t in themes:
+ doc.add(t)
+ for t in themes_pl:
+ doc.add(t)
+
+ return doc
+
+ fragments = {}
+ snippets = Snippets(book.id).open('w')
+ try:
+ for header, position in zip(list(master), range(len(master))):
+
+ if header.tag in self.skip_header_tags:
+ continue
+
+ content = u' '.join([t for t in header.itertext()])
+ content = fix_format(content)
+
+ doc = add_part(snippets, header_index=position, header_type=header.tag, content=content)
+
+ self.index.addDocument(doc)
+
+ for start, end in walker(header):
+ if start is not None and start.tag == 'begin':
+ fid = start.attrib['id'][1:]
+ fragments[fid] = {'content': [], 'themes': [], 'start_section': position, 'start_header': header.tag}
+ fragments[fid]['content'].append(start.tail)
+ elif start is not None and start.tag == 'motyw':
+ fid = start.attrib['id'][1:]
+ fragments[fid]['themes'].append(start.text)
+ fragments[fid]['content'].append(start.tail)
+ elif start is not None and start.tag == 'end':
+ fid = start.attrib['id'][1:]
+ if fid not in fragments:
+ continue # a broken <end> node, skip it
+ frag = fragments[fid]
+ del fragments[fid]
+
+ def jstr(l):
+ return u' '.join(map(
+ lambda x: x == None and u'(none)' or unicode(x),
+ l))
+
+ doc = add_part(snippets,
+ header_type=frag['start_header'],
+ header_index=frag['start_section'],
+ header_span=position - frag['start_section'] + 1,
+ fragment_anchor=fid,
+ content=u' '.join(filter(lambda s: s is not None, frag['content'])),
+ themes=frag['themes'])
+
+ self.index.addDocument(doc)
+ elif start is not None:
+ for frag in fragments.values():
+ frag['content'].append(start.text)
+ elif end is not None:
+ for frag in fragments.values():
+ frag['content'].append(end.tail)
+ finally:
+ snippets.close()
+