+ "nbsp",
+ exclude=[DCNS("identifier.url"), DCNS("rights.license"), DCNS("title"), DCNS("description")]
+ )
+
+
def move_motifs_inside(doc):
    """Move top-level motifs into the block element that follows them.

    For every ``motyw`` element that is a direct child of one of the main
    text containers, the motif is detached and re-inserted as the first
    child of its nearest following sibling that is an ordinary block, i.e.
    an element whose tag is not one of the separator/marker tags listed in
    ``special_tags``.  A motif with no such following sibling is left
    untouched.

    Args:
        doc: lxml tree (or element) providing ``xpath()``; modified in place.
    """
    main_tags = ('powiesc', 'opowiadanie', 'liryka_l', 'liryka_lp',
                 'dramat_wierszowany_l', 'dramat_wierszowany_lp',
                 'dramat_wspolczesny')
    # Siblings that must not receive a motif; built once, not per sibling.
    special_tags = ('sekcja_swiatlo', 'sekcja_asterysk', 'separator_linia',
                    'begin', 'end', 'motyw', 'extra', 'uwaga')
    for master in doc.xpath('|'.join('//' + tag for tag in main_tags)):
        for motif in master.xpath('motyw'):
            for sib in motif.itersiblings():
                # Comments and processing instructions have a non-string tag
                # and cannot take children; skip them so the motif is not
                # detached and then lost when insert() is refused.
                if not isinstance(sib.tag, str):
                    continue
                if sib.tag in special_tags:
                    continue
                # motif shouldn't have a tail - it would be untagged text
                motif.tail = None
                motif.getparent().remove(motif)
                sib.insert(0, motif)
                break
+
+
def hack_motifs(doc):
    """Dirty hack for the marginpar-creates-orphans LaTeX problem.

    See http://www.latex-project.org/cgi-bin/ltxbugs2html?pr=latex/2304

    Within each stanza (``strofa``) a motif found in the first verse, or in
    the next-to-last verse, is relocated to directly after a later line
    break (``br``): one break further, or two when exactly two breaks
    follow the verse.  The relocated copy gets a ``moved`` attribute holding
    the number of breaks skipped; the original motif element is turned into
    an empty ``span`` so its tail text stays in place.
    NOTE(review): the negative vspace mentioned in the original docstring is
    not inserted here - presumably the ``moved`` attribute is consumed by a
    later processing step; confirm.

    Args:
        doc: lxml tree containing the stanzas; modified in place.
    """
    # A relative path is used on purpose: lxml only accepts a leading '//'
    # on an ElementTree, where it warns and rewrites it to './/' anyway.
    for motif in doc.findall('.//strofa//motyw'):
        # find relevant verse-level tag: the ancestor (or the motif itself)
        # that is a direct child of the enclosing stanza
        verse, stanza = motif, motif.getparent()
        while stanza is not None and stanza.tag != 'strofa':
            verse, stanza = stanza, stanza.getparent()

        breaks_before = sum(
            1 for _ in verse.itersiblings('br', preceding=True))
        following_breaks = list(verse.itersiblings('br'))
        breaks_after = len(following_breaks)

        in_first_verse = breaks_before == 0 and breaks_after > 0
        if not (in_first_verse or breaks_after == 1):
            continue

        # Skip one break, or two when exactly two breaks follow the verse.
        move_by = 2 if breaks_after == 2 else 1

        moved_motif = deepcopy(motif)
        # Keep the original node (and its tail text) as an empty placeholder.
        motif.tag = 'span'
        motif.text = None
        moved_motif.tail = None
        moved_motif.set('moved', str(move_by))

        # The condition above guarantees at least ``move_by`` following breaks.
        following_breaks[move_by - 1].addnext(moved_motif)
+
+
def parse_creator(doc):
    """Add readable variants of creator and translator tags.

    Every dc:creator and dc:contributor.translator element with non-empty
    text gets a copy inserted at the start of its parent.  The copy's tag
    carries a ``_parsed`` suffix, its ``sortkey`` attribute holds the
    original text, and its text is ``Person.from_text(...).readable()``
    (per the original note: forenames first).
    """
    dc_tags = ('creator', 'contributor.translator')
    query = "|".join('//dc:' + tag for tag in dc_tags)
    found = doc.xpath(query, namespaces={'dc': str(DCNS)})
    # Walk backwards: each copy is inserted at index 0, so reversing keeps
    # the copies in the same relative order as their originals.
    for original in reversed(found):
        raw_name = original.text
        if not raw_name:
            continue
        who = Person.from_text(raw_name)
        parsed = deepcopy(original)
        parsed.tag = original.tag + '_parsed'
        parsed.set('sortkey', raw_name)
        parsed.text = who.readable()
        original.getparent().insert(0, parsed)