from lxml import etree
-# Parse args
-usage = """Usage: %prog [options] SOURCE [SOURCE...]
-Extract theme fragments from SOURCE."""
-
-parser = optparse.OptionParser(usage=usage)
-
-parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
-
-options, input_filenames = parser.parse_args()
-
-if len(input_filenames) < 1:
- parser.print_help()
- exit(1)
-
-
class Fragment(object):
def __init__(self, id, themes):
super(Fragment, self).__init__()
return self.to_string()
-# Do some real work
-for input_filename in input_filenames:
- if options.verbose:
- print input_filename
-
- output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
-
+def extract_fragments(input_filename):
+ """Extracts theme fragments from input_filename."""
open_fragments = {}
closed_fragments = {}
- lost_text = []
for event, element in etree.iterparse(input_filename, events=('start', 'end')):
-
# Process begin and end elements
if element.tag == 'span' and element.get('class', '') in ('theme-begin', 'theme-end'):
if not event == 'end': continue # Process elements only once, on end event
else:
for fragment_id in open_fragments:
open_fragments[fragment_id].append(event, copy.copy(element))
+
+ return closed_fragments, open_fragments
+
+if __name__ == '__main__':
+    # Command-line entry point: for every SOURCE file given on the command
+    # line, extract its theme fragments and write them to a sibling
+    # '<name>.fragments.html' file.
+
+    # Parse commandline arguments
+    # The usage text is built from two adjacent literals (not a triple-quoted
+    # string) so that the second line stays flush left in the --help output
+    # even though this code is indented.
+    usage = ('Usage: %prog [options] SOURCE [SOURCE...]\n'
+             'Extract theme fragments from SOURCE.')
+
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+                      help='print status messages to stdout')
+
+    options, input_filenames = parser.parse_args()
+
+    # At least one SOURCE is required; otherwise show the help and fail.
+    if not input_filenames:
+        parser.print_help()
+        # Raising SystemExit directly gives the same exit status as exit(1)
+        # without depending on the helper that the site module injects.
+        raise SystemExit(1)
+
+    # Do some real work
+    for input_filename in input_filenames:
+        if options.verbose:
+            print input_filename
+
+        # foo.xml -> foo.fragments.html, next to the input file
+        output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
+
+        closed_fragments, open_fragments = extract_fragments(input_filename)
- for fragment_id in open_fragments:
- print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+        # Anything still listed in open_fragments was never closed in the input.
+        for fragment_id in open_fragments:
+            print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
- output_file = open(output_filename, 'w')
- output_file.write("""
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
- <html><head>
- <title>bookfragments output</title>
- <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
- <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
- </head>
- <body>""")
- for fragment in closed_fragments.values():
- html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
- output_file.write(html.encode('utf-8'))
- output_file.write('</body></html>')
- output_file.close()
+        output_file = open(output_filename, 'w')
+        # try/finally guarantees the handle is closed even when formatting,
+        # encoding or writing a fragment raises part-way through the file.
+        try:
+            output_file.write("""
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ <html><head>
+ <title>bookfragments output</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
+ <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
+ </head>
+ <body>""")
+            # One <div> per closed fragment; the unicode markup is encoded to
+            # UTF-8 bytes to match the charset declared in the <meta> tag.
+            for fragment in closed_fragments.values():
+                html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
+                output_file.write(html.encode('utf-8'))
+            output_file.write('</body></html>')
+        finally:
+            output_file.close()