Added caching of latest blog posts fetched from wolnepodreczniki blog.
[wolnelektury.git] / bin / bookfragments.py
index 73d2711..ea2b0b7 100755 (executable)
@@ -7,22 +7,6 @@ import copy
 from lxml import etree
 
 
-# Parse args
-usage = """Usage: %prog [options] SOURCE [SOURCE...]
-Extract theme fragments from SOURCE."""
-
-parser = optparse.OptionParser(usage=usage)
-
-parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
-    help='print status messages to stdout')
-
-options, input_filenames = parser.parse_args()
-
-if len(input_filenames) < 1:
-    parser.print_help()
-    exit(1)
-
-
 class Fragment(object):
     def __init__(self, id, themes):
         super(Fragment, self).__init__()
@@ -67,19 +51,12 @@ class Fragment(object):
         return self.to_string()
 
 
-# Do some real work
-for input_filename in input_filenames:
-    if options.verbose:
-        print input_filename
-    
-    output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
-    
+def extract_fragments(input_filename):
+    """Extracts theme fragments from input_filename."""
     open_fragments = {}
     closed_fragments = {}
-    lost_text = []
     
     for event, element in etree.iterparse(input_filename, events=('start', 'end')):
-        
         # Process begin and end elements
         if element.tag == 'span' and element.get('class', '') in ('theme-begin', 'theme-end'):
             if not event == 'end': continue # Process elements only once, on end event
@@ -126,23 +103,50 @@ for input_filename in input_filenames:
             else:
                 for fragment_id in open_fragments:
                     open_fragments[fragment_id].append(event, copy.copy(element))
+        
+    return closed_fragments, open_fragments
+
 
+if __name__ == '__main__':
+    # Parse commandline arguments
+    usage = """Usage: %prog [options] SOURCE [SOURCE...]
+    Extract theme fragments from SOURCE."""
+    
+    parser = optparse.OptionParser(usage=usage)
+    
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+        help='print status messages to stdout')
+    
+    options, input_filenames = parser.parse_args()
+    
+    if len(input_filenames) < 1:
+        parser.print_help()
+        exit(1)
+    
+    # Do some real work
+    for input_filename in input_filenames:
+        if options.verbose:
+            print input_filename
+    
+        output_filename = os.path.splitext(input_filename)[0] + '.fragments.html'
+    
+        closed_fragments, open_fragments = extract_fragments(input_filename)
 
-    for fragment_id in open_fragments:
-        print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
+        for fragment_id in open_fragments:
+            print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id)
 
-    output_file = open(output_filename, 'w')
-    output_file.write("""
-        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-        <html><head>
-            <title>bookfragments output</title>
-            <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
-            <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
-        </head>
-        <body>""")
-    for fragment in closed_fragments.values():
-        html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
-        output_file.write(html.encode('utf-8'))
-    output_file.write('</body></html>')
-    output_file.close()
+        output_file = open(output_filename, 'w')
+        output_file.write("""
+            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+            <html><head>
+                <title>bookfragments output</title>
+                <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
+                <link rel="stylesheet" href="master.css" type="text/css" media="screen" charset="utf-8" />
+            </head>
+            <body>""")
+        for fragment in closed_fragments.values():
+            html = u'<div class="fragment"><h3>[#%s] %s</h3>%s</div>' % (fragment.id, fragment.themes, fragment)
+            output_file.write(html.encode('utf-8'))
+        output_file.write('</body></html>')
+        output_file.close()