move stats to L2

author Radek Czajka <rczajka@rczajka.pl>

Thu, 10 Aug 2023 16:21:01 +0000 (18:21 +0200)

committer Radek Czajka <rczajka@rczajka.pl>

Thu, 10 Aug 2023 20:35:19 +0000 (22:35 +0200)
author Radek Czajka <rczajka@rczajka.pl>
Thu, 10 Aug 2023 16:21:01 +0000 (18:21 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Thu, 10 Aug 2023 20:35:19 +0000 (22:35 +0200)
diff --git a/CHANGELOG.md b/CHANGELOG.md

index a36fd2f..a1cff15 100644 (file)
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,55 @@
  
  This document records all notable changes to Librarian.
  
  
  This document records all notable changes to Librarian.
  
+## 23.08
+
+- Move statistics counter to L2 WLDocument.
+
+## 23.07.1
+
+- Add <category.thema.main>.
+- Support Python 3.7--3.11.
+
+## 2.6.1
+
+- Fix for better ignoring <extra>.
+
+## 2.6
+
+- Change default cover to marquise.
+- Add support for full predesigned covers to marquise.
+- Remove support for changing actual cover class via coverClass.
+
+## 2.5.1
+
+- Bugfix release.
+
+## 2.5
+
+- Add html-snippet builder.
+- Remove DateValue class.
+- Fix some texts and tests.
+- Drop Python < 3.6. Up to 3.9 is supported.
+
+## 2.4.13
+
+- Added thema meta field.
+
+## 2.4.12
+
+- Fix for marquise cover: allow scaling title text in all layouts.
+
+## 2.4.11.1
+
+- Added assigning and preserving id attribute.
+
+## 2.4.10
+
+- Added <wers_srodek>, <tab>, <rownolegle> and <blok>.
+
+## 2.4.9
+
+- Added verse counters to document statistics.
  
  ## 2.4.8 (2022-07-23)
  
  
  ## 2.4.8 (2022-07-23)
  
diff --git a/setup.py b/setup.py

index b4eb161..0f7be1b 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ def whole_tree(prefix, path):
  
  setup(
      name='librarian',
  
  setup(
      name='librarian',
-    version='23.07.1',
+    version='23.8',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
      description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats',
      author="Marek Stępniowski",
      author_email='marek@stepniowski.com',
diff --git a/src/librarian/document.py b/src/librarian/document.py

index aa6f37f..2e9a4a5 100644 (file)
--- a/src/librarian/document.py
+++ b/src/librarian/document.py
@@ -1,13 +1,14 @@
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
+from collections import Counter
  import gettext
  import os
  import re
  import urllib.request
  from lxml import etree
  from .parser import parser
  import gettext
  import os
  import re
  import urllib.request
  from lxml import etree
  from .parser import parser
-from . import dcparser, DCNS, DirDocProvider
+from . import dcparser, DCNS, RDFNS, DirDocProvider
  from .functions import lang_code_3to2
  
  
  from .functions import lang_code_3to2
  
  
@@ -36,8 +37,12 @@ class WLDocument:
      def children(self):
          for part_uri in self.meta.parts or []:
              with self.provider.by_slug(part_uri.slug) as f:
      def children(self):
          for part_uri in self.meta.parts or []:
              with self.provider.by_slug(part_uri.slug) as f:
-                yield type(self)(filename=f, provider=self.provider)
-    
+                try:
+                    yield type(self)(filename=f, provider=self.provider)
+                except Exception as e:
+
+                    yield e
+
      def build(self, builder, base_url=None, **kwargs):
          return builder(base_url=base_url).build(self, **kwargs)
  
      def build(self, builder, base_url=None, **kwargs):
          return builder(base_url=base_url).build(self, **kwargs)
  
@@ -67,7 +72,7 @@ class WLDocument:
                  i += 1
              item.attrib['id'] = f'e{i}'
              i += 1
                  i += 1
              item.attrib['id'] = f'e{i}'
              i += 1
-    
+
      def _compat_assign_ordered_ids(self):
          """
          Compatibility: ids in document order, to be roughly compatible with legacy
      def _compat_assign_ordered_ids(self):
          """
          Compatibility: ids in document order, to be roughly compatible with legacy
@@ -121,3 +126,64 @@ class WLDocument:
  
      def references(self):
          return self.tree.findall('.//ref')
  
      def references(self):
          return self.tree.findall('.//ref')
+
+    def get_statistics(self):
+        def count_text(text, counter, in_fn=False, stanza=False):
+            if text:
+                text = re.sub(r'\s+', ' ', text)
+
+                chars = len(text) if text.strip() else 0
+                words = len(text.split()) if text.strip() else 0
+
+                counter['chars_with_fn'] += chars
+                counter['words_with_fn'] += words
+                if not in_fn:
+                    counter['chars'] += chars
+                    counter['words'] += words
+                if not stanza:
+                    counter['chars_out_verse_with_fn'] += chars
+                    if not in_fn:
+                        counter['chars_out_verse'] += chars
+
+        def count(elem, counter, in_fn=False, stanza=False):
+            if elem.tag in (RDFNS('RDF'), 'nota_red', 'abstrakt', 'uwaga', 'ekstra'):
+                return
+            if not in_fn and elem.tag in ('pa', 'pe', 'pr', 'pt', 'motyw'):
+                in_fn = True
+            if elem.tag == 'strofa':
+                # count verses now
+                #verses = len(elem.findall('.//br')) + 1
+                verses = list(elem.get_verses())
+                counter['verses_with_fn'] += len(verses)
+                if not in_fn:
+                    counter['verses'] += len(verses)
+                stanza = True
+
+                for child in verses:
+                    count(child, counter, in_fn=in_fn, stanza=True)
+            else:
+                count_text(elem.text, counter, in_fn=in_fn, stanza=stanza)
+                for child in elem:
+                    count(child, counter, in_fn=in_fn, stanza=stanza)
+                    count_text(child.tail, counter, in_fn=in_fn, stanza=stanza)
+
+        data = {
+            "self": Counter(),
+            "parts": [],
+            "total": {
+            }
+        }
+
+        count(self.tree.getroot(), data['self'])
+        for k, v in data['self'].items():
+            data['total'][k] = v
+
+        for part in self.children:
+            if isinstance(part, Exception):
+                data['parts'].append((None, {'error': part}))
+            else:
+                data['parts'].append((part, part.get_statistics()))
+                for k, v in data['parts'][-1][1]['total'].items():
+                    data['total'][k] = data['total'].get(k, 0) + v
+
+        return data
diff --git a/src/librarian/parser.py b/src/librarian/parser.py

index 484b8f9..b4e4c5c 100644 (file)
--- a/src/librarian/parser.py
+++ b/src/librarian/parser.py
@@ -67,64 +67,6 @@ class WLDocument:
          else:
              self.book_info = None
  
          else:
              self.book_info = None
  
-    def get_statistics(self):
-        def count_text(text, counter, in_fn=False, stanza=False):
-            if text:
-                text = re.sub(r'\s+', ' ', text)
-
-                chars = len(text) if text.strip() else 0
-                words = len(text.split()) if text.strip() else 0
-                
-                counter['chars_with_fn'] += chars
-                counter['words_with_fn'] += words
-                if not in_fn:
-                    counter['chars'] += chars
-                    counter['words'] += words
-                if not stanza:
-                    counter['chars_out_verse_with_fn'] += chars
-                    if not in_fn:
-                        counter['chars_out_verse'] += chars
-                
-        def count(elem, counter, in_fn=False, stanza=False):
-            if elem.tag in (RDFNS('RDF'), 'nota_red', 'abstrakt', 'uwaga', 'ekstra'):
-                return
-            if not in_fn and elem.tag in ('pa', 'pe', 'pr', 'pt', 'motyw'):
-                in_fn = True
-            if elem.tag == 'strofa':
-                # count verses now
-                verses = len(elem.findall('.//br')) + 1
-                counter['verses_with_fn'] += verses
-                if not in_fn:
-                    counter['verses'] += verses
-                stanza = True
-            count_text(elem.text, counter, in_fn=in_fn, stanza=stanza)
-            for child in elem:
-                count(child, counter, in_fn=in_fn, stanza=stanza)
-                count_text(child.tail, counter, in_fn=in_fn, stanza=stanza)
-
-        self.swap_endlines()
-
-        data = {
-            "self": Counter(),
-            "parts": [],
-            "total": {
-            }
-        }
-
-        count(self.edoc.getroot(), data['self'])
-        for k, v in data['self'].items():
-            data['total'][k] = v
-        
-        for part in self.parts(pass_part_errors=True):
-            if isinstance(part, Exception):
-                data['parts'].append((None, {}))
-            else:
-                data['parts'].append((part, part.get_statistics()))
-                for k, v in data['parts'][-1][1]['total'].items():
-                    data['total'][k] = data['total'].get(k, 0) + v
-            
-        return data
-
      @classmethod
      def from_bytes(cls, xml, *args, **kwargs):
          return cls.from_file(io.BytesIO(xml), *args, **kwargs)
      @classmethod
      def from_bytes(cls, xml, *args, **kwargs):
          return cls.from_file(io.BytesIO(xml), *args, **kwargs)
author	Radek Czajka <rczajka@rczajka.pl>
	Thu, 10 Aug 2023 16:21:01 +0000 (18:21 +0200)
committer	Radek Czajka <rczajka@rczajka.pl>
	Thu, 10 Aug 2023 20:35:19 +0000 (22:35 +0200)
CHANGELOG.md		patch \| blob \| history
setup.py		patch \| blob \| history
src/librarian/document.py		patch \| blob \| history
src/librarian/parser.py		patch \| blob \| history