--- /dev/null
+import sys
+import docx
+from lxml import etree
+
+
# When True, body paragraphs whose Word style has no entry in P_STYLES get
# the style name appended as " [<style name>]" to make them easy to spot.
DEBUG = False

# Namespace prefixes in lxml's "{uri}localname" notation.
DC = "{http://purl.org/dc/elements/1.1/}"
RDF = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}"

# Hard-coded identifiers written into the generated metadata:
# ABOUT becomes the rdf:about attribute, WLURI the dc:identifier.url text.
ABOUT = "http://redakcja.wolnelektury.pl/documents/book/test-icm/"
WLURI = "http://wolnelektury.pl/katalog/lektura/test-icm/"


# Word paragraph style name -> qualified metadata element that receives the
# paragraph's text.
META_STYLES = {
    "Author": DC + "creator",
    "Title": DC + "title",
    "Publisher": DC + "publisher",
    "Year": DC + "date",
    "Editor": DC + "contributor.editor",
    "Copyright holder": DC + "rights",
}


# Word paragraph style name -> tag of the body element created for the
# paragraph. Styles missing from this table fall back to "akap".
# NOTE(review): "Autor" here differs from the "Author" key in META_STYLES;
# confirm which spelling the DOCX templates actually use.
P_STYLES = {
    "Normal": "akap",
    "Autor": "autor_utworu",
    "Title": "nazwa_utworu",
    "Subtitle": "podtytul",
    "Heading 1": "naglowek_czesc",
    "Heading 2": "naglowek_rozdzial",
    "Heading 3": "naglowek_podrozdzial",
    "Heading 4": "srodtytul",
    "Heading 5": "srodtytul",
}
+
+
def wyroznienie(r):
    """Tell whether run *r* should be rendered as emphasis.

    A run counts as emphasised when italic, bold or underline is
    effectively on. Each property is resolved on its own: the value set
    directly on the run wins, and only when that value is ``None`` (not set
    on the run) is the run's character style consulted. Resolving the
    properties independently matters because a run may switch one property
    off explicitly while still inheriting another from its style.

    Args:
        r: a run-like object exposing ``font`` and ``style.font``, each
            with ``italic``, ``bold`` and ``underline`` attributes.

    Returns:
        bool: True if any of the three properties resolves to a truthy
        value, False otherwise.
    """
    run_font = r.font
    style = r.style
    style_font = style.font if style is not None else None

    for prop in ("italic", "bold", "underline"):
        value = getattr(run_font, prop)
        if value is None and style_font is not None:
            # Not set on the run itself: fall back to the character style.
            value = getattr(style_font, prop)
        if value:
            return True
    return False
+
+
def _append_text(element, text):
    """Append *text* after everything *element* already contains."""
    if len(element):
        # Text that follows a child element lives in that child's tail.
        last_child = element[-1]
        last_child.tail = (last_child.tail or '') + text
    else:
        element.text = (element.text or '') + text


def xml_from_docx(f):
    """Convert a DOCX document into an ``<utwor>`` XML document.

    Paragraphs whose style is listed in META_STYLES are copied into the
    RDF description as metadata. Paragraphs whose style is listed in
    P_STYLES, or in neither table, become child elements of ``<powiesc>``;
    styles found in neither table are reported on stderr and emitted as
    ``akap``. Runs for which ``wyroznienie`` is truthy are wrapped in
    ``<wyroznienie>`` elements; all other run text is appended inline.

    Args:
        f: whatever ``docx.Document`` accepts as its source.

    Returns:
        tuple: ``(xml, meta)`` where ``xml`` is the pretty-printed document
        as ``str`` and ``meta`` is a dict holding ``'title'`` (text of the
        last paragraph styled ``Title``) when such a paragraph exists.
    """
    document = docx.Document(f)

    root = etree.Element("utwor")
    rdf = etree.SubElement(root, RDF + "RDF")
    meta = etree.SubElement(rdf, RDF + "Description")
    meta.attrib[RDF + "about"] = ABOUT

    etree.SubElement(meta, DC + "language").text = "pol"
    etree.SubElement(meta, DC + "identifier.url").text = WLURI

    body = etree.SubElement(root, "powiesc")
    extracted = {}

    for paragraph in document.paragraphs:
        style_name = paragraph.style.name
        is_meta = style_name in META_STYLES
        is_mapped = style_name in P_STYLES

        if style_name == 'Title':
            extracted['title'] = paragraph.text
        if is_meta:
            etree.SubElement(meta, META_STYLES[style_name]).text = paragraph.text

        if is_meta and not is_mapped:
            # Metadata-only paragraph: it does not appear in the body.
            continue
        if not is_mapped:
            # Unknown style: report it, then fall back to a plain paragraph.
            print(style_name, file=sys.stderr)

        block = etree.SubElement(body, P_STYLES.get(style_name, "akap"))

        for run in paragraph.runs:
            if wyroznienie(run):
                etree.SubElement(block, "wyroznienie").text = run.text
            else:
                _append_text(block, run.text)

        if DEBUG and not is_mapped:
            # Goes through _append_text so the marker lands at the end of
            # the paragraph and an element with no text yet (text is None)
            # does not raise.
            _append_text(block, f" [{style_name}]")

    return etree.tostring(root, pretty_print=True, encoding='unicode'), extracted
from django import forms
from django.utils.translation import ugettext_lazy as _
from django.conf import settings
-
+from slugify import slugify
from .constants import MASTERS
from .models import Book, Chunk, Image, User
+from .docx import xml_from_docx
class DocumentCreateForm(forms.ModelForm):
"""
"""
file = forms.FileField(required=False)
text = forms.CharField(required=False, widget=forms.Textarea)
+ docx = forms.FileField(required=False)
class Meta:
model = Book
def __init__(self, *args, **kwargs):
super(DocumentCreateForm, self).__init__(*args, **kwargs)
self.fields['slug'].widget.attrs={'class': 'autoslug'}
+ self.fields['slug'].required = False
self.fields['gallery'].widget.attrs={'class': 'autoslug'}
self.fields['title'].widget.attrs={'class': 'autoslug-source'}
+ self.fields['title'].required = False
def clean(self):
super(DocumentCreateForm, self).clean()
except UnicodeDecodeError:
raise forms.ValidationError(_("Text file must be UTF-8 encoded."))
+ docx = self.cleaned_data['docx']
+ if docx is not None:
+ try:
+ text, meta = xml_from_docx(docx)
+ except Exception as e:
+ raise forms.ValidationError(e)
+ else:
+ self.cleaned_data['text'] = text
+ if not self.cleaned_data['title']:
+ self.cleaned_data['title'] = meta.get('title', '')
+ if not self.cleaned_data['slug']:
+ self.cleaned_data['slug'] = slugify(meta.get('title', ''))
+
+ if not self.cleaned_data["title"]:
+ self._errors["title"] = self.error_class([_("Title not set")])
+
+ if not self.cleaned_data["slug"]:
+ self._errors["slug"] = self.error_class([_("Slug not set")])
+
if not self.cleaned_data["text"]:
- self._errors["file"] = self.error_class([_("You must either enter text or upload a file")])
+ self._errors["text"] = self.error_class([_("You must either enter text or upload a file")])
return self.cleaned_data