sorl-thumbnail==12.8.0
# home-brewed & dependencies
-librarian==24.5
+librarian==24.9
# celery tasks
celery[redis]==5.2.7
class TxtField(EbookField):
+ # Ebook field for the plain-text ('txt') rendition of a book.
ext = 'txt'
for_parents = False
+ # NOTE(review): presumably marks this field as using the librarian 2 document API
+ # (HtmlField sets the same flag and builds from book.wldocument2()) - confirm in EbookField.
+ librarian2_api = True
@staticmethod
def transform(wldoc, book):
- return wldoc.as_text()
+ # Build the text output with TxtBuilder; `book` is not used here.
+ # The builder is imported locally, inside the function.
+ from librarian.builders.txt import TxtBuilder
+ return TxtBuilder().build(wldoc)
class Fb2Field(EbookField):
class HtmlField(EbookField):
ext = 'html'
for_parents = False
+ librarian2_api = True
def build(self, fieldfile):
from django.core.files.base import ContentFile
book = fieldfile.instance
- html_output = self.transform(book.wldocument(parse_dublincore=False), book)
+ html_output = self.transform(book.wldocument2(), book)
# Delete old fragments, create from scratch if necessary.
book.fragments.all().delete()
@staticmethod
def transform(wldoc, book):
- # ugly, but we can't use wldoc.book_info here
- from librarian import DCNS
- url_elem = wldoc.edoc.getroot().find('.//' + DCNS('identifier.url'))
- if url_elem is None:
+ # Render `wldoc` to HTML with HtmlBuilder, passing it the gallery locations
+ # derived from the document's metadata URL. `book` is not used here.
+ from librarian.builders.html import HtmlBuilder
+ url = wldoc.meta.url
+ # Without a URL in the metadata there is no slug, so both gallery locations stay empty.
+ if not url:
gal_url = ''
gal_path = ''
else:
- slug = url_elem.text.rstrip('/').rsplit('/', 1)[1]
- gal_url = gallery_url(slug=slug)
- gal_path = gallery_path(slug=slug)
- return wldoc.as_html(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url))
+ gal_url = gallery_url(slug=url.slug)
+ gal_path = gallery_path(slug=url.slug)
+ # base_url is the absolute form of the gallery URL.
+ return HtmlBuilder(gallery_path=gal_path, gallery_url=gal_url, base_url=absolute_url(gal_url)).build(wldoc)
class CoverField(EbookField):
+++ /dev/null
-# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
-#
-from django.core.management.base import BaseCommand
-
-from catalogue.models import Book
-
-
-class Command(BaseCommand):
- def handle(self, *args, **options):
- for b in Book.objects.order_by('slug'):
- print(b.slug)
- b.load_abstract()
- b.save()
+++ /dev/null
-# This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
-#
-from django.core.management import BaseCommand
-from catalogue.models import Tag
-
-
-class Command(BaseCommand):
- help = "Update description for given tag."
-
- def add_arguments(self, parser):
- parser.add_argument('category')
- parser.add_argument('slug')
- parser.add_argument('description_filename')
-
- def handle(self, category, slug, description_filename, **options):
- tag = Tag.objects.get(category=category, slug=slug)
- description = open(description_filename).read().decode('utf-8')
- tag.description = description
- tag.save()
--- /dev/null
+# Generated by Django 4.0.8 on 2024-09-17 14:14
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Adds the `anchor` CharField (max_length=64) to the `snippet` model.
+ # Existing rows are filled with '' by the one-off default below.
+ # NOTE(review): no data migration backfilling Snippet.anchor is visible here, while the
+ # search template links to `#{{ f.anchor }}` - presumably books get reindexed afterwards; confirm.
+
+ dependencies = [
+ ('catalogue', '0048_remove_collection_kind_remove_tag_for_books_and_more'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='snippet',
+ name='anchor',
+ field=models.CharField(default='', max_length=64),
+ preserve_default=False,
+ ),
+ ]
from fnpdjango.storage import BofhFileSystemStorage
from lxml import html
from librarian.cover import WLCover
-from librarian.html import transform_abstrakt
+from librarian.builders.html import AbstraktHtmlBuilder
from librarian.builders import builders
from newtagging import managers
from catalogue import constants
return int(total)
def get_time(self):
- return round(self.xml_file.size / 1000 * 40)
+ # Returns round(xml_file.size / 1000 * 40), or 0 when reading the size raises ValueError.
+ # NOTE(review): presumably ValueError means no file is attached to xml_file - confirm.
+ try:
+ return round(self.xml_file.size / 1000 * 40)
+ except ValueError:
+ return 0
def has_media(self, type_):
if type_ in Book.formats:
urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
def load_abstract(self):
- abstract = self.wldocument(parse_dublincore=False).edoc.getroot().find('.//abstrakt')
- if abstract is not None:
- self.abstract = transform_abstrakt(abstract)
- else:
- self.abstract = ''
+ # Render the abstract with AbstraktHtmlBuilder and store the UTF-8 decoded output
+ # on self.abstract. The model is not saved here.
+ # NOTE(review): the previous code stored '' when the document had no <abstrakt> element;
+ # confirm the builder produces empty output in that case.
+ self.abstract = AbstraktHtmlBuilder().build(
+ self.wldocument2()).get_bytes().decode('utf-8')
def load_toc(self):
self.toc = ''
cls.published.send(sender=cls, instance=book)
return book
+ # TODO: add tests for update_references().
def update_references(self):
Entity = apps.get_model('references', 'Entity')
doc = self.wldocument2()
- doc._compat_assign_section_ids()
- doc._compat_assign_ordered_ids()
+ doc.assign_ids()
+
refs = {}
- for ref_elem in doc.references():
+ for i, ref_elem in enumerate(doc.references()):
uri = ref_elem.attrib.get('href', '')
if not uri:
continue
refs[uri] = ref
if not ref_created:
ref.occurence_set.all().delete()
- sec = ref_elem.get_link()
- m = re.match(r'sec(\d+)', sec)
- assert m is not None
- sec = int(m.group(1))
+ anchor = ref_elem.get_link()
+
snippet = ref_elem.get_snippet()
b = builders['html-snippet']()
for s in snippet:
html = b.output().get_bytes().decode('utf-8')
ref.occurence_set.create(
- section=sec,
+ section=i,
+ anchor=anchor,
html=html
)
self.reference_set.exclude(entity__uri__in=refs).delete()
class Snippet(models.Model):
book = models.ForeignKey('Book', models.CASCADE)
sec = models.IntegerField()
+ anchor = models.CharField(max_length=64)
text = models.TextField()
search_vector = SearchVectorField()
@register.simple_tag
def related_books_2022(book=None, limit=4, taken=0):
limit -= taken
+ if limit < 0:
+ return []
max_books = limit
books_qs = Book.objects.filter(findable=True)
--- /dev/null
+# Generated by Django 4.0.8 on 2024-09-18 11:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Adds the `anchor` CharField (max_length=64) to the `occurence` model.
+ # Existing rows are filled with '' by the one-off default below; the migration that
+ # depends on this one (update_anchor) then rewrites those empty anchors.
+
+ dependencies = [
+ ('references', '0002_remove_reference_first_section_occurence'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='occurence',
+ name='anchor',
+ field=models.CharField(default='', max_length=64),
+ preserve_default=False,
+ ),
+ ]
--- /dev/null
+# Generated by Django 4.0.8 on 2024-09-18 11:29
+
+from django.db import migrations, models
+from django.db.models.functions import Concat
+
+
+def update_anchor(apps, schema_editor):
+ Occurence = apps.get_model('references', 'Occurence')
+ Occurence.objects.filter(anchor='').update(
+ anchor=Concat(models.Value('sec'), 'section')
+ )
+
+
+class Migration(migrations.Migration):
+ # Data migration: runs update_anchor when applied; reversing it is a no-op.
+
+ dependencies = [
+ ('references', '0003_occurence_anchor'),
+ ]
+
+ operations = [
+ migrations.RunPython(update_anchor, migrations.RunPython.noop)
+ ]
class Occurence(models.Model):
reference = models.ForeignKey(Reference, models.CASCADE)
section = models.IntegerField()
+ anchor = models.CharField(max_length=64)
html = models.TextField()
class Meta:
</div>
{% for occ in ref.occurence_set.all %}
- <a target="_blank" href="/katalog/lektura/{{ ref.book.slug }}.html#sec{{ occ.section }}" class="c-search-result-fragment-text">
+ <a target="_blank" href="/katalog/lektura/{{ ref.book.slug }}.html#{{ occ.anchor }}" class="c-search-result-fragment-text">
{{ occ.html|safe }}
</a>
{% endfor %}
# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
#
import re
-from librarian.parser import WLDocument
+from librarian.elements.base import WLElement
+from librarian.document import WLDocument
from lxml import etree
skip_header_tags = ['autor_utworu', 'nazwa_utworu', 'dzielo_nadrzedne',
'{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF']
- @classmethod
- def get_master(cls, root):
- """
- Returns the first master tag from an etree.
- """
- for master in root.iter():
- if master.tag in cls.master_tags:
- return master
-
@staticmethod
- def add_snippet(book, text, position):
+ def add_snippet(book, text, position, anchor):
+ # Create a snippet row for `book`: `sec` is `position` shifted to start at 1,
+ # `text` and `anchor` are stored as given.
book.snippet_set.create(
sec=position + 1,
- text=text
+ text=text,
+ anchor=anchor
)
+ # TODO: section links will not work as-is; confirm that `anchor` (the element's `_id`) matches the ids in the rendered HTML.
@classmethod
def index_book(cls, book):
"""
book.snippet_set.all().delete()
- wld = WLDocument.from_file(book.xml_file.path, parse_dublincore=False)
- root = wld.edoc.getroot()
+ wld = WLDocument(filename=book.xml_file.path)
+ wld.assign_ids()
- master = cls.get_master(root)
+ master = wld.tree.getroot().master
if master is None:
return []
+ def get_indexable(element):
+ for child in element:
+ if not isinstance(child, WLElement):
+ continue
+ if not child.attrib.get('_id'):
+ for e in get_indexable(child):
+ yield e
+ else:
+ yield child
+
def walker(node):
if node.tag not in cls.ignore_content_tags:
yield node, None, None
return re.sub("(?m)/$", "", text)
- for position, header in enumerate(master):
+ for position, header in enumerate(get_indexable(master)):
if header.tag in cls.skip_header_tags:
continue
if header.tag is etree.Comment:
continue
+ el_id = header.attrib['_id']
+
# section content
content = []
footnote = []
handle_text.append(collect_footnote)
elif end is not None and footnote is not [] and end.tag in cls.footnote_tags:
handle_text.pop()
- cls.add_snippet(book, ''.join(footnote), position)
+ cls.add_snippet(book, ''.join(footnote), position, el_id)
footnote = []
if text is not None and handle_text is not []:
hdl(text)
# in the end, add a section text.
- cls.add_snippet(book, fix_format(content), position)
+ cls.add_snippet(book, fix_format(content), position, el_id)
{{ book.title }}
</a>
{% for f in snippets %}
- <a class="c-search-result-fragment-text" href='{% url 'book_text' f.book.slug %}#sec{{ f.sec }}'>
+ <a class="c-search-result-fragment-text" href='{% url 'book_text' f.book.slug %}#{{ f.anchor }}'>
{{ f.headline|safe }}
</a>
{% endfor %}