X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/a9925ebffe713307d6bedecd670df1490aa206c8..ddf2102eff7ea420a4ea5144c43409587fc1156e:/src/catalogue/models/book.py

diff --git a/src/catalogue/models/book.py b/src/catalogue/models/book.py
index f002158a3..3eb6023d0 100644
--- a/src/catalogue/models/book.py
+++ b/src/catalogue/models/book.py
@@ -20,6 +20,7 @@ from fnpdjango.storage import BofhFileSystemStorage
 from lxml import html
 from librarian.cover import WLCover
 from librarian.html import transform_abstrakt
+from librarian.builders import builders
 from newtagging import managers
 from catalogue import constants
 from catalogue import fields
@@ -414,6 +415,21 @@ class Book(models.Model):
     has_daisy_file.short_description = 'DAISY'
     has_daisy_file.boolean = True
 
+    def has_sync_file(self):
+        return self.has_media("sync")
+
+    def get_sync(self):
+        with self.get_media('sync').first().file.open('r') as f:
+            sync = f.read().split('\n')
+        offset = float(sync[0])
+        items = []
+        for line in sync[1:]:
+            if not line:
+                continue
+            start, end, elid = line.split()
+            items.append([elid, float(start) + offset])
+        return json.dumps(items)
+
     def has_audio_epub_file(self):
         return self.has_media("audio.epub")
 
@@ -514,21 +530,11 @@ class Book(models.Model):
         })
         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 
-    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
+    def search_index(self, index=None):
         if not self.findable:
             return
-        if index is None:
-            from search.index import Index
-            index = Index()
-        try:
-            index.index_book(self, book_info)
-            if index_tags:
-                index.index_tags()
-            if commit:
-                index.index.commit()
-        except Exception as e:
-            index.index.rollback()
-            raise e
+        from search.index import Index
+        Index.index_book(self)
 
     # will make problems in conjunction with paid previews
     def download_pictures(self, remote_gallery_url):
@@ -588,7 +594,7 @@ class Book(models.Model):
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
-                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
+                           remote_gallery_url=None, days=0, findable=True):
         from catalogue import tasks
 
         if dont_build is None:
@@ -697,7 +703,7 @@ class Book(models.Model):
                 getattr(book, '%s_file' % format_).build_delay()
 
         if not settings.NO_SEARCH_INDEX and search_index and findable:
-            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
+            tasks.index_book.delay(book.id)
 
         for child in notify_cover_changed:
             child.parent_cover_changed()
@@ -708,45 +714,42 @@ class Book(models.Model):
         cls.published.send(sender=cls, instance=book)
         return book
 
-    def get_master(self):
-        master_tags = [
-            'opowiadanie',
-            'powiesc',
-            'dramat_wierszowany_l',
-            'dramat_wierszowany_lp',
-            'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
-            'wywiad',
-        ]
-        from librarian.parser import WLDocument
-        wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
-        root = wld.edoc.getroot()
-        for master in root.iter():
-            if master.tag in master_tags:
-                return master
-
     def update_references(self):
-        from references.models import Entity, Reference
-        master = self.get_master()
-        if master is None:
-            master = []
-        found = set()
-        for i, sec in enumerate(master):
-            for ref in sec.findall('.//ref'):
-                href = ref.attrib.get('href', '')
-                if not href or href in found:
-                    continue
-                found.add(href)
-                entity, created = Entity.objects.get_or_create(
-                    uri=href
-                )
-                ref, created = Reference.objects.get_or_create(
-                    book=self,
-                    entity=entity
-                )
-                ref.first_section = 'sec%d' % (i + 1)
-                entity.populate()
-                entity.save()
-        Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
+        Entity = apps.get_model('references', 'Entity')
+        doc = self.wldocument2()
+        doc._compat_assign_section_ids()
+        doc._compat_assign_ordered_ids()
+        refs = {}
+        for ref_elem in doc.references():
+            uri = ref_elem.attrib.get('href', '')
+            if not uri:
+                continue
+            if uri in refs:
+                ref = refs[uri]
+            else:
+                entity, entity_created = Entity.objects.get_or_create(uri=uri)
+                if entity_created:
+                    entity.populate()
+                    entity.save()
+                ref, ref_created = entity.reference_set.get_or_create(book=self)
+                refs[uri] = ref
+                if not ref_created:
+                    ref.occurence_set.all().delete()
+            sec = ref_elem.get_link()
+            m = re.match(r'sec(\d+)', sec)
+            assert m is not None
+            sec = int(m.group(1))
+            snippet = ref_elem.get_snippet()
+            b = builders['html-snippet']()
+            for s in snippet:
+                s.html_build(b)
+            html = b.output().get_bytes().decode('utf-8')
+
+            ref.occurence_set.create(
+                section=sec,
+                html=html
+            )
+        self.reference_set.exclude(entity__uri__in=refs).delete()
 
     @property
     def references(self):