Cutting stuff out.
[wolnelektury.git] / src / catalogue / models / book.py
index 85cfd63..3eb6023 100644 (file)
@@ -20,6 +20,7 @@ from fnpdjango.storage import BofhFileSystemStorage
 from lxml import html
 from librarian.cover import WLCover
 from librarian.html import transform_abstrakt
+from librarian.builders import builders
 from newtagging import managers
 from catalogue import constants
 from catalogue import fields
@@ -529,21 +530,11 @@ class Book(models.Model):
         })
         return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
 
-    def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
+    def search_index(self, index=None):
         if not self.findable:
             return
-        if index is None:
-            from search.index import Index
-            index = Index()
-        try:
-            index.index_book(self, book_info)
-            if index_tags:
-                index.index_tags()
-            if commit:
-                index.index.commit()
-        except Exception as e:
-            index.index.rollback()
-            raise e
+        from search.index import Index
+        Index.index_book(self)
 
     # will make problems in conjunction with paid previews
     def download_pictures(self, remote_gallery_url):
@@ -603,7 +594,7 @@ class Book(models.Model):
 
     @classmethod
     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
-                           search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
+                           remote_gallery_url=None, days=0, findable=True):
         from catalogue import tasks
 
         if dont_build is None:
@@ -712,7 +703,7 @@ class Book(models.Model):
                 getattr(book, '%s_file' % format_).build_delay()
 
         if not settings.NO_SEARCH_INDEX and search_index and findable:
-            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
+            tasks.index_book.delay(book.id)
 
         for child in notify_cover_changed:
             child.parent_cover_changed()
@@ -723,45 +714,42 @@ class Book(models.Model):
         cls.published.send(sender=cls, instance=book)
         return book
 
-    def get_master(self):
-        master_tags = [
-            'opowiadanie',
-            'powiesc',
-            'dramat_wierszowany_l',
-            'dramat_wierszowany_lp',
-            'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
-            'wywiad',
-        ]
-        from librarian.parser import WLDocument
-        wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
-        root = wld.edoc.getroot()
-        for master in root.iter():
-            if master.tag in master_tags:
-                return master
-
     def update_references(self):
-        from references.models import Entity, Reference
-        master = self.get_master()
-        if master is None:
-            master = []
-        found = set()
-        for i, sec in enumerate(master):
-            for ref in sec.findall('.//ref'):
-                href = ref.attrib.get('href', '')
-                if not href or href in found:
-                    continue
-                found.add(href)
-                entity, created = Entity.objects.get_or_create(
-                    uri=href
-                )
-                ref, created = Reference.objects.get_or_create(
-                    book=self,
-                    entity=entity
-                )
-                ref.first_section = 'sec%d' % (i + 1)
-                entity.populate()
-                entity.save()
-        Reference.objects.filter(book=self).exclude(entity__uri__in=found).delete()
+        Entity = apps.get_model('references', 'Entity')
+        doc = self.wldocument2()
+        doc._compat_assign_section_ids()
+        doc._compat_assign_ordered_ids()
+        refs = {}
+        for ref_elem in doc.references():
+            uri = ref_elem.attrib.get('href', '')
+            if not uri:
+                continue
+            if uri in refs:
+                ref = refs[uri]
+            else:
+                entity, entity_created = Entity.objects.get_or_create(uri=uri)
+                if entity_created:
+                    entity.populate()
+                    entity.save()
+                ref, ref_created = entity.reference_set.get_or_create(book=self)
+                refs[uri] = ref
+                if not ref_created:
+                    ref.occurence_set.all().delete()
+            sec = ref_elem.get_link()
+            m = re.match(r'sec(\d+)', sec)
+            assert m is not None
+            sec = int(m.group(1))
+            snippet = ref_elem.get_snippet()
+            b = builders['html-snippet']()
+            for s in snippet:
+                s.html_build(b)
+            html = b.output().get_bytes().decode('utf-8')
+
+            ref.occurence_set.create(
+                section=sec,
+                html=html
+            )
+        self.reference_set.exclude(entity__uri__in=refs).delete()
 
     @property
     def references(self):