from lxml import html
from librarian.cover import WLCover
from librarian.html import transform_abstrakt
+from librarian.builders import builders
from newtagging import managers
from catalogue import constants
from catalogue import fields
})
return create_zip(paths, "%s_%s" % (self.slug, format_), {'informacje.txt': readme})
def search_index(self, index=None):
    """Submit this book to the search index, if it is findable.

    Does nothing when ``self.findable`` is falsy.

    Args:
        index: Never read by this implementation; it is still accepted so
            that callers passing an index object keep working.
    """
    if not self.findable:
        return
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import with the search app; confirm before moving.
    from search.index import Index
    Index.index_book(self)
# NOTE: will cause problems in conjunction with paid previews
def download_pictures(self, remote_gallery_url):
@classmethod
def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
- search_index_tags=True, remote_gallery_url=None, days=0, findable=True):
+ remote_gallery_url=None, days=0, findable=True):
from catalogue import tasks
if dont_build is None:
getattr(book, '%s_file' % format_).build_delay()
if not settings.NO_SEARCH_INDEX and search_index and findable:
- tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
+ tasks.index_book.delay(book.id)
for child in notify_cover_changed:
child.parent_cover_changed()
cls.published.send(sender=cls, instance=book)
return book
- def get_master(self):
- master_tags = [
- 'opowiadanie',
- 'powiesc',
- 'dramat_wierszowany_l',
- 'dramat_wierszowany_lp',
- 'dramat_wspolczesny', 'liryka_l', 'liryka_lp',
- 'wywiad',
- ]
- from librarian.parser import WLDocument
- wld = WLDocument.from_file(self.xml_file.path, parse_dublincore=False)
- root = wld.edoc.getroot()
- for master in root.iter():
- if master.tag in master_tags:
- return master
-
def update_references(self):
    """Rebuild this book's references and their occurrences from its document.

    For every element yielded by ``self.wldocument2().references()`` that
    carries a non-empty ``href``, get or create the ``references.Entity``
    with that URI and its reference to this book, then record one
    occurrence holding the section number and the rendered HTML snippet.

    Occurrences of a reference that already existed in the database are
    deleted the first time its URI is seen in this run, so they are
    rebuilt from scratch.  References of this book whose entity URI was not
    seen at all are deleted at the end.

    Raises:
        ValueError: If an element's link does not start with
            ``sec<number>``.
    """
    # Resolved through the app registry instead of a module-level import.
    Entity = apps.get_model('references', 'Entity')

    doc = self.wldocument2()
    # NOTE(review): presumably these assign the ids that get_link() reports
    # below -- confirm against librarian.
    doc._compat_assign_section_ids()
    doc._compat_assign_ordered_ids()

    section_link = re.compile(r'sec(\d+)')
    refs = {}  # entity URI -> reference object handled during this run
    for ref_elem in doc.references():
        uri = ref_elem.attrib.get('href', '')
        if not uri:
            continue

        if uri in refs:
            ref = refs[uri]
        else:
            entity, entity_created = Entity.objects.get_or_create(uri=uri)
            if entity_created:
                # Only newly created entities are populated and saved.
                entity.populate()
                entity.save()
            ref, ref_created = entity.reference_set.get_or_create(book=self)
            refs[uri] = ref
            if not ref_created:
                # Pre-existing reference: drop its stored occurrences once,
                # before the ones from this document are added.
                ref.occurence_set.all().delete()

        link = ref_elem.get_link()
        match = section_link.match(link)
        if match is None:
            # Explicit check instead of ``assert``, which is stripped when
            # Python runs with -O.
            raise ValueError(
                'Unexpected section link %r for reference %r' % (link, uri)
            )
        section = int(match.group(1))

        builder = builders['html-snippet']()
        for part in ref_elem.get_snippet():
            part.html_build(builder)
        snippet_html = builder.output().get_bytes().decode('utf-8')

        ref.occurence_set.create(
            section=section,
            html=snippet_html,
        )

    # Remove references whose entity no longer appears in the document.
    self.reference_set.exclude(entity__uri__in=list(refs)).delete()
@property
def references(self):