Add to_dict method to BookInfo class in dcparser.
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13 from catalogue.fields import JSONField
14
15 from librarian import html, dcparser
16
17
# Allowed tag categories as (stored value, translatable label) pairs.
# Passed as ``choices`` to the ``Tag.category`` field below.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
)
26
27
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets only contain rows of one tag category."""

    def __init__(self, subcategory):
        """Remember the category value this manager is restricted to."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default query set narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
35
36
class Tag(TagBase):
    """A catalogue tag belonging to one of the ``TAG_CATEGORIES``."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'),
        max_length=50,
        blank=False,
        null=False,
        db_index=True,
        choices=TAG_CATEGORIES,
    )
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'),
        default=False,
        db_index=True,
        help_text=_('Show tag on main page'),
    )

    user = models.ForeignKey(User, blank=True, null=True)
    book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        description_length = len(self.description)
        return description_length > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as '/'-separated slugs, each looked up with
        ``Tag.objects.get``; anything else is handed to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)
        return [Tag.objects.get(slug=tag_slug) for tag_slug in tags.split('/')]
73
74
def book_upload_path(ext):
    """Build an ``upload_to`` callable for book files with extension ``ext``.

    The returned callable takes ``(book, filename)`` and ignores ``filename``:
    the stored path is derived only from ``book.slug`` and ``ext``.
    """
    def upload_to(book, filename):
        path_parts = (book.slug, ext)
        return 'lektura/%s.%s' % path_parts

    return upload_to
79
80
class Book(models.Model):
    """A book in the catalogue together with its downloadable formats.

    Books can be nested: ``parent`` points at the enclosing book and
    ``parent_number`` holds the position assigned while importing the parent.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # NOTE(review): auto_now=True refreshes this value on every save(), not
    # only on creation; auto_now_add=True may have been intended -- confirm
    # before changing, other code may rely on the current behaviour.
    created_at = models.DateTimeField(_('creation date'), auto_now=True)
    # Cache for short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)
    extra_info = JSONField(_('extra information'))

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def short_html(self):
        """Return the short HTML snippet for this book, marked safe.

        The snippet is rendered from 'catalogue/book_short.html' on first use,
        stored in ``_short_html`` and saved, so later calls return the cached
        markup.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        # Shelves ('set') and themes are not shown in the short listing.
        tags = self.tags.filter(~Q(category__in=('set', 'theme')))
        tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in tags]

        formats = []
        if self.html_file:
            formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
        if self.pdf_file:
            formats.append(u'<a href="%s">Plik PDF</a>' % self.pdf_file.url)
        if self.odt_file:
            formats.append(u'<a href="%s">Plik ODT</a>' % self.odt_file.url)
        if self.txt_file:
            formats.append(u'<a href="%s">Plik TXT</a>' % self.txt_file.url)

        self._short_html = unicode(render_to_string('catalogue/book_short.html',
            {'book': self, 'tags': tags, 'formats': formats}))
        self.save()
        return mark_safe(self._short_html)

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by from_xml_file() when the book exists and overwrite is off."""
        pass

    @staticmethod
    def _get_or_create_tag(name, sort_key, category):
        """Return the tag whose slug is derived from ``name``, creating it if needed.

        An existing tag is returned untouched; a new one is created in a single
        step with the given name, slugified sort key and category.
        """
        from slughifi import slughifi

        tag, created = Tag.objects.get_or_create(slug=slughifi(name), defaults={
            'name': name,
            'sort_key': slughifi(sort_key),
            'category': category,
        })
        return tag

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Create or update a book from an XML file.

        ``xml_file`` is the path of the source XML document. Its metadata is
        read with ``dcparser.parse``; the last segment of the metadata URL
        becomes the book slug. The XML is stored, converted to HTML, and the
        fragments found in the HTML are saved with their theme tags.

        When an existing book is overwritten, its shelf tags (category 'set')
        are kept, whether or not the HTML conversion succeeds.

        Returns the saved ``Book``.

        Raises ``Book.AlreadyExists`` when a book with that slug exists and
        ``overwrite`` is false. ``Book.objects.get`` raises
        ``Book.DoesNotExist`` when a listed part has not been imported yet.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        book_base, book_slug = book_info.url.rsplit('/', 1)
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
        else:
            if not overwrite:
                raise Book.AlreadyExists('Book %s already exists' % book_slug)
            # Save shelves for this book
            book_shelves = list(book.tags.filter(category='set'))

        book.title = book_info.title
        book._short_html = ''  # force short_html() to re-render
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are sorted by last name but displayed as a full name.
                tag_sort_key = tag_name.last_name
                tag_name = ' '.join(tag_name.first_names) + ' ' + tag_name.last_name
            book_tags.append(Book._get_or_create_tag(tag_name, tag_sort_key, category))
        # Restore the shelves right away so they survive even if a later step
        # fails or the HTML conversion produces nothing.
        book.tags = book_tags + book_shelves

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                base, slug = part_url.rsplit('/', 1)
                child_book = Book.objects.get(slug=slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Save XML and HTML files
        xml_source = file(xml_file)
        try:
            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
        finally:
            xml_source.close()

        html_file = NamedTemporaryFile()
        try:
            html_saved = bool(html.transform(book.xml_file.path, html_file))
            if html_saved:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # The stored copy is used from here on; the temporary file can go.
            html_file.close()

        if html_saved:
            # Extract fragments
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            book_themes = []
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, created = Fragment.objects.get_or_create(anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})
                if not created:
                    # ``defaults`` only apply on creation: refresh a fragment
                    # left over from a previous import and drop its cached HTML.
                    new_fragment.text = text
                    new_fragment.short_text = short_text
                    new_fragment._short_html = ''
                    new_fragment.save()

                try:
                    theme_names = [s.strip() for s in fragment.themes.split(',')]
                except AttributeError:
                    # Fragment without a themes string: nothing to tag.
                    continue
                themes = [Book._get_or_create_tag(theme_name, theme_name, 'theme')
                    for theme_name in theme_names]
                # Fragments carry the book's metadata tags plus their own themes.
                new_fragment.tags = book_tags + themes
                book_themes += themes

            book.tags = book_tags + list(set(book_themes)) + book_shelves

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
252
253
class Fragment(models.Model):
    """A tagged excerpt of a book, addressed by an anchor in the book text."""
    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cache for short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    def short_html(self):
        """Return the short HTML snippet for this fragment, marked safe.

        The snippet is rendered from 'catalogue/fragment_short.html' on first
        use, stored in ``_short_html`` and saved, so later calls return the
        cached markup.
        """
        if self._short_html:
            return mark_safe(self._short_html)

        # Let the database pick the author tags instead of loading every tag
        # of the book and filtering in Python.
        author_tags = self.book.tags.filter(category='author')
        book_authors = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name)
            for tag in author_tags]

        self._short_html = unicode(render_to_string('catalogue/fragment_short.html',
            {'fragment': self, 'book': self.book, 'book_authors': book_authors}))
        self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the URL of the book text with this fragment's anchor appended."""
        return '%s#m%s' % (reverse('book_text', kwargs={'slug': self.book.slug}), self.anchor)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')
284