Now importing a book that already exists in our database doesn't duplicate its fragm...
[wolnelektury.git] / apps / catalogue / models.py
1 # -*- coding: utf-8 -*-
2 from django.db import models
3 from django.db.models import permalink, Q
4 from django.utils.translation import ugettext_lazy as _
5 from django.contrib.auth.models import User
6 from django.core.files import File
7 from django.template.loader import render_to_string
8 from django.utils.safestring import mark_safe
9 from django.core.urlresolvers import reverse
10
11 from newtagging.models import TagBase
12 from newtagging import managers
13
14 from librarian import html, dcparser
15
16
# Categories a Tag may belong to, as (stored value, translatable label) pairs.
# Used as the ``choices`` of the ``Tag.category`` field.
TAG_CATEGORIES = (
    ('author', _('author')),
    ('epoch', _('epoch')),
    ('kind', _('kind')),
    ('genre', _('genre')),
    ('theme', _('theme')),
    ('set', _('set')),
)
25
26
class TagSubcategoryManager(models.Manager):
    """Manager whose query sets only contain rows of a single category."""

    def __init__(self, subcategory):
        """Remember ``subcategory``, the ``category`` value to filter on."""
        super(TagSubcategoryManager, self).__init__()
        self.subcategory = subcategory

    def get_query_set(self):
        """Return the default query set narrowed to ``self.subcategory``."""
        unfiltered = super(TagSubcategoryManager, self).get_query_set()
        return unfiltered.filter(category=self.subcategory)
34
35
class Tag(TagBase):
    """A catalogue tag; ``category`` is one of the TAG_CATEGORIES values."""

    name = models.CharField(_('name'), max_length=50, db_index=True)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    sort_key = models.SlugField(_('sort key'), max_length=120, db_index=True)
    category = models.CharField(
        _('category'), max_length=50, blank=False, null=False,
        db_index=True, choices=TAG_CATEGORIES)
    description = models.TextField(_('description'), blank=True)
    main_page = models.BooleanField(
        _('main page'), default=False, db_index=True,
        help_text=_('Show tag on main page'))

    user = models.ForeignKey(User, blank=True, null=True)

    class Meta:
        ordering = ('sort_key',)
        verbose_name = _('tag')
        verbose_name_plural = _('tags')

    def __unicode__(self):
        return self.name

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        view_name = 'catalogue.views.tagged_object_list'
        return (view_name, [self.slug])

    def has_description(self):
        """Tell whether the tag has a non-empty description."""
        return len(self.description) != 0
    has_description.short_description = _('description')
    has_description.boolean = True

    @staticmethod
    def get_tag_list(tags):
        """Turn ``tags`` into a list of tags.

        A string is treated as slugs separated by '/', each looked up with
        ``Tag.objects.get``; anything else is delegated to
        ``TagBase.get_tag_list``.
        """
        if not isinstance(tags, basestring):
            return TagBase.get_tag_list(tags)
        return [Tag.objects.get(slug=slug) for slug in tags.split('/')]
71
72
def book_upload_path(ext):
    """Build an ``upload_to`` callable storing files as 'lektura/<slug>.<ext>'."""
    def get_dynamic_path(book, filename):
        # The uploaded file's own name is ignored; the path depends only on
        # the book's slug and the extension fixed when the callable was built.
        name_parts = (book.slug, ext)
        return 'lektura/%s.%s' % name_parts
    return get_dynamic_path
77
78
class Book(models.Model):
    """A book in the catalogue together with its downloadable formats.

    Books can be nested through ``parent``; ``parent_number`` stores the
    position of a part in the ``parts`` list of its parent's metadata.
    """
    title = models.CharField(_('title'), max_length=120)
    slug = models.SlugField(_('slug'), max_length=120, unique=True, db_index=True)
    description = models.TextField(_('description'), blank=True)
    # auto_now_add rather than auto_now: the creation date must not be bumped
    # by later saves (e.g. the save short_html() does to cache its output).
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    # Cache for short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(_('short HTML'), editable=False)
    parent_number = models.IntegerField(_('parent number'), default=0)

    # Formats
    xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True)
    html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True)
    pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True)
    odt_file = models.FileField(_('ODT file'), upload_to=book_upload_path('odt'), blank=True)
    txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True)

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    @property
    def name(self):
        """Alias for ``title``."""
        return self.title

    def short_html(self):
        """Return the book's short HTML snippet, marked safe for templates.

        The snippet is rendered from 'catalogue/book_short.html' on first use
        and cached in ``_short_html``; caching saves the model.
        """
        if len(self._short_html):
            return mark_safe(self._short_html)
        else:
            # NOTE(review): tag names are interpolated into markup unescaped
            # and the result is marked safe -- confirm that imported tag
            # names can never contain HTML.
            tags = self.tags.filter(~Q(category__in=('set', 'theme')))
            tags = [u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name) for tag in tags]

            formats = []
            if self.html_file:
                formats.append(u'<a href="%s">Czytaj online</a>' % reverse('book_text', kwargs={'slug': self.slug}))
            if self.pdf_file:
                formats.append(u'<a href="%s">Plik PDF</a>' % self.pdf_file.url)
            if self.odt_file:
                formats.append(u'<a href="%s">Plik ODT</a>' % self.odt_file.url)
            if self.txt_file:
                formats.append(u'<a href="%s">Plik TXT</a>' % self.txt_file.url)

            self._short_html = unicode(render_to_string('catalogue/book_short.html',
                {'book': self, 'tags': tags, 'formats': formats}))
            self.save()
            return mark_safe(self._short_html)

    def has_description(self):
        """Tell whether the book has a non-empty description."""
        return len(self.description) > 0
    has_description.short_description = _('description')
    has_description.boolean = True

    def has_pdf_file(self):
        """Tell whether a PDF file is attached."""
        return bool(self.pdf_file)
    has_pdf_file.short_description = 'PDF'
    has_pdf_file.boolean = True

    def has_odt_file(self):
        """Tell whether an ODT file is attached."""
        return bool(self.odt_file)
    has_odt_file.short_description = 'ODT'
    has_odt_file.boolean = True

    def has_html_file(self):
        """Tell whether an HTML file is attached."""
        return bool(self.html_file)
    has_html_file.short_description = 'HTML'
    has_html_file.boolean = True

    class AlreadyExists(Exception):
        """Raised by from_xml_file() when the book exists and overwrite is off."""
        pass

    @staticmethod
    def _get_or_create_tag(name, sort_key, category):
        """Return the tag whose slug is derived from ``name``.

        A missing tag is created in a single write with the given name,
        slugified ``sort_key`` and ``category``; an existing tag is returned
        unchanged.
        """
        from slughifi import slughifi

        tag, created = Tag.objects.get_or_create(slug=slughifi(name), defaults={
            'name': name,
            'sort_key': slughifi(sort_key),
            'category': category,
        })
        return tag

    @staticmethod
    def from_xml_file(xml_file, overwrite=False):
        """Import a book from the XML file at path ``xml_file``.

        Creates (or, with ``overwrite``, updates) the Book named by the last
        segment of the metadata URL, assigns its kind/genre/author/epoch
        tags, attaches the books listed in the metadata ``parts`` as
        children, stores the XML and generated HTML files, and creates or
        refreshes the book's fragments and theme tags.

        Returns the saved Book.

        Raises Book.AlreadyExists if the book is already in the database and
        ``overwrite`` is false. A part that is not in the database makes
        ``Book.objects.get`` raise.
        """
        from tempfile import NamedTemporaryFile
        from markupstring import MarkupString

        # Read book metadata
        book_info = dcparser.parse(xml_file)
        # The slug is the last path segment of the book's URL.
        book_slug = book_info.url.rsplit('/', 1)[-1]
        book, created = Book.objects.get_or_create(slug=book_slug)
        if not created and not overwrite:
            raise Book.AlreadyExists('Book %s already exists' % book_slug)

        book.title = book_info.title
        # Drop the cached snippet so it is re-rendered with the new data.
        book._short_html = ''
        book.save()

        book_tags = []
        for category in ('kind', 'genre', 'author', 'epoch'):
            tag_name = getattr(book_info, category)
            tag_sort_key = tag_name
            if category == 'author':
                # Authors are objects: display "first names last name" but
                # sort by last name.
                author = tag_name
                tag_sort_key = author.last_name
                tag_name = ' '.join(author.first_names) + ' ' + author.last_name
            book_tags.append(Book._get_or_create_tag(tag_name, tag_sort_key, category))
        book.tags = book_tags

        if hasattr(book_info, 'parts'):
            for n, part_url in enumerate(book_info.parts):
                child_slug = part_url.rsplit('/', 1)[-1]
                child_book = Book.objects.get(slug=child_slug)
                child_book.parent = book
                child_book.parent_number = n
                child_book.save()

        # Save XML and HTML files
        xml_source = open(xml_file)
        try:
            book.xml_file.save('%s.xml' % book.slug, File(xml_source), save=False)
        finally:
            xml_source.close()

        html_file = NamedTemporaryFile()
        try:
            html_saved = bool(html.transform(book.xml_file.path, html_file))
            if html_saved:
                book.html_file.save('%s.html' % book.slug, File(html_file), save=False)
        finally:
            # Closing also removes the temporary file.
            html_file.close()

        if html_saved:
            # Extract fragments (only the closed ones are imported).
            closed_fragments, open_fragments = html.extract_fragments(book.html_file.path)
            # The book's own tags do not change inside the loop; read them once.
            base_tags = list(book.tags)
            book_themes = set()
            for fragment in closed_fragments.values():
                text = fragment.to_string()
                markup = MarkupString(text)
                short_text = ''
                if len(markup) > 240:
                    short_text = unicode(markup[:160])
                new_fragment, created = Fragment.objects.get_or_create(
                    anchor=fragment.id, book=book,
                    defaults={'text': text, 'short_text': short_text})
                if not created:
                    # Re-import: refresh the stored text and invalidate the
                    # cached snippet instead of keeping stale content.
                    new_fragment.text = text
                    new_fragment.short_text = short_text
                    new_fragment._short_html = ''
                    new_fragment.save()

                try:
                    raw_theme_names = fragment.themes.split(',')
                except AttributeError:
                    # Fragment carries no usable themes; leave it untagged.
                    continue
                # Skip blanks (e.g. from a trailing comma) so that no tag
                # with an empty slug is created.
                theme_names = [s.strip() for s in raw_theme_names if s.strip()]
                themes = [Book._get_or_create_tag(theme_name, theme_name, 'theme')
                          for theme_name in theme_names]
                new_fragment.tags = base_tags + themes
                book_themes.update(themes)

            book.tags = base_tags + list(book_themes)

        book.save()
        return book

    @permalink
    def get_absolute_url(self):
        """Return the (view name, args) pair that ``permalink`` resolves."""
        return ('catalogue.views.book_detail', [self.slug])

    class Meta:
        ordering = ('title',)
        verbose_name = _('book')
        verbose_name_plural = _('books')

    def __unicode__(self):
        return self.title
242
243
class Fragment(models.Model):
    """A passage of a book, identified within that book by ``anchor``."""

    text = models.TextField()
    short_text = models.TextField(editable=False)
    # Cache for short_html(); an empty string means "not rendered yet".
    _short_html = models.TextField(editable=False)
    anchor = models.CharField(max_length=120)
    book = models.ForeignKey(Book, related_name='fragments')

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)

    class Meta:
        ordering = ('book', 'anchor',)
        verbose_name = _('fragment')
        verbose_name_plural = _('fragments')

    def short_html(self):
        """Return the fragment's short HTML snippet, marked safe.

        Rendered from 'catalogue/fragment_short.html' on first use and then
        cached in ``_short_html``; caching saves the model.
        """
        if len(self._short_html):
            return mark_safe(self._short_html)

        # Links to the authors of the book this fragment comes from.
        author_links = []
        for tag in self.book.tags:
            if tag.category == 'author':
                author_links.append(
                    u'<a href="%s">%s</a>' % (tag.get_absolute_url(), tag.name))

        context = {'fragment': self, 'book': self.book, 'book_authors': author_links}
        self._short_html = unicode(
            render_to_string('catalogue/fragment_short.html', context))
        self.save()
        return mark_safe(self._short_html)

    def get_absolute_url(self):
        """Return the book text URL with this fragment's '#m<anchor>' suffix."""
        book_text_url = reverse('book_text', kwargs={'slug': self.book.slug})
        return '%s#m%s' % (book_text_url, self.anchor)
274