apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11
  12 from django.http import HttpResponse, HttpResponseRedirect, Http404, HttpResponsePermanentRedirect
  13 from django.core.files.uploadedfile import UploadedFile
  14 from django.core.files.base import File
  15 from django.core.files.storage import DefaultStorage
  16 from django.utils.encoding import force_unicode
  17 from django.utils.hashcompat import sha_constructor
  18 from django.conf import settings
  19 from celery.task import task
  20 from os import mkdir, path, unlink
  21 from errno import EEXIST, ENOENT
  22 from fcntl import flock, LOCK_EX
  23 from zipfile import ZipFile
  24 from traceback import print_exc
  25
  26 from reporting.utils import read_chunks
  27 from celery.task import task
  28 import catalogue.models
  29
  30 # Use the system (hardware-based) random number generator if it exists.
  31 if hasattr(random, 'SystemRandom'):
  32     randrange = random.SystemRandom().randrange
  33 else:
  34     randrange = random.randrange
  35 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  36
  37
  38 def get_random_hash(seed):
  39     sha_digest = sha_constructor('%s%s%s%s' %
  40         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  41         settings.SECRET_KEY)).digest()
  42     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  43
  44
  45 def split_tags(tags):
  46     result = {}
  47     for tag in tags:
  48         result.setdefault(tag.category, []).append(tag)
  49     return result
  50
  51
  52 class ExistingFile(UploadedFile):
  53
  54     def __init__(self, path, *args, **kwargs):
  55         self.path = path
  56         super(ExistingFile, self).__init__(*args, **kwargs)
  57
  58     def temporary_file_path(self):
  59         return self.path
  60
  61     def close(self):
  62         pass
  63
  64
  65 class LockFile(object):
  66     """
  67     A file lock monitor class; createas an ${objname}.lock
  68     file in directory dir, and locks it exclusively.
  69     To be used in 'with' construct.
  70     """
  71     def __init__(self, dir, objname):
  72         self.lockname = path.join(dir, objname + ".lock")
  73
  74     def __enter__(self):
  75         self.lock = open(self.lockname, 'w')
  76         flock(self.lock, LOCK_EX)
  77
  78     def __exit__(self, *err):
  79         try:
  80             unlink(self.lockname)
  81         except OSError as oe:
  82             if oe.errno != EEXIST:
  83                 raise oe
  84         self.lock.close()
  85
  86
  87 @task
  88 def create_zip(paths, zip_slug):
  89     """
  90     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  91     Resulting archive filename is ${zip_slug}.zip
  92     Returns it's path relative to MEDIA_ROOT (no initial slash)
  93     """
  94     # directory to store zip files
  95     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
  96
  97     try:
  98         mkdir(zip_path)
  99     except OSError as oe:
 100         if oe.errno != EEXIST:
 101             raise oe
 102     zip_filename = zip_slug + ".zip"
 103
 104     with LockFile(zip_path, zip_slug):
 105         if not path.exists(path.join(zip_path, zip_filename)):
 106             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 107             try:
 108                 for arcname, p in paths:
 109                     if arcname is None:
 110                         arcname = path.basename(p)
 111                     zipf.write(p, arcname)
 112             finally:
 113                 zipf.close()
 114
 115         return 'zip/' + zip_filename
 116
 117
 118 def remove_zip(zip_slug):
 119     """
 120     removes the ${zip_slug}.zip file from zip store.
 121     """
 122     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 123     try:
 124         unlink(zip_file)
 125     except OSError as oe:
 126         if oe.errno != ENOENT:
 127             raise oe
 128
 129
 130 class AttachmentHttpResponse(HttpResponse):
 131     """Response serving a file to be downloaded.
 132     """
 133     def __init__ (self, file_path, file_name, mimetype):
 134         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 135         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 136         self.file_path = file_path
 137         self.file_name = file_name
 138
 139         with open(DefaultStorage().path(self.file_path)) as f:
 140             for chunk in read_chunks(f):
 141                 self.write(chunk)
 142
 143 @task
 144 def async_build_pdf(book_id, customizations, file_name):
 145     """
 146     A celery task to generate pdf files.
 147     Accepts the same args as Book.build_pdf, but with book id as first parameter
 148     instead of Book instance
 149     """
 150     try:
 151         book = catalogue.models.Book.objects.get(id=book_id)
 152         print "will gen %s" % DefaultStorage().path(file_name)
 153         if not DefaultStorage().exists(file_name):
 154             book.build_pdf(customizations=customizations, file_name=file_name)
 155         print "done."
 156     except Exception, e:
 157         print "Error during pdf creation: %s" % e
 158         print_exc
 159         raise e
 160
 161
 162 class MultiQuerySet(object):
 163     def __init__(self, *args, **kwargs):
 164         self.querysets = args
 165         self._count = None
 166
 167     def count(self):
 168         if not self._count:
 169             self._count = sum(len(qs) for qs in self.querysets)
 170         return self._count
 171
 172     def __len__(self):
 173         return self.count()
 174
 175     def __getitem__(self, item):
 176         try:
 177             indices = (offset, stop, step) = item.indices(self.count())
 178         except AttributeError:
 179             # it's not a slice - make it one
 180             return self[item : item + 1][0]
 181         items = []
 182         total_len = stop - offset
 183         for qs in self.querysets:
 184             if len(qs) < offset:
 185                 offset -= len(qs)
 186             else:
 187                 items += list(qs[offset:stop])
 188                 if len(items) >= total_len:
 189                     return items
 190                 else:
 191                     offset = 0
 192                     stop = total_len - len(items)
 193                     continue
 194
 195
 196 def truncate_html_words(s, num, end_text='...'):
 197     """Truncates HTML to a certain number of words (not counting tags and
 198     comments). Closes opened tags if they were correctly closed in the given
 199     html. Takes an optional argument of what should be used to notify that the
 200     string has been truncated, defaulting to ellipsis (...).
 201
 202     Newlines in the HTML are preserved.
 203
 204     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 205     """
 206     s = force_unicode(s)
 207     length = int(num)
 208     if length <= 0:
 209         return u''
 210     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 211     # Set up regular expressions
 212     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 213     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 214     # Count non-HTML words and keep note of open tags
 215     pos = 0
 216     end_text_pos = 0
 217     words = 0
 218     open_tags = []
 219     while words <= length:
 220         m = re_words.search(s, pos)
 221         if not m:
 222             # Checked through whole string
 223             break
 224         pos = m.end(0)
 225         if m.group(1):
 226             # It's an actual non-HTML word
 227             words += 1
 228             if words == length:
 229                 end_text_pos = pos
 230             continue
 231         # Check for tag
 232         tag = re_tag.match(m.group(0))
 233         if not tag or end_text_pos:
 234             # Don't worry about non tags or tags after our truncate point
 235             continue
 236         closing_tag, tagname, self_closing = tag.groups()
 237         tagname = tagname.lower()  # Element names are always case-insensitive
 238         if self_closing or tagname in html4_singlets:
 239             pass
 240         elif closing_tag:
 241             # Check for match in open tags list
 242             try:
 243                 i = open_tags.index(tagname)
 244             except ValueError:
 245                 pass
 246             else:
 247                 # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
 248                 open_tags = open_tags[i+1:]
 249         else:
 250             # Add it to the start of the open tags list
 251             open_tags.insert(0, tagname)
 252     if words <= length:
 253         # Don't try to close tags if we don't need to truncate
 254         return s
 255     out = s[:end_text_pos]
 256     if end_text:
 257         out += end_text
 258     # Close any tags still open
 259     for tag in open_tags:
 260         out += '</%s>' % tag
 261     # Return string
 262     return out