apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11
  12 from django.http import HttpResponse
  13 from django.core.files.uploadedfile import UploadedFile
  14 from django.core.files.storage import DefaultStorage
  15 from django.utils.encoding import force_unicode
  16 from django.utils.hashcompat import sha_constructor
  17 from django.conf import settings
  18 from os import mkdir, path, unlink
  19 from errno import EEXIST, ENOENT
  20 from fcntl import flock, LOCK_EX
  21 from zipfile import ZipFile
  22
  23 from reporting.utils import read_chunks
  24
  25 # Use the system (hardware-based) random number generator if it exists.
  26 if hasattr(random, 'SystemRandom'):
  27     randrange = random.SystemRandom().randrange
  28 else:
  29     randrange = random.randrange
  30 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  31
  32
  33 def get_random_hash(seed):
  34     sha_digest = sha_constructor('%s%s%s%s' %
  35         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  36         settings.SECRET_KEY)).digest()
  37     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  38
  39
  40 def split_tags(tags):
  41     result = {}
  42     for tag in tags:
  43         result.setdefault(tag.category, []).append(tag)
  44     return result
  45
  46
  47 class ExistingFile(UploadedFile):
  48
  49     def __init__(self, path, *args, **kwargs):
  50         self.path = path
  51         super(ExistingFile, self).__init__(*args, **kwargs)
  52
  53     def temporary_file_path(self):
  54         return self.path
  55
  56     def close(self):
  57         pass
  58
  59
  60 class LockFile(object):
  61     """
  62     A file lock monitor class; createas an ${objname}.lock
  63     file in directory dir, and locks it exclusively.
  64     To be used in 'with' construct.
  65     """
  66     def __init__(self, dir, objname):
  67         self.lockname = path.join(dir, objname + ".lock")
  68
  69     def __enter__(self):
  70         self.lock = open(self.lockname, 'w')
  71         flock(self.lock, LOCK_EX)
  72
  73     def __exit__(self, *err):
  74         try:
  75             unlink(self.lockname)
  76         except OSError as oe:
  77             if oe.errno != EEXIST:
  78                 raise oe
  79         self.lock.close()
  80
  81
  82 #@task
  83 def create_zip(paths, zip_slug):
  84     """
  85     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  86     Resulting archive filename is ${zip_slug}.zip
  87     Returns it's path relative to MEDIA_ROOT (no initial slash)
  88     """
  89     # directory to store zip files
  90     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
  91
  92     try:
  93         mkdir(zip_path)
  94     except OSError as oe:
  95         if oe.errno != EEXIST:
  96             raise oe
  97     zip_filename = zip_slug + ".zip"
  98
  99     with LockFile(zip_path, zip_slug):
 100         if not path.exists(path.join(zip_path, zip_filename)):
 101             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 102             try:
 103                 for arcname, p in paths:
 104                     if arcname is None:
 105                         arcname = path.basename(p)
 106                     zipf.write(p, arcname)
 107             finally:
 108                 zipf.close()
 109
 110         return 'zip/' + zip_filename
 111
 112
 113 def remove_zip(zip_slug):
 114     """
 115     removes the ${zip_slug}.zip file from zip store.
 116     """
 117     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 118     try:
 119         unlink(zip_file)
 120     except OSError as oe:
 121         if oe.errno != ENOENT:
 122             raise oe
 123
 124
 125 class AttachmentHttpResponse(HttpResponse):
 126     """Response serving a file to be downloaded.
 127     """
 128     def __init__ (self, file_path, file_name, mimetype):
 129         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 130         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 131         self.file_path = file_path
 132         self.file_name = file_name
 133
 134         with open(DefaultStorage().path(self.file_path)) as f:
 135             for chunk in read_chunks(f):
 136                 self.write(chunk)
 137
 138 class MultiQuerySet(object):
 139     def __init__(self, *args, **kwargs):
 140         self.querysets = args
 141         self._count = None
 142
 143     def count(self):
 144         if not self._count:
 145             self._count = sum(len(qs) for qs in self.querysets)
 146         return self._count
 147
 148     def __len__(self):
 149         return self.count()
 150
 151     def __getitem__(self, item):
 152         try:
 153             indices = (offset, stop, step) = item.indices(self.count())
 154         except AttributeError:
 155             # it's not a slice - make it one
 156             return self[item : item + 1][0]
 157         items = []
 158         total_len = stop - offset
 159         for qs in self.querysets:
 160             if len(qs) < offset:
 161                 offset -= len(qs)
 162             else:
 163                 items += list(qs[offset:stop])
 164                 if len(items) >= total_len:
 165                     return items
 166                 else:
 167                     offset = 0
 168                     stop = total_len - len(items)
 169                     continue
 170
 171
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    Arguments:
        s: the HTML to truncate; it is passed through force_unicode first.
        num: maximum number of words to keep; converted with int().
        end_text: text appended directly after the last kept word when
            truncation happens; nothing is appended if it is empty.

    Returns an empty string when num is not positive, the (unicode-forced)
    input unchanged when it has at most num words, and otherwise the
    truncated text followed by end_text and closing tags for the elements
    still open at the truncation point.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Elements that never have a closing tag and so are never tracked as open.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity (&...;), a tag (<...>) or a word; only a
    # word fills group 1, which is how words are told apart from markup.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag groups: (1) closing slash, (2) tag name, (3) self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0  # end of the length-th word; 0 while it has not been reached
    words = 0
    open_tags = []  # most recently opened tag first
    # Scan until one word beyond the limit has been seen (which proves that
    # truncation is needed) or the string runs out.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                # Closing tag without a matching open tag: ignore it.
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    # (open_tags is ordered innermost-first, so the closing tags nest correctly)
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 239
 240
 241 def customizations_hash(customizations):
 242     customizations.sort()
 243     return hash(tuple(customizations))
 244
 245
 246 def get_customized_pdf_path(book, customizations):
 247     """
 248     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 249     """
 250     h = customizations_hash(customizations)
 251     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 252
 253
 254 def clear_custom_pdf(book):
 255     """
 256     Returns a list of paths to generated customized pdf of a book
 257     """
 258     from waiter.utils import clear_cache
 259     clear_cache('book/%s' % book.slug)