apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11
  12 from django.http import HttpResponse
  13 from django.core.files.uploadedfile import UploadedFile
  14 from django.core.files.storage import DefaultStorage
  15 from django.utils.encoding import force_unicode
  16 from django.utils.hashcompat import sha_constructor
  17 from django.conf import settings
  18 from os import mkdir, path, unlink
  19 from errno import EEXIST, ENOENT
  20 from fcntl import flock, LOCK_EX
  21 from zipfile import ZipFile
  22
  23 from reporting.utils import read_chunks
  24
  25 # Use the system (hardware-based) random number generator if it exists.
  26 if hasattr(random, 'SystemRandom'):
  27     randrange = random.SystemRandom().randrange
  28 else:
  29     randrange = random.randrange
  30 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  31
  32
  33 def get_random_hash(seed):
  34     sha_digest = sha_constructor('%s%s%s%s' %
  35         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  36         settings.SECRET_KEY)).digest()
  37     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  38
  39
  40 def split_tags(tags):
  41     result = {}
  42     for tag in tags:
  43         result.setdefault(tag.category, []).append(tag)
  44     return result
  45
  46
  47 def get_dynamic_path(media, filename, ext=None, maxlen=100):
  48     from slughifi import slughifi
  49
  50     # how to put related book's slug here?
  51     if not ext:
  52         # BookMedia case
  53         ext = media.formats[media.type].ext
  54     if media is None or not media.name:
  55         name = slughifi(filename.split(".")[0])
  56     else:
  57         name = slughifi(media.name)
  58     return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
  59
  60
  61 # TODO: why is this hard-coded ?
  62 def book_upload_path(ext=None, maxlen=100):
  63     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
  64
  65
  66 class ExistingFile(UploadedFile):
  67
  68     def __init__(self, path, *args, **kwargs):
  69         self.path = path
  70         super(ExistingFile, self).__init__(*args, **kwargs)
  71
  72     def temporary_file_path(self):
  73         return self.path
  74
  75     def close(self):
  76         pass
  77
  78
  79 class LockFile(object):
  80     """
  81     A file lock monitor class; createas an ${objname}.lock
  82     file in directory dir, and locks it exclusively.
  83     To be used in 'with' construct.
  84     """
  85     def __init__(self, dir, objname):
  86         self.lockname = path.join(dir, objname + ".lock")
  87
  88     def __enter__(self):
  89         self.lock = open(self.lockname, 'w')
  90         flock(self.lock, LOCK_EX)
  91
  92     def __exit__(self, *err):
  93         try:
  94             unlink(self.lockname)
  95         except OSError as oe:
  96             if oe.errno != EEXIST:
  97                 raise oe
  98         self.lock.close()
  99
 100
 101 #@task
 102 def create_zip(paths, zip_slug):
 103     """
 104     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 105     Resulting archive filename is ${zip_slug}.zip
 106     Returns it's path relative to MEDIA_ROOT (no initial slash)
 107     """
 108     # directory to store zip files
 109     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 110
 111     try:
 112         mkdir(zip_path)
 113     except OSError as oe:
 114         if oe.errno != EEXIST:
 115             raise oe
 116     zip_filename = zip_slug + ".zip"
 117
 118     with LockFile(zip_path, zip_slug):
 119         if not path.exists(path.join(zip_path, zip_filename)):
 120             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 121             try:
 122                 for arcname, p in paths:
 123                     if arcname is None:
 124                         arcname = path.basename(p)
 125                     zipf.write(p, arcname)
 126             finally:
 127                 zipf.close()
 128
 129         return 'zip/' + zip_filename
 130
 131
 132 def remove_zip(zip_slug):
 133     """
 134     removes the ${zip_slug}.zip file from zip store.
 135     """
 136     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 137     try:
 138         unlink(zip_file)
 139     except OSError as oe:
 140         if oe.errno != ENOENT:
 141             raise oe
 142
 143
 144 class AttachmentHttpResponse(HttpResponse):
 145     """Response serving a file to be downloaded.
 146     """
 147     def __init__ (self, file_path, file_name, mimetype):
 148         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 149         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 150         self.file_path = file_path
 151         self.file_name = file_name
 152
 153         with open(DefaultStorage().path(self.file_path)) as f:
 154             for chunk in read_chunks(f):
 155                 self.write(chunk)
 156
 157 class MultiQuerySet(object):
 158     def __init__(self, *args, **kwargs):
 159         self.querysets = args
 160         self._count = None
 161
 162     def count(self):
 163         if not self._count:
 164             self._count = sum(len(qs) for qs in self.querysets)
 165         return self._count
 166
 167     def __len__(self):
 168         return self.count()
 169
 170     def __getitem__(self, item):
 171         try:
 172             indices = (offset, stop, step) = item.indices(self.count())
 173         except AttributeError:
 174             # it's not a slice - make it one
 175             return self[item : item + 1][0]
 176         items = []
 177         total_len = stop - offset
 178         for qs in self.querysets:
 179             if len(qs) < offset:
 180                 offset -= len(qs)
 181             else:
 182                 items += list(qs[offset:stop])
 183                 if len(items) >= total_len:
 184                     return items
 185                 else:
 186                     offset = 0
 187                     stop = total_len - len(items)
 188                     continue
 189
 190
 191 def truncate_html_words(s, num, end_text='...'):
 192     """Truncates HTML to a certain number of words (not counting tags and
 193     comments). Closes opened tags if they were correctly closed in the given
 194     html. Takes an optional argument of what should be used to notify that the
 195     string has been truncated, defaulting to ellipsis (...).
 196
 197     Newlines in the HTML are preserved.
 198
 199     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 200     """
 201     s = force_unicode(s)
 202     length = int(num)
 203     if length <= 0:
 204         return u''
 205     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 206     # Set up regular expressions
 207     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 208     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 209     # Count non-HTML words and keep note of open tags
 210     pos = 0
 211     end_text_pos = 0
 212     words = 0
 213     open_tags = []
 214     while words <= length:
 215         m = re_words.search(s, pos)
 216         if not m:
 217             # Checked through whole string
 218             break
 219         pos = m.end(0)
 220         if m.group(1):
 221             # It's an actual non-HTML word
 222             words += 1
 223             if words == length:
 224                 end_text_pos = pos
 225             continue
 226         # Check for tag
 227         tag = re_tag.match(m.group(0))
 228         if not tag or end_text_pos:
 229             # Don't worry about non tags or tags after our truncate point
 230             continue
 231         closing_tag, tagname, self_closing = tag.groups()
 232         tagname = tagname.lower()  # Element names are always case-insensitive
 233         if self_closing or tagname in html4_singlets:
 234             pass
 235         elif closing_tag:
 236             # Check for match in open tags list
 237             try:
 238                 i = open_tags.index(tagname)
 239             except ValueError:
 240                 pass
 241             else:
 242                 # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
 243                 open_tags = open_tags[i+1:]
 244         else:
 245             # Add it to the start of the open tags list
 246             open_tags.insert(0, tagname)
 247     if words <= length:
 248         # Don't try to close tags if we don't need to truncate
 249         return s
 250     out = s[:end_text_pos]
 251     if end_text:
 252         out += end_text
 253     # Close any tags still open
 254     for tag in open_tags:
 255         out += '</%s>' % tag
 256     # Return string
 257     return out
 258
 259
 260 def customizations_hash(customizations):
 261     customizations.sort()
 262     return hash(tuple(customizations))
 263
 264
 265 def get_customized_pdf_path(book, customizations):
 266     """
 267     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 268     """
 269     h = customizations_hash(customizations)
 270     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 271
 272
 273 def clear_custom_pdf(book):
 274     """
 275     Returns a list of paths to generated customized pdf of a book
 276     """
 277     from waiter.utils import clear_cache
 278     clear_cache('book/%s' % book.slug)