apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import hashlib
   8 import random
   9 import re
  10 import time
  11 from base64 import urlsafe_b64encode
  12
  13 from django.http import HttpResponse
  14 from django.core.files.uploadedfile import UploadedFile
  15 from django.core.files.storage import DefaultStorage
  16 from django.utils.encoding import force_unicode
  17 from django.utils.translation import get_language
  18 from django.conf import settings
  19 from os import mkdir, path, unlink
  20 from errno import EEXIST, ENOENT
  21 from fcntl import flock, LOCK_EX
  22 from zipfile import ZipFile
  23
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1('%s%s%s%s' %
  36         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  37         settings.SECRET_KEY)).digest()
  38     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  39
  40
  41 def split_tags(tags):
  42     result = {}
  43     for tag in tags:
  44         result.setdefault(tag.category, []).append(tag)
  45     return result
  46
  47
  48 def get_dynamic_path(media, filename, ext=None, maxlen=100):
  49     from fnpdjango.utils.text.slughifi import slughifi
  50
  51     # how to put related book's slug here?
  52     if not ext:
  53         # BookMedia case
  54         ext = media.formats[media.type].ext
  55     if media is None or not media.name:
  56         name = slughifi(filename.split(".")[0])
  57     else:
  58         name = slughifi(media.name)
  59     return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
  60
  61
  62 # TODO: why is this hard-coded ?
  63 def book_upload_path(ext=None, maxlen=100):
  64     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
  65
  66
  67 class ExistingFile(UploadedFile):
  68
  69     def __init__(self, path, *args, **kwargs):
  70         self.path = path
  71         super(ExistingFile, self).__init__(*args, **kwargs)
  72
  73     def temporary_file_path(self):
  74         return self.path
  75
  76     def close(self):
  77         pass
  78
  79
  80 class LockFile(object):
  81     """
  82     A file lock monitor class; createas an ${objname}.lock
  83     file in directory dir, and locks it exclusively.
  84     To be used in 'with' construct.
  85     """
  86     def __init__(self, dir, objname):
  87         self.lockname = path.join(dir, objname + ".lock")
  88
  89     def __enter__(self):
  90         self.lock = open(self.lockname, 'w')
  91         flock(self.lock, LOCK_EX)
  92
  93     def __exit__(self, *err):
  94         try:
  95             unlink(self.lockname)
  96         except OSError as oe:
  97             if oe.errno != EEXIST:
  98                 raise oe
  99         self.lock.close()
 100
 101
 102 #@task
 103 def create_zip(paths, zip_slug):
 104     """
 105     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 106     Resulting archive filename is ${zip_slug}.zip
 107     Returns it's path relative to MEDIA_ROOT (no initial slash)
 108     """
 109     # directory to store zip files
 110     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 111
 112     try:
 113         mkdir(zip_path)
 114     except OSError as oe:
 115         if oe.errno != EEXIST:
 116             raise oe
 117     zip_filename = zip_slug + ".zip"
 118
 119     with LockFile(zip_path, zip_slug):
 120         if not path.exists(path.join(zip_path, zip_filename)):
 121             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 122             try:
 123                 for arcname, p in paths:
 124                     if arcname is None:
 125                         arcname = path.basename(p)
 126                     zipf.write(p, arcname)
 127             finally:
 128                 zipf.close()
 129
 130         return 'zip/' + zip_filename
 131
 132
 133 def remove_zip(zip_slug):
 134     """
 135     removes the ${zip_slug}.zip file from zip store.
 136     """
 137     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 138     try:
 139         unlink(zip_file)
 140     except OSError as oe:
 141         if oe.errno != ENOENT:
 142             raise oe
 143
 144
 145 class AttachmentHttpResponse(HttpResponse):
 146     """Response serving a file to be downloaded.
 147     """
 148     def __init__ (self, file_path, file_name, mimetype):
 149         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 150         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 151         self.file_path = file_path
 152         self.file_name = file_name
 153
 154         with open(DefaultStorage().path(self.file_path)) as f:
 155             for chunk in read_chunks(f):
 156                 self.write(chunk)
 157
 158 class MultiQuerySet(object):
 159     def __init__(self, *args, **kwargs):
 160         self.querysets = args
 161         self._count = None
 162
 163     def count(self):
 164         if not self._count:
 165             self._count = sum(len(qs) for qs in self.querysets)
 166         return self._count
 167
 168     def __len__(self):
 169         return self.count()
 170
 171     def __getitem__(self, item):
 172         try:
 173             indices = (offset, stop, step) = item.indices(self.count())
 174         except AttributeError:
 175             # it's not a slice - make it one
 176             return self[item : item + 1][0]
 177         items = []
 178         total_len = stop - offset
 179         for qs in self.querysets:
 180             if len(qs) < offset:
 181                 offset -= len(qs)
 182             else:
 183                 items += list(qs[offset:stop])
 184                 if len(items) >= total_len:
 185                     return items
 186                 else:
 187                     offset = 0
 188                     stop = total_len - len(items)
 189                     continue
 190
 191
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    Arguments:
        s: the HTML to truncate; converted with force_unicode first.
        num: maximum number of words to keep; converted with int().
        end_text: text appended right after the last kept word when the
            input is actually truncated; nothing is appended if it is empty.

    Returns an empty unicode string when num is not positive, the
    (unicode-converted) input unchanged when it has at most num words, and
    otherwise the truncated HTML followed by end_text and the closing tags
    of the elements still open at the cut.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Elements that never take a closing tag, so they are not tracked as open.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity, a tag, or a word; only a word fills group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag captures: closing slash, tag name, self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    # The loop runs until one word past the limit has been seen, which is
    # how the code learns that truncation is really needed.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                # Remember where the last kept word ends.
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    # (open_tags is kept innermost-first, so this closes them in nesting order)
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 259
 260
 261 def customizations_hash(customizations):
 262     customizations.sort()
 263     return hash(tuple(customizations))
 264
 265
 266 def get_customized_pdf_path(book, customizations):
 267     """
 268     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 269     """
 270     h = customizations_hash(customizations)
 271     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 272
 273
 274 def clear_custom_pdf(book):
 275     """
 276     Returns a list of paths to generated customized pdf of a book
 277     """
 278     from waiter.utils import clear_cache
 279     clear_cache('book/%s' % book.slug)
 280
 281
 282 class AppSettings(object):
 283     """Allows specyfying custom settings for an app, with default values.
 284
 285     Just subclass, set some properties and instantiate with a prefix.
 286     Getting a SETTING from an instance will check for prefix_SETTING
 287     in project settings if set, else take the default. The value will be
 288     then filtered through _more_SETTING method, if there is one.
 289
 290     """
 291     def __init__(self, prefix):
 292         self._prefix = prefix
 293
 294     def __getattribute__(self, name):
 295         if name.startswith('_'):
 296             return object.__getattribute__(self, name)
 297         value = getattr(settings,
 298                          "%s_%s" % (self._prefix, name),
 299                          object.__getattribute__(self, name))
 300         more = "_more_%s" % name
 301         if hasattr(self, more):
 302             value = getattr(self, more)(value)
 303         return value
 304
 305
 306 def trim_query_log(trim_to=25):
 307     """
 308 connection.queries includes all SQL statements -- INSERTs, UPDATES, SELECTs, etc. Each time your app hits the database, the query will be recorded.
 309 This can sometimes occupy lots of memory, so trim it here a bit.
 310     """
 311     if settings.DEBUG:
 312         from django.db import connection
 313         connection.queries = trim_to > 0 \
 314             and connection.queries[-trim_to:] \
 315             or []
 316
 317
 318 def related_tag_name(tag_info, language=None):
 319     return tag_info.get("name_%s" % (language or get_language()),
 320         tag_info.get("name_%s" % settings.LANGUAGE_CODE))
 321
 322
 323 def delete_from_cache_by_language(cache, key_template):
 324     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])