apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import hashlib
   8 import random
   9 import re
  10 import time
  11 from base64 import urlsafe_b64encode
  12
  13 from django.http import HttpResponse
  14 from django.core.files.uploadedfile import UploadedFile
  15 from django.core.files.storage import DefaultStorage
  16 from django.utils.encoding import force_unicode
  17 from django.conf import settings
  18 from os import mkdir, path, unlink
  19 from errno import EEXIST, ENOENT
  20 from fcntl import flock, LOCK_EX
  21 from zipfile import ZipFile
  22
  23 from reporting.utils import read_chunks
  24
  25 # Use the system (hardware-based) random number generator if it exists.
  26 if hasattr(random, 'SystemRandom'):
  27     randrange = random.SystemRandom().randrange
  28 else:
  29     randrange = random.randrange
  30 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  31
  32
  33 def get_random_hash(seed):
  34     sha_digest = hashlib.sha1('%s%s%s%s' %
  35         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  36         settings.SECRET_KEY)).digest()
  37     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  38
  39
  40 def split_tags(tags, initial=None):
  41     if initial is None:
  42         result = {}
  43     else:
  44         result = initial
  45
  46     for tag in tags:
  47         result.setdefault(tag.category, []).append(tag)
  48     return result
  49
  50
  51 class ExistingFile(UploadedFile):
  52
  53     def __init__(self, path, *args, **kwargs):
  54         self.path = path
  55         super(ExistingFile, self).__init__(*args, **kwargs)
  56
  57     def temporary_file_path(self):
  58         return self.path
  59
  60     def close(self):
  61         pass
  62
  63
  64 class LockFile(object):
  65     """
  66     A file lock monitor class; createas an ${objname}.lock
  67     file in directory dir, and locks it exclusively.
  68     To be used in 'with' construct.
  69     """
  70     def __init__(self, dir, objname):
  71         self.lockname = path.join(dir, objname + ".lock")
  72
  73     def __enter__(self):
  74         self.lock = open(self.lockname, 'w')
  75         flock(self.lock, LOCK_EX)
  76
  77     def __exit__(self, *err):
  78         try:
  79             unlink(self.lockname)
  80         except OSError as oe:
  81             if oe.errno != EEXIST:
  82                 raise oe
  83         self.lock.close()
  84
  85
  86 #@task
  87 def create_zip(paths, zip_slug):
  88     """
  89     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  90     Resulting archive filename is ${zip_slug}.zip
  91     Returns it's path relative to MEDIA_ROOT (no initial slash)
  92     """
  93     # directory to store zip files
  94     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
  95
  96     try:
  97         mkdir(zip_path)
  98     except OSError as oe:
  99         if oe.errno != EEXIST:
 100             raise oe
 101     zip_filename = zip_slug + ".zip"
 102
 103     with LockFile(zip_path, zip_slug):
 104         if not path.exists(path.join(zip_path, zip_filename)):
 105             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 106             try:
 107                 for arcname, p in paths:
 108                     if arcname is None:
 109                         arcname = path.basename(p)
 110                     zipf.write(p, arcname)
 111             finally:
 112                 zipf.close()
 113
 114         return 'zip/' + zip_filename
 115
 116
 117 def remove_zip(zip_slug):
 118     """
 119     removes the ${zip_slug}.zip file from zip store.
 120     """
 121     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 122     try:
 123         unlink(zip_file)
 124     except OSError as oe:
 125         if oe.errno != ENOENT:
 126             raise oe
 127
 128
 129 class AttachmentHttpResponse(HttpResponse):
 130     """Response serving a file to be downloaded.
 131     """
 132     def __init__(self, file_path, file_name, mimetype):
 133         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 134         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 135         self.file_path = file_path
 136         self.file_name = file_name
 137
 138         with open(DefaultStorage().path(self.file_path)) as f:
 139             for chunk in read_chunks(f):
 140                 self.write(chunk)
 141
 142 class MultiQuerySet(object):
 143     def __init__(self, *args, **kwargs):
 144         self.querysets = args
 145         self._count = None
 146
 147     def count(self):
 148         if not self._count:
 149             self._count = sum(len(qs) for qs in self.querysets)
 150         return self._count
 151
 152     def __len__(self):
 153         return self.count()
 154
 155     def __getitem__(self, item):
 156         try:
 157             (offset, stop, step) = item.indices(self.count())
 158         except AttributeError:
 159             # it's not a slice - make it one
 160             return self[item : item + 1][0]
 161         items = []
 162         total_len = stop - offset
 163         for qs in self.querysets:
 164             if len(qs) < offset:
 165                 offset -= len(qs)
 166             else:
 167                 items += list(qs[offset:stop])
 168                 if len(items) >= total_len:
 169                     return items
 170                 else:
 171                     offset = 0
 172                     stop = total_len - len(items)
 173                     continue
 174
 175 class SortedMultiQuerySet(MultiQuerySet):
 176     def __init__(self, *args, **kwargs):
 177         self.order_by = kwargs.pop('order_by', None)
 178         self.sortfn = kwargs.pop('sortfn', None)
 179         if self.order_by is not None:
 180             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 181                                            (getattr(b, f) for f in self.order_by))
 182         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 183
 184     def __getitem__(self, item):
 185         sort_heads = [0] * len(self.querysets)
 186         try:
 187             (offset, stop, step) = item.indices(self.count())
 188         except AttributeError:
 189             # it's not a slice - make it one
 190             return self[item : item + 1][0]
 191         items = []
 192         total_len = stop - offset
 193         skipped = 0
 194         i_s = range(len(sort_heads))
 195
 196         while len(items) < total_len:
 197             candidate = None
 198             candidate_i = None
 199             for i in i_s:
 200                 def get_next():
 201                     return self.querysets[i][sort_heads[i]]
 202                 try:
 203                     if candidate is None:
 204                         candidate = get_next()
 205                         candidate_i = i
 206                     else:
 207                         competitor = get_next()
 208                         if self.sortfn(candidate, competitor) > 0:
 209                             candidate = competitor
 210                             candidate_i = i
 211                 except IndexError:
 212                     continue # continue next sort_head
 213             # we have no more elements:
 214             if candidate is None:
 215                 break
 216             sort_heads[candidate_i] += 1
 217             if skipped < offset:
 218                 skipped += 1
 219                 continue # continue next item
 220             items.append(candidate)
 221
 222         return items
 223
 224
 225 def truncate_html_words(s, num, end_text='...'):
 226     """Truncates HTML to a certain number of words (not counting tags and
 227     comments). Closes opened tags if they were correctly closed in the given
 228     html. Takes an optional argument of what should be used to notify that the
 229     string has been truncated, defaulting to ellipsis (...).
 230
 231     Newlines in the HTML are preserved.
 232
 233     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 234     """
 235     s = force_unicode(s)
 236     length = int(num)
 237     if length <= 0:
 238         return u''
 239     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 240     # Set up regular expressions
 241     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 242     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 243     # Count non-HTML words and keep note of open tags
 244     pos = 0
 245     end_text_pos = 0
 246     words = 0
 247     open_tags = []
 248     while words <= length:
 249         m = re_words.search(s, pos)
 250         if not m:
 251             # Checked through whole string
 252             break
 253         pos = m.end(0)
 254         if m.group(1):
 255             # It's an actual non-HTML word
 256             words += 1
 257             if words == length:
 258                 end_text_pos = pos
 259             continue
 260         # Check for tag
 261         tag = re_tag.match(m.group(0))
 262         if not tag or end_text_pos:
 263             # Don't worry about non tags or tags after our truncate point
 264             continue
 265         closing_tag, tagname, self_closing = tag.groups()
 266         tagname = tagname.lower()  # Element names are always case-insensitive
 267         if self_closing or tagname in html4_singlets:
 268             pass
 269         elif closing_tag:
 270             # Check for match in open tags list
 271             try:
 272                 i = open_tags.index(tagname)
 273             except ValueError:
 274                 pass
 275             else:
 276                 # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
 277                 open_tags = open_tags[i+1:]
 278         else:
 279             # Add it to the start of the open tags list
 280             open_tags.insert(0, tagname)
 281     if words <= length:
 282         # Don't try to close tags if we don't need to truncate
 283         return s
 284     out = s[:end_text_pos]
 285     if end_text:
 286         out += end_text
 287     # Close any tags still open
 288     for tag in open_tags:
 289         out += '</%s>' % tag
 290     # Return string
 291     return out
 292
 293
 294 def customizations_hash(customizations):
 295     customizations.sort()
 296     return hash(tuple(customizations))
 297
 298
 299 def get_customized_pdf_path(book, customizations):
 300     """
 301     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 302     """
 303     h = customizations_hash(customizations)
 304     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 305
 306
 307 def clear_custom_pdf(book):
 308     """
 309     Returns a list of paths to generated customized pdf of a book
 310     """
 311     from waiter.utils import clear_cache
 312     clear_cache('book/%s' % book.slug)
 313
 314
 315 class AppSettings(object):
 316     """Allows specyfying custom settings for an app, with default values.
 317
 318     Just subclass, set some properties and instantiate with a prefix.
 319     Getting a SETTING from an instance will check for prefix_SETTING
 320     in project settings if set, else take the default. The value will be
 321     then filtered through _more_SETTING method, if there is one.
 322
 323     """
 324     def __init__(self, prefix):
 325         self._prefix = prefix
 326
 327     def __getattribute__(self, name):
 328         if name.startswith('_'):
 329             return object.__getattribute__(self, name)
 330         value = getattr(settings,
 331                          "%s_%s" % (self._prefix, name),
 332                          object.__getattribute__(self, name))
 333         more = "_more_%s" % name
 334         if hasattr(self, more):
 335             value = getattr(self, more)(value)
 336         return value
 337
 338
 339 def trim_query_log(trim_to=25):
 340     """
 341 connection.queries includes all SQL statements -- INSERTs, UPDATES, SELECTs, etc. Each time your app hits the database, the query will be recorded.
 342 This can sometimes occupy lots of memory, so trim it here a bit.
 343     """
 344     if settings.DEBUG:
 345         from django.db import connection
 346         connection.queries = trim_to > 0 \
 347             and connection.queries[-trim_to:] \
 348             or []
 349
 350
 351 def delete_from_cache_by_language(cache, key_template):
 352     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])