apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import hashlib
   8 import random
   9 import re
  10 import time
  11 from base64 import urlsafe_b64encode
  12
  13 from django.http import HttpResponse
  14 from django.core.files.uploadedfile import UploadedFile
  15 from django.core.files.storage import DefaultStorage
  16 from django.utils.encoding import force_unicode
  17 from django.utils.translation import get_language
  18 from django.conf import settings
  19 from os import mkdir, path, unlink
  20 from errno import EEXIST, ENOENT
  21 from fcntl import flock, LOCK_EX
  22 from zipfile import ZipFile
  23
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1('%s%s%s%s' %
  36         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  37         settings.SECRET_KEY)).digest()
  38     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  39
  40
  41 def split_tags(tags, initial=None):
  42     if initial is None:
  43         result = {}
  44     else:
  45         result = initial
  46
  47     for tag in tags:
  48         result.setdefault(tag.category, []).append(tag)
  49     return result
  50
  51
  52 class ExistingFile(UploadedFile):
  53
  54     def __init__(self, path, *args, **kwargs):
  55         self.path = path
  56         super(ExistingFile, self).__init__(*args, **kwargs)
  57
  58     def temporary_file_path(self):
  59         return self.path
  60
  61     def close(self):
  62         pass
  63
  64
  65 class LockFile(object):
  66     """
  67     A file lock monitor class; createas an ${objname}.lock
  68     file in directory dir, and locks it exclusively.
  69     To be used in 'with' construct.
  70     """
  71     def __init__(self, dir, objname):
  72         self.lockname = path.join(dir, objname + ".lock")
  73
  74     def __enter__(self):
  75         self.lock = open(self.lockname, 'w')
  76         flock(self.lock, LOCK_EX)
  77
  78     def __exit__(self, *err):
  79         try:
  80             unlink(self.lockname)
  81         except OSError as oe:
  82             if oe.errno != EEXIST:
  83                 raise oe
  84         self.lock.close()
  85
  86
  87 #@task
  88 def create_zip(paths, zip_slug):
  89     """
  90     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  91     Resulting archive filename is ${zip_slug}.zip
  92     Returns it's path relative to MEDIA_ROOT (no initial slash)
  93     """
  94     # directory to store zip files
  95     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
  96
  97     try:
  98         mkdir(zip_path)
  99     except OSError as oe:
 100         if oe.errno != EEXIST:
 101             raise oe
 102     zip_filename = zip_slug + ".zip"
 103
 104     with LockFile(zip_path, zip_slug):
 105         if not path.exists(path.join(zip_path, zip_filename)):
 106             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 107             try:
 108                 for arcname, p in paths:
 109                     if arcname is None:
 110                         arcname = path.basename(p)
 111                     zipf.write(p, arcname)
 112             finally:
 113                 zipf.close()
 114
 115         return 'zip/' + zip_filename
 116
 117
 118 def remove_zip(zip_slug):
 119     """
 120     removes the ${zip_slug}.zip file from zip store.
 121     """
 122     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 123     try:
 124         unlink(zip_file)
 125     except OSError as oe:
 126         if oe.errno != ENOENT:
 127             raise oe
 128
 129
 130 class AttachmentHttpResponse(HttpResponse):
 131     """Response serving a file to be downloaded.
 132     """
 133     def __init__(self, file_path, file_name, mimetype):
 134         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 135         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 136         self.file_path = file_path
 137         self.file_name = file_name
 138
 139         with open(DefaultStorage().path(self.file_path)) as f:
 140             for chunk in read_chunks(f):
 141                 self.write(chunk)
 142
 143 class MultiQuerySet(object):
 144     def __init__(self, *args, **kwargs):
 145         self.querysets = args
 146         self._count = None
 147
 148     def count(self):
 149         if not self._count:
 150             self._count = sum(len(qs) for qs in self.querysets)
 151         return self._count
 152
 153     def __len__(self):
 154         return self.count()
 155
 156     def __getitem__(self, item):
 157         try:
 158             indices = (offset, stop, step) = item.indices(self.count())
 159         except AttributeError:
 160             # it's not a slice - make it one
 161             return self[item : item + 1][0]
 162         items = []
 163         total_len = stop - offset
 164         for qs in self.querysets:
 165             if len(qs) < offset:
 166                 offset -= len(qs)
 167             else:
 168                 items += list(qs[offset:stop])
 169                 if len(items) >= total_len:
 170                     return items
 171                 else:
 172                     offset = 0
 173                     stop = total_len - len(items)
 174                     continue
 175
 176 class SortedMultiQuerySet(MultiQuerySet):
 177     def __init__(self, *args, **kwargs):
 178         self.order_by = kwargs.pop('order_by', None)
 179         self.sortfn = kwargs.pop('sortfn', None)
 180         if self.order_by is not None:
 181             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 182                                            (getattr(b, f) for f in self.order_by))
 183         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 184
 185     def __getitem__(self, item):
 186         sort_heads = [0] * len(self.querysets)
 187         try:
 188             indices = (offset, stop, step) = item.indices(self.count())
 189         except AttributeError:
 190             # it's not a slice - make it one
 191             return self[item : item + 1][0]
 192         items = []
 193         total_len = stop - offset
 194         skipped = 0
 195         i_s = range(len(sort_heads))
 196
 197         while len(items) < total_len:
 198             candidate = None
 199             candidate_i = None
 200             for i in i_s:
 201                 def get_next():
 202                     return self.querysets[i][sort_heads[i]]
 203                 try:
 204                     if candidate is None:
 205                         candidate = get_next()
 206                         candidate_i = i
 207                     else:
 208                         competitor = get_next()
 209                         if self.sortfn(candidate, competitor) > 0:
 210                             candidate = competitor
 211                             candidate_i = i
 212                 except IndexError:
 213                     continue # continue next sort_head
 214             # we have no more elements:
 215             if candidate is None:
 216                 break
 217             sort_heads[candidate_i] += 1
 218             if skipped < offset:
 219                 skipped += 1
 220                 continue # continue next item
 221             items.append(candidate)
 222
 223         return items
 224
 225
 226 def truncate_html_words(s, num, end_text='...'):
 227     """Truncates HTML to a certain number of words (not counting tags and
 228     comments). Closes opened tags if they were correctly closed in the given
 229     html. Takes an optional argument of what should be used to notify that the
 230     string has been truncated, defaulting to ellipsis (...).
 231
 232     Newlines in the HTML are preserved.
 233
 234     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 235     """
 236     s = force_unicode(s)
 237     length = int(num)
 238     if length <= 0:
 239         return u''
 240     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 241     # Set up regular expressions
 242     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 243     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 244     # Count non-HTML words and keep note of open tags
 245     pos = 0
 246     end_text_pos = 0
 247     words = 0
 248     open_tags = []
 249     while words <= length:
 250         m = re_words.search(s, pos)
 251         if not m:
 252             # Checked through whole string
 253             break
 254         pos = m.end(0)
 255         if m.group(1):
 256             # It's an actual non-HTML word
 257             words += 1
 258             if words == length:
 259                 end_text_pos = pos
 260             continue
 261         # Check for tag
 262         tag = re_tag.match(m.group(0))
 263         if not tag or end_text_pos:
 264             # Don't worry about non tags or tags after our truncate point
 265             continue
 266         closing_tag, tagname, self_closing = tag.groups()
 267         tagname = tagname.lower()  # Element names are always case-insensitive
 268         if self_closing or tagname in html4_singlets:
 269             pass
 270         elif closing_tag:
 271             # Check for match in open tags list
 272             try:
 273                 i = open_tags.index(tagname)
 274             except ValueError:
 275                 pass
 276             else:
 277                 # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
 278                 open_tags = open_tags[i+1:]
 279         else:
 280             # Add it to the start of the open tags list
 281             open_tags.insert(0, tagname)
 282     if words <= length:
 283         # Don't try to close tags if we don't need to truncate
 284         return s
 285     out = s[:end_text_pos]
 286     if end_text:
 287         out += end_text
 288     # Close any tags still open
 289     for tag in open_tags:
 290         out += '</%s>' % tag
 291     # Return string
 292     return out
 293
 294
 295 def customizations_hash(customizations):
 296     customizations.sort()
 297     return hash(tuple(customizations))
 298
 299
 300 def get_customized_pdf_path(book, customizations):
 301     """
 302     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 303     """
 304     h = customizations_hash(customizations)
 305     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 306
 307
 308 def clear_custom_pdf(book):
 309     """
 310     Returns a list of paths to generated customized pdf of a book
 311     """
 312     from waiter.utils import clear_cache
 313     clear_cache('book/%s' % book.slug)
 314
 315
 316 class AppSettings(object):
 317     """Allows specyfying custom settings for an app, with default values.
 318
 319     Just subclass, set some properties and instantiate with a prefix.
 320     Getting a SETTING from an instance will check for prefix_SETTING
 321     in project settings if set, else take the default. The value will be
 322     then filtered through _more_SETTING method, if there is one.
 323
 324     """
 325     def __init__(self, prefix):
 326         self._prefix = prefix
 327
 328     def __getattribute__(self, name):
 329         if name.startswith('_'):
 330             return object.__getattribute__(self, name)
 331         value = getattr(settings,
 332                          "%s_%s" % (self._prefix, name),
 333                          object.__getattribute__(self, name))
 334         more = "_more_%s" % name
 335         if hasattr(self, more):
 336             value = getattr(self, more)(value)
 337         return value
 338
 339
 340 def trim_query_log(trim_to=25):
 341     """
 342 connection.queries includes all SQL statements -- INSERTs, UPDATES, SELECTs, etc. Each time your app hits the database, the query will be recorded.
 343 This can sometimes occupy lots of memory, so trim it here a bit.
 344     """
 345     if settings.DEBUG:
 346         from django.db import connection
 347         connection.queries = trim_to > 0 \
 348             and connection.queries[-trim_to:] \
 349             or []
 350
 351
 352 def delete_from_cache_by_language(cache, key_template):
 353     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])