apps/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from __future__ import with_statement
   6
   7 import hashlib
   8 import random
   9 import re
  10 import time
  11 from base64 import urlsafe_b64encode
  12
  13 from django.http import HttpResponse
  14 from django.core.files.uploadedfile import UploadedFile
  15 from django.core.files.storage import DefaultStorage
  16 from django.utils.encoding import force_unicode
  17 from django.utils.translation import get_language
  18 from django.conf import settings
  19 from os import mkdir, path, unlink
  20 from errno import EEXIST, ENOENT
  21 from fcntl import flock, LOCK_EX
  22 from zipfile import ZipFile
  23
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1('%s%s%s%s' %
  36         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  37         settings.SECRET_KEY)).digest()
  38     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  39
  40
  41 def split_tags(tags, initial=None):
  42     if initial is None:
  43         result = {}
  44     else:
  45         result = initial
  46
  47     for tag in tags:
  48         result.setdefault(tag.category, []).append(tag)
  49     return result
  50
  51
  52 def get_dynamic_path(media, filename, ext=None, maxlen=100):
  53     from fnpdjango.utils.text.slughifi import slughifi
  54
  55     # how to put related book's slug here?
  56     if not ext:
  57         # BookMedia case
  58         ext = media.formats[media.type].ext
  59     if media is None or not media.name:
  60         name = slughifi(filename.split(".")[0])
  61     else:
  62         name = slughifi(media.name)
  63     return 'book/%s/%s.%s' % (ext, name[:maxlen-len('book/%s/.%s' % (ext, ext))-4], ext)
  64
  65
  66 # TODO: why is this hard-coded ?
  67 def book_upload_path(ext=None, maxlen=100):
  68     return lambda *args: get_dynamic_path(*args, ext=ext, maxlen=maxlen)
  69
  70
class ExistingFile(UploadedFile):
    """An ``UploadedFile`` that points at a file already present on disk.

    The first constructor argument is the file's path; all remaining
    arguments are passed to ``UploadedFile`` unchanged.
    """

    def __init__(self, path, *args, **kwargs):
        # Remember the on-disk location before running the base initializer.
        self.path = path
        super(ExistingFile, self).__init__(*args, **kwargs)

    def temporary_file_path(self):
        """Return the path given at construction time."""
        return self.path

    def close(self):
        """Do nothing when asked to close the file."""
        # NOTE(review): presumably a no-op so that upload handling never
        # closes or discards the pre-existing file -- confirm with callers.
        pass
  82
  83
  84 class LockFile(object):
  85     """
  86     A file lock monitor class; createas an ${objname}.lock
  87     file in directory dir, and locks it exclusively.
  88     To be used in 'with' construct.
  89     """
  90     def __init__(self, dir, objname):
  91         self.lockname = path.join(dir, objname + ".lock")
  92
  93     def __enter__(self):
  94         self.lock = open(self.lockname, 'w')
  95         flock(self.lock, LOCK_EX)
  96
  97     def __exit__(self, *err):
  98         try:
  99             unlink(self.lockname)
 100         except OSError as oe:
 101             if oe.errno != EEXIST:
 102                 raise oe
 103         self.lock.close()
 104
 105
 106 #@task
 107 def create_zip(paths, zip_slug):
 108     """
 109     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 110     Resulting archive filename is ${zip_slug}.zip
 111     Returns it's path relative to MEDIA_ROOT (no initial slash)
 112     """
 113     # directory to store zip files
 114     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 115
 116     try:
 117         mkdir(zip_path)
 118     except OSError as oe:
 119         if oe.errno != EEXIST:
 120             raise oe
 121     zip_filename = zip_slug + ".zip"
 122
 123     with LockFile(zip_path, zip_slug):
 124         if not path.exists(path.join(zip_path, zip_filename)):
 125             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 126             try:
 127                 for arcname, p in paths:
 128                     if arcname is None:
 129                         arcname = path.basename(p)
 130                     zipf.write(p, arcname)
 131             finally:
 132                 zipf.close()
 133
 134         return 'zip/' + zip_filename
 135
 136
 137 def remove_zip(zip_slug):
 138     """
 139     removes the ${zip_slug}.zip file from zip store.
 140     """
 141     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 142     try:
 143         unlink(zip_file)
 144     except OSError as oe:
 145         if oe.errno != ENOENT:
 146             raise oe
 147
 148
 149 class AttachmentHttpResponse(HttpResponse):
 150     """Response serving a file to be downloaded.
 151     """
 152     def __init__ (self, file_path, file_name, mimetype):
 153         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 154         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 155         self.file_path = file_path
 156         self.file_name = file_name
 157
 158         with open(DefaultStorage().path(self.file_path)) as f:
 159             for chunk in read_chunks(f):
 160                 self.write(chunk)
 161
 162 class MultiQuerySet(object):
 163     def __init__(self, *args, **kwargs):
 164         self.querysets = args
 165         self._count = None
 166
 167     def count(self):
 168         if not self._count:
 169             self._count = sum(len(qs) for qs in self.querysets)
 170         return self._count
 171
 172     def __len__(self):
 173         return self.count()
 174
 175     def __getitem__(self, item):
 176         try:
 177             indices = (offset, stop, step) = item.indices(self.count())
 178         except AttributeError:
 179             # it's not a slice - make it one
 180             return self[item : item + 1][0]
 181         items = []
 182         total_len = stop - offset
 183         for qs in self.querysets:
 184             if len(qs) < offset:
 185                 offset -= len(qs)
 186             else:
 187                 items += list(qs[offset:stop])
 188                 if len(items) >= total_len:
 189                     return items
 190                 else:
 191                     offset = 0
 192                     stop = total_len - len(items)
 193                     continue
 194
 195 class SortedMultiQuerySet(MultiQuerySet):
 196     def __init__(self, *args, **kwargs):
 197         self.order_by = kwargs.pop('order_by', None)
 198         self.sortfn = kwargs.pop('sortfn', None)
 199         if self.order_by is not None:
 200             self.sortfn = lambda a, b: cmp(getattr(a, self.order_by),
 201                                            getattr(b, self.order_by))
 202         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 203
 204     def __getitem__(self, item):
 205         sort_heads = [0] * len(self.querysets)
 206         try:
 207             indices = (offset, stop, step) = item.indices(self.count())
 208         except AttributeError:
 209             # it's not a slice - make it one
 210             return self[item : item + 1][0]
 211         items = []
 212         total_len = stop - offset
 213         skipped = 0
 214         i_s = range(len(sort_heads))
 215
 216         while len(items) < total_len:
 217             candidate = None
 218             candidate_i = None
 219             for i in i_s:
 220                 def get_next():
 221                     return self.querysets[i][sort_heads[i]]
 222                 try:
 223                     if candidate is None:
 224                         candidate = get_next()
 225                         candidate_i = i
 226                     else:
 227                         competitor = get_next()
 228                         if self.sortfn(candidate, competitor) > 0:
 229                             candidate = competitor
 230                             candidate_i = i
 231                 except IndexError:
 232                     continue # continue next sort_head
 233             # we have no more elements:
 234             if candidate is None:
 235                 break
 236             sort_heads[candidate_i] += 1
 237             if skipped < offset:
 238                 skipped += 1
 239                 continue # continue next item
 240             items.append(candidate)
 241
 242         return items
 243
 244
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no
    space before the end_text.

    s is converted with force_unicode and num with int(). An empty unicode
    string is returned when num is not positive; the converted input is
    returned unchanged when it has no more than num words.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Tags that never have a closing counterpart.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity, a tag, or a word; only a word fills group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag captures: closing slash, tag name, self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    # Scan until one word more than the limit has been seen; that extra word
    # is what tells us below that truncation is actually needed.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                # Remember where the last word to keep ends.
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    # (open_tags is ordered innermost first, so tags close in nesting order)
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 312
 313
 314 def customizations_hash(customizations):
 315     customizations.sort()
 316     return hash(tuple(customizations))
 317
 318
 319 def get_customized_pdf_path(book, customizations):
 320     """
 321     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 322     """
 323     h = customizations_hash(customizations)
 324     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 325
 326
 327 def clear_custom_pdf(book):
 328     """
 329     Returns a list of paths to generated customized pdf of a book
 330     """
 331     from waiter.utils import clear_cache
 332     clear_cache('book/%s' % book.slug)
 333
 334
 335 class AppSettings(object):
 336     """Allows specyfying custom settings for an app, with default values.
 337
 338     Just subclass, set some properties and instantiate with a prefix.
 339     Getting a SETTING from an instance will check for prefix_SETTING
 340     in project settings if set, else take the default. The value will be
 341     then filtered through _more_SETTING method, if there is one.
 342
 343     """
 344     def __init__(self, prefix):
 345         self._prefix = prefix
 346
 347     def __getattribute__(self, name):
 348         if name.startswith('_'):
 349             return object.__getattribute__(self, name)
 350         value = getattr(settings,
 351                          "%s_%s" % (self._prefix, name),
 352                          object.__getattribute__(self, name))
 353         more = "_more_%s" % name
 354         if hasattr(self, more):
 355             value = getattr(self, more)(value)
 356         return value
 357
 358
 359 def trim_query_log(trim_to=25):
 360     """
 361 connection.queries includes all SQL statements -- INSERTs, UPDATES, SELECTs, etc. Each time your app hits the database, the query will be recorded.
 362 This can sometimes occupy lots of memory, so trim it here a bit.
 363     """
 364     if settings.DEBUG:
 365         from django.db import connection
 366         connection.queries = trim_to > 0 \
 367             and connection.queries[-trim_to:] \
 368             or []
 369
 370
 371 def related_tag_name(tag_info, language=None):
 372     return tag_info.get("name_%s" % (language or get_language()),
 373         tag_info.get("name_%s" % settings.LANGUAGE_CODE, ""))
 374
 375
 376 def delete_from_cache_by_language(cache, key_template):
 377     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])