src/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from collections import defaultdict
   6 import hashlib
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11
  12 from django.http import HttpResponse
  13 from django.core.files.uploadedfile import UploadedFile
  14 from django.core.files.storage import DefaultStorage
  15 from django.utils.encoding import force_unicode
  16 from django.conf import settings
  17 from os import mkdir, path, unlink
  18 from errno import EEXIST, ENOENT
  19 from fcntl import flock, LOCK_EX
  20 from zipfile import ZipFile
  21
  22 from reporting.utils import read_chunks
  23
  24 # Use the system (hardware-based) random number generator if it exists.
  25 if hasattr(random, 'SystemRandom'):
  26     randrange = random.SystemRandom().randrange
  27 else:
  28     randrange = random.randrange
  29 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  30
  31
  32 def get_random_hash(seed):
  33     sha_digest = hashlib.sha1('%s%s%s%s' %
  34         (randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'),
  35         settings.SECRET_KEY)).digest()
  36     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  37
  38
  39 def split_tags(*tag_lists):
  40     if len(tag_lists) == 1:
  41         result = defaultdict(list)
  42         for tag in tag_lists[0]:
  43             result[tag.category].append(tag)
  44     else:
  45         result = defaultdict(dict)
  46         for tag_list in tag_lists:
  47             for tag in tag_list:
  48                 try:
  49                     result[tag.category][tag.pk].count += tag.count
  50                 except KeyError:
  51                     result[tag.category][tag.pk] = tag
  52         for k, v in result.items():
  53             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  54     return result
  55
  56
class ExistingFile(UploadedFile):
    """An ``UploadedFile`` whose content lives at a known filesystem path.

    NOTE(review): presumably used to hand an already-existing file to code
    that expects an upload -- confirm against the callers.
    """

    def __init__(self, path, *args, **kwargs):
        # Remember the location; all remaining arguments are passed to
        # UploadedFile unchanged.
        self.path = path
        super(ExistingFile, self).__init__(*args, **kwargs)

    def temporary_file_path(self):
        # Returns the path given at construction time.
        return self.path

    def close(self):
        # No-op: this override does not close or remove anything.
        pass
  68
  69
  70 class LockFile(object):
  71     """
  72     A file lock monitor class; createas an ${objname}.lock
  73     file in directory dir, and locks it exclusively.
  74     To be used in 'with' construct.
  75     """
  76     def __init__(self, dir, objname):
  77         self.lockname = path.join(dir, objname + ".lock")
  78
  79     def __enter__(self):
  80         self.lock = open(self.lockname, 'w')
  81         flock(self.lock, LOCK_EX)
  82
  83     def __exit__(self, *err):
  84         try:
  85             unlink(self.lockname)
  86         except OSError as oe:
  87             if oe.errno != EEXIST:
  88                 raise oe
  89         self.lock.close()
  90
  91
  92 #@task
  93 def create_zip(paths, zip_slug):
  94     """
  95     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  96     Resulting archive filename is ${zip_slug}.zip
  97     Returns it's path relative to MEDIA_ROOT (no initial slash)
  98     """
  99     # directory to store zip files
 100     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 101
 102     try:
 103         mkdir(zip_path)
 104     except OSError as oe:
 105         if oe.errno != EEXIST:
 106             raise oe
 107     zip_filename = zip_slug + ".zip"
 108
 109     with LockFile(zip_path, zip_slug):
 110         if not path.exists(path.join(zip_path, zip_filename)):
 111             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 112             try:
 113                 for arcname, p in paths:
 114                     if arcname is None:
 115                         arcname = path.basename(p)
 116                     zipf.write(p, arcname)
 117             finally:
 118                 zipf.close()
 119
 120         return 'zip/' + zip_filename
 121
 122
 123 def remove_zip(zip_slug):
 124     """
 125     removes the ${zip_slug}.zip file from zip store.
 126     """
 127     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 128     try:
 129         unlink(zip_file)
 130     except OSError as oe:
 131         if oe.errno != ENOENT:
 132             raise oe
 133
 134
 135 class AttachmentHttpResponse(HttpResponse):
 136     """Response serving a file to be downloaded.
 137     """
 138     def __init__(self, file_path, file_name, mimetype):
 139         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 140         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 141         self.file_path = file_path
 142         self.file_name = file_name
 143
 144         with open(DefaultStorage().path(self.file_path)) as f:
 145             for chunk in read_chunks(f):
 146                 self.write(chunk)
 147
 148 class MultiQuerySet(object):
 149     def __init__(self, *args, **kwargs):
 150         self.querysets = args
 151         self._count = None
 152
 153     def count(self):
 154         if not self._count:
 155             self._count = sum(len(qs) for qs in self.querysets)
 156         return self._count
 157
 158     def __len__(self):
 159         return self.count()
 160
 161     def __getitem__(self, item):
 162         try:
 163             (offset, stop, step) = item.indices(self.count())
 164         except AttributeError:
 165             # it's not a slice - make it one
 166             return self[item : item + 1][0]
 167         items = []
 168         total_len = stop - offset
 169         for qs in self.querysets:
 170             if len(qs) < offset:
 171                 offset -= len(qs)
 172             else:
 173                 items += list(qs[offset:stop])
 174                 if len(items) >= total_len:
 175                     return items
 176                 else:
 177                     offset = 0
 178                     stop = total_len - len(items)
 179                     continue
 180
 181 class SortedMultiQuerySet(MultiQuerySet):
 182     def __init__(self, *args, **kwargs):
 183         self.order_by = kwargs.pop('order_by', None)
 184         self.sortfn = kwargs.pop('sortfn', None)
 185         if self.order_by is not None:
 186             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 187                                            (getattr(b, f) for f in self.order_by))
 188         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 189
 190     def __getitem__(self, item):
 191         sort_heads = [0] * len(self.querysets)
 192         try:
 193             (offset, stop, step) = item.indices(self.count())
 194         except AttributeError:
 195             # it's not a slice - make it one
 196             return self[item : item + 1][0]
 197         items = []
 198         total_len = stop - offset
 199         skipped = 0
 200         i_s = range(len(sort_heads))
 201
 202         while len(items) < total_len:
 203             candidate = None
 204             candidate_i = None
 205             for i in i_s:
 206                 def get_next():
 207                     return self.querysets[i][sort_heads[i]]
 208                 try:
 209                     if candidate is None:
 210                         candidate = get_next()
 211                         candidate_i = i
 212                     else:
 213                         competitor = get_next()
 214                         if self.sortfn(candidate, competitor) > 0:
 215                             candidate = competitor
 216                             candidate_i = i
 217                 except IndexError:
 218                     continue # continue next sort_head
 219             # we have no more elements:
 220             if candidate is None:
 221                 break
 222             sort_heads[candidate_i] += 1
 223             if skipped < offset:
 224                 skipped += 1
 225                 continue # continue next item
 226             items.append(candidate)
 227
 228         return items
 229
 230
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    Returns an empty string when ``num`` is not positive, and the input
    (converted with force_unicode) unchanged when it has at most ``num``
    words.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Tags that never have a closing counterpart and so are never tracked.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity, a tag, or a word; only a word fills group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag groups: closing slash, tag name, self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    # The loop runs until one word more than `length` has been seen, which
    # is how the code below tells whether truncation is needed at all.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                # Remember where the last word to keep ends.
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    # (open_tags is ordered most recently opened first)
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 298
 299
 300 def customizations_hash(customizations):
 301     customizations.sort()
 302     return hash(tuple(customizations))
 303
 304
 305 def get_customized_pdf_path(book, customizations):
 306     """
 307     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 308     """
 309     h = customizations_hash(customizations)
 310     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 311
 312
 313 def clear_custom_pdf(book):
 314     """
 315     Returns a list of paths to generated customized pdf of a book
 316     """
 317     from waiter.utils import clear_cache
 318     clear_cache('book/%s' % book.slug)
 319
 320
 321 class AppSettings(object):
 322     """Allows specyfying custom settings for an app, with default values.
 323
 324     Just subclass, set some properties and instantiate with a prefix.
 325     Getting a SETTING from an instance will check for prefix_SETTING
 326     in project settings if set, else take the default. The value will be
 327     then filtered through _more_SETTING method, if there is one.
 328
 329     """
 330     def __init__(self, prefix):
 331         self._prefix = prefix
 332
 333     def __getattribute__(self, name):
 334         if name.startswith('_'):
 335             return object.__getattribute__(self, name)
 336         value = getattr(settings,
 337                          "%s_%s" % (self._prefix, name),
 338                          object.__getattribute__(self, name))
 339         more = "_more_%s" % name
 340         if hasattr(self, more):
 341             value = getattr(self, more)(value)
 342         return value
 343
 344
 345 def trim_query_log(trim_to=25):
 346     """
 347 connection.queries includes all SQL statements -- INSERTs, UPDATES, SELECTs, etc. Each time your app hits the database, the query will be recorded.
 348 This can sometimes occupy lots of memory, so trim it here a bit.
 349     """
 350     if settings.DEBUG:
 351         from django.db import connection
 352         connection.queries = trim_to > 0 \
 353             and connection.queries[-trim_to:] \
 354             or []
 355
 356
 357 def delete_from_cache_by_language(cache, key_template):
 358     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])