src/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import hashlib
   6 import os.path
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11 from collections import defaultdict
  12 from errno import EEXIST, ENOENT
  13 from fcntl import flock, LOCK_EX
  14 from os import mkdir, path, unlink
  15 from zipfile import ZipFile
  16
  17 from django.conf import settings
  18 from django.core.files.storage import DefaultStorage
  19 from django.core.files.uploadedfile import UploadedFile
  20 from django.http import HttpResponse
  21 from django.utils.encoding import force_unicode
  22
  23 from paypal.rest import user_is_subscribed
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1('%s%s%s%s' % (
  36         randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'), settings.SECRET_KEY)
  37     ).digest()
  38     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  39
  40
  41 def split_tags(*tag_lists):
  42     if len(tag_lists) == 1:
  43         result = defaultdict(list)
  44         for tag in tag_lists[0]:
  45             result[tag.category].append(tag)
  46     else:
  47         result = defaultdict(dict)
  48         for tag_list in tag_lists:
  49             for tag in tag_list:
  50                 try:
  51                     result[tag.category][tag.pk].count += tag.count
  52                 except KeyError:
  53                     result[tag.category][tag.pk] = tag
  54         for k, v in result.items():
  55             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  56     return result
  57
  58
  59 class ExistingFile(UploadedFile):
  60
  61     def __init__(self, path, *args, **kwargs):
  62         self.path = path
  63         super(ExistingFile, self).__init__(*args, **kwargs)
  64
  65     def temporary_file_path(self):
  66         return self.path
  67
  68     def close(self):
  69         pass
  70
  71
  72 class LockFile(object):
  73     """
  74     A file lock monitor class; createas an ${objname}.lock
  75     file in directory dir, and locks it exclusively.
  76     To be used in 'with' construct.
  77     """
  78     def __init__(self, dir, objname):
  79         self.lockname = path.join(dir, objname + ".lock")
  80
  81     def __enter__(self):
  82         self.lock = open(self.lockname, 'w')
  83         flock(self.lock, LOCK_EX)
  84
  85     def __exit__(self, *err):
  86         try:
  87             unlink(self.lockname)
  88         except OSError as oe:
  89             if oe.errno != ENOENT:
  90                 raise oe
  91         self.lock.close()
  92
  93
  94 # @task
  95 def create_zip(paths, zip_slug):
  96     """
  97     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  98     Resulting archive filename is ${zip_slug}.zip
  99     Returns it's path relative to MEDIA_ROOT (no initial slash)
 100     """
 101     # directory to store zip files
 102     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 103
 104     try:
 105         mkdir(zip_path)
 106     except OSError as oe:
 107         if oe.errno != EEXIST:
 108             raise oe
 109     zip_filename = zip_slug + ".zip"
 110
 111     with LockFile(zip_path, zip_slug):
 112         if not path.exists(path.join(zip_path, zip_filename)):
 113             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 114             try:
 115                 for arcname, p in paths:
 116                     if arcname is None:
 117                         arcname = path.basename(p)
 118                     zipf.write(p, arcname)
 119             finally:
 120                 zipf.close()
 121
 122         return 'zip/' + zip_filename
 123
 124
 125 def remove_zip(zip_slug):
 126     """
 127     removes the ${zip_slug}.zip file from zip store.
 128     """
 129     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 130     try:
 131         unlink(zip_file)
 132     except OSError as oe:
 133         if oe.errno != ENOENT:
 134             raise oe
 135
 136
 137 class AttachmentHttpResponse(HttpResponse):
 138     """Response serving a file to be downloaded.
 139     """
 140     def __init__(self, file_path, file_name, mimetype):
 141         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 142         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 143         self.file_path = file_path
 144         self.file_name = file_name
 145
 146         with open(DefaultStorage().path(self.file_path)) as f:
 147             for chunk in read_chunks(f):
 148                 self.write(chunk)
 149
 150
 151 class MultiQuerySet(object):
 152     def __init__(self, *args, **kwargs):
 153         self.querysets = args
 154         self._count = None
 155
 156     def count(self):
 157         if not self._count:
 158             self._count = sum(len(qs) for qs in self.querysets)
 159         return self._count
 160
 161     def __len__(self):
 162         return self.count()
 163
 164     def __getitem__(self, item):
 165         try:
 166             (offset, stop, step) = item.indices(self.count())
 167         except AttributeError:
 168             # it's not a slice - make it one
 169             return self[item:item + 1][0]
 170         items = []
 171         total_len = stop - offset
 172         for qs in self.querysets:
 173             if len(qs) < offset:
 174                 offset -= len(qs)
 175             else:
 176                 items += list(qs[offset:stop])
 177                 if len(items) >= total_len:
 178                     return items
 179                 else:
 180                     offset = 0
 181                     stop = total_len - len(items)
 182                     continue
 183
 184
 185 class SortedMultiQuerySet(MultiQuerySet):
 186     def __init__(self, *args, **kwargs):
 187         self.order_by = kwargs.pop('order_by', None)
 188         self.sortfn = kwargs.pop('sortfn', None)
 189         if self.order_by is not None:
 190             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 191                                            (getattr(b, f) for f in self.order_by))
 192         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 193
 194     def __getitem__(self, item):
 195         sort_heads = [0] * len(self.querysets)
 196         try:
 197             (offset, stop, step) = item.indices(self.count())
 198         except AttributeError:
 199             # it's not a slice - make it one
 200             return self[item:item + 1][0]
 201         items = []
 202         total_len = stop - offset
 203         skipped = 0
 204         i_s = range(len(sort_heads))
 205
 206         while len(items) < total_len:
 207             candidate = None
 208             candidate_i = None
 209             for i in i_s:
 210                 def get_next():
 211                     return self.querysets[i][sort_heads[i]]
 212                 try:
 213                     if candidate is None:
 214                         candidate = get_next()
 215                         candidate_i = i
 216                     else:
 217                         competitor = get_next()
 218                         if self.sortfn(candidate, competitor) > 0:
 219                             candidate = competitor
 220                             candidate_i = i
 221                 except IndexError:
 222                     continue  # continue next sort_head
 223             # we have no more elements:
 224             if candidate is None:
 225                 break
 226             sort_heads[candidate_i] += 1
 227             if skipped < offset:
 228                 skipped += 1
 229                 continue  # continue next item
 230             items.append(candidate)
 231
 232         return items
 233
 234
 235 def truncate_html_words(s, num, end_text='...'):
 236     """Truncates HTML to a certain number of words (not counting tags and
 237     comments). Closes opened tags if they were correctly closed in the given
 238     html. Takes an optional argument of what should be used to notify that the
 239     string has been truncated, defaulting to ellipsis (...).
 240
 241     Newlines in the HTML are preserved.
 242
 243     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 244     """
 245     s = force_unicode(s)
 246     length = int(num)
 247     if length <= 0:
 248         return u''
 249     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 250     # Set up regular expressions
 251     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 252     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 253     # Count non-HTML words and keep note of open tags
 254     pos = 0
 255     end_text_pos = 0
 256     words = 0
 257     open_tags = []
 258     while words <= length:
 259         m = re_words.search(s, pos)
 260         if not m:
 261             # Checked through whole string
 262             break
 263         pos = m.end(0)
 264         if m.group(1):
 265             # It's an actual non-HTML word
 266             words += 1
 267             if words == length:
 268                 end_text_pos = pos
 269             continue
 270         # Check for tag
 271         tag = re_tag.match(m.group(0))
 272         if not tag or end_text_pos:
 273             # Don't worry about non tags or tags after our truncate point
 274             continue
 275         closing_tag, tagname, self_closing = tag.groups()
 276         tagname = tagname.lower()  # Element names are always case-insensitive
 277         if self_closing or tagname in html4_singlets:
 278             pass
 279         elif closing_tag:
 280             # Check for match in open tags list
 281             try:
 282                 i = open_tags.index(tagname)
 283             except ValueError:
 284                 pass
 285             else:
 286                 # SGML: An end tag closes, back to the matching start tag,
 287                 # all unclosed intervening start tags with omitted end tags
 288                 open_tags = open_tags[i+1:]
 289         else:
 290             # Add it to the start of the open tags list
 291             open_tags.insert(0, tagname)
 292     if words <= length:
 293         # Don't try to close tags if we don't need to truncate
 294         return s
 295     out = s[:end_text_pos]
 296     if end_text:
 297         out += end_text
 298     # Close any tags still open
 299     for tag in open_tags:
 300         out += '</%s>' % tag
 301     # Return string
 302     return out
 303
 304
 305 def customizations_hash(customizations):
 306     customizations.sort()
 307     return hash(tuple(customizations))
 308
 309
 310 def get_customized_pdf_path(book, customizations):
 311     """
 312     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 313     """
 314     h = customizations_hash(customizations)
 315     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 316
 317
 318 def clear_custom_pdf(book):
 319     """
 320     Returns a list of paths to generated customized pdf of a book
 321     """
 322     from waiter.utils import clear_cache
 323     clear_cache('book/%s' % book.slug)
 324
 325
 326 class AppSettings(object):
 327     """Allows specyfying custom settings for an app, with default values.
 328
 329     Just subclass, set some properties and instantiate with a prefix.
 330     Getting a SETTING from an instance will check for prefix_SETTING
 331     in project settings if set, else take the default. The value will be
 332     then filtered through _more_SETTING method, if there is one.
 333
 334     """
 335     def __init__(self, prefix):
 336         self._prefix = prefix
 337
 338     def __getattribute__(self, name):
 339         if name.startswith('_'):
 340             return object.__getattribute__(self, name)
 341         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 342         more = "_more_%s" % name
 343         if hasattr(self, more):
 344             value = getattr(self, more)(value)
 345         return value
 346
 347
 348 def delete_from_cache_by_language(cache, key_template):
 349     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 350
 351
 352 def gallery_path(slug):
 353     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug)
 354
 355
 356 def gallery_url(slug):
 357     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)
 358
 359
 360 def is_subscribed(user):
 361     return user_is_subscribed(user)