src/suggest/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import re
   6 from datetime import timedelta
   7
   8 from django.db import models
   9 from django.contrib.auth.models import User
  10 from django.utils.translation import ugettext_lazy as _
  11
  12
  13 class Suggestion(models.Model):
  14     contact = models.CharField(_('contact'), blank=True, max_length=120)
  15     description = models.TextField(_('description'), blank=True)
  16     created_at = models.DateTimeField(_('creation date'), auto_now=True)
  17     ip = models.GenericIPAddressField(_('IP address'))
  18     user = models.ForeignKey(User, blank=True, null=True)
  19
  20     class Meta:
  21         ordering = ('-created_at',)
  22         verbose_name = _('suggestion')
  23         verbose_name_plural = _('suggestions')
  24
  25     def __unicode__(self):
  26         return unicode(self.created_at)
  27
  28
  29 class PublishingSuggestion(models.Model):
  30     contact = models.CharField(_('contact'), blank=True, max_length=120)
  31     books = models.TextField(_('books'), null=True, blank=True)
  32     audiobooks = models.TextField(_('audiobooks'), null=True, blank=True)
  33     created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
  34     ip = models.GenericIPAddressField(_('IP address'))
  35     user = models.ForeignKey(User, blank=True, null=True)
  36
  37     class Meta:
  38         ordering = ('-created_at',)
  39         verbose_name = _('publishing suggestion')
  40         verbose_name_plural = _('publishing suggestions')
  41
  42     def is_spam(self):
  43         suggestion_text = (self.books or self.audiobooks).strip(' \r\n,')
  44         # similar = PublishingSuggestion.objects.filter(
  45         #     books__in=('', suggestion_text), audiobooks__in=('', suggestion_text))
  46         similar = PublishingSuggestion.objects.filter(books=self.books, audiobooks=self.audiobooks).exclude(pk=self.pk)
  47         http = 'http' in suggestion_text
  48         spam = False
  49         if re.search(r'([^\W\d_])\1\1\1', suggestion_text):
  50             # same letter repetition outside URL
  51             spam = True
  52         elif re.search(r'[^\W\d_]\d|\d[^\W\d_]', suggestion_text) and not http:
  53             # string of letters and digits outside URL
  54             spam = True
  55         elif re.search(r'[^\W\d_]{17}', suggestion_text):
  56             # long string of letters (usually gibberish)
  57             spam = True
  58         elif ' ' not in suggestion_text:
  59             # single word - usually spam
  60             spam = True
  61         elif len(suggestion_text) < 11:
  62             # too short
  63             spam = True
  64         elif similar.filter(created_at__range=(self.created_at - timedelta(1), self.created_at)):
  65             # the same suggestion within 24h
  66             spam = True
  67         elif similar.filter(ip=self.ip):
  68             # the same suggestion from the same IP
  69             spam = True
  70         return spam
  71
  72     def __unicode__(self):
  73         return unicode(self.created_at)