src/suggest/models.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 import re
   5 from datetime import timedelta
   6
   7 from django.db import models
   8 from django.contrib.auth.models import User
   9
  10
class Suggestion(models.Model):
    """Suggestion record: contact, description, timestamp, IP and optional user."""

    # Optional contact details, at most 120 characters.
    contact = models.CharField('kontakt', blank=True, max_length=120)
    # Optional free-form text of the suggestion.
    description = models.TextField('opis', blank=True)
    # NOTE(review): the verbose name means "creation date", but auto_now=True
    # refreshes the value on every save(), not only on creation.
    # PublishingSuggestion uses auto_now_add=True for the same field; confirm
    # which behaviour is intended (changing it requires a migration).
    created_at = models.DateTimeField('data utworzenia', auto_now=True)
    # Required IP address.
    ip = models.GenericIPAddressField('adres IP')
    # Optional link to a user account; set to NULL when that user is deleted.
    user = models.ForeignKey(User, models.SET_NULL, blank=True, null=True)

    class Meta:
        # Default ordering: newest first.
        ordering = ('-created_at',)
        verbose_name = 'sugestia'
        verbose_name_plural = 'sugestie'

    def __str__(self) -> str:
        """Return the ``created_at`` timestamp rendered as text."""
        return str(self.created_at)
  25
  26
  27 class PublishingSuggestion(models.Model):
  28     contact = models.CharField('kontakt', blank=True, max_length=120)
  29     books = models.TextField('książki', null=True, blank=True)
  30     audiobooks = models.TextField('audiobooki', null=True, blank=True)
  31     created_at = models.DateTimeField('data utworzenia', auto_now_add=True)
  32     ip = models.GenericIPAddressField('adres IP')
  33     user = models.ForeignKey(User, models.SET_NULL, blank=True, null=True)
  34
  35     class Meta:
  36         ordering = ('-created_at',)
  37         verbose_name = 'sugestia publikacji'
  38         verbose_name_plural = 'sugestie publikacji'
  39
  40     def is_spam(self):
  41         suggestion_text = (self.books or self.audiobooks).strip(' \r\n,')
  42         # similar = PublishingSuggestion.objects.filter(
  43         #     books__in=('', suggestion_text), audiobooks__in=('', suggestion_text))
  44         similar = PublishingSuggestion.objects.filter(books=self.books, audiobooks=self.audiobooks).exclude(pk=self.pk)
  45         http = 'http' in suggestion_text
  46         spam = False
  47         if re.search(r'([^\W\d_])\1\1\1', suggestion_text):
  48             # same letter repetition outside URL
  49             spam = True
  50         elif re.search(r'[^\W\d_]\d|\d[^\W\d_]', suggestion_text) and not http:
  51             # string of letters and digits outside URL
  52             spam = True
  53         elif re.search(r'[^\W\d_]{17}', suggestion_text):
  54             # long string of letters (usually gibberish)
  55             spam = True
  56         elif ' ' not in suggestion_text:
  57             # single word - usually spam
  58             spam = True
  59         elif len(suggestion_text) < 11:
  60             # too short
  61             spam = True
  62         elif similar.filter(created_at__range=(self.created_at - timedelta(1), self.created_at)):
  63             # the same suggestion within 24h
  64             spam = True
  65         elif similar.filter(ip=self.ip):
  66             # the same suggestion from the same IP
  67             spam = True
  68         return spam
  69
  70     def __str__(self):
  71         return str(self.created_at)