large boost for multiple results
[wolnelektury.git] / src / suggest / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import re
6 from datetime import timedelta
7
8 from django.db import models
9 from django.contrib.auth.models import User
10 from django.utils.translation import ugettext_lazy as _
11
12
class Suggestion(models.Model):
    """A suggestion: optional contact and description, IP address, optional user."""

    contact = models.CharField(_('contact'), blank=True, max_length=120)
    description = models.TextField(_('description'), blank=True)
    # auto_now_add (not auto_now): the creation date is stamped once, when the
    # row is first saved, and is not overwritten by later saves.  This matches
    # PublishingSuggestion.created_at.
    # NOTE(review): the field definition changes, so `makemigrations` is
    # expected to emit an AlterField for it -- confirm and commit it.
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    ip = models.GenericIPAddressField(_('IP address'))
    user = models.ForeignKey(User, blank=True, null=True)

    class Meta:
        # Newest suggestions first.
        ordering = ('-created_at',)
        verbose_name = _('suggestion')
        verbose_name_plural = _('suggestions')

    def __unicode__(self):
        """Represent the suggestion by its creation timestamp."""
        return unicode(self.created_at)
27
28
class PublishingSuggestion(models.Model):
    """A suggestion of books and/or audiobooks, with a spam heuristic."""

    contact = models.CharField(_('contact'), blank=True, max_length=120)
    books = models.TextField(_('books'), null=True, blank=True)
    audiobooks = models.TextField(_('audiobooks'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True)
    ip = models.GenericIPAddressField(_('IP address'))
    user = models.ForeignKey(User, blank=True, null=True)

    class Meta:
        # Newest suggestions first.
        ordering = ('-created_at',)
        verbose_name = _('publishing suggestion')
        verbose_name_plural = _('publishing suggestions')

    def is_spam(self):
        """Return True when this suggestion looks like spam.

        Only one text is inspected: ``books`` when it is non-empty,
        otherwise ``audiobooks``.  When both are empty or ``None`` the text
        is treated as an empty string, which the single-word rule below
        classifies as spam.

        Cheap text-only rules are tried first; the database is queried for
        duplicates only when none of them matched.  The 24-hour duplicate
        rule reads ``self.created_at``, so the method is presumably meant to
        be called on an already saved instance.
        """
        # Both fields are nullable, hence the final '' fallback.
        suggestion_text = (self.books or self.audiobooks or '').strip(' \r\n,')
        has_url = 'http' in suggestion_text

        if re.search(r'([^\W\d_])\1\1\1', suggestion_text):
            # the same letter four times in a row (applies to URLs as well)
            return True
        if re.search(r'[^\W\d_]\d|\d[^\W\d_]', suggestion_text) and not has_url:
            # a letter directly next to a digit, in a text without a URL
            return True
        if re.search(r'[^\W\d_]{17}', suggestion_text):
            # long string of letters (usually gibberish)
            return True
        if ' ' not in suggestion_text:
            # single word - usually spam
            return True
        if len(suggestion_text) < 11:
            # too short
            return True

        # Other suggestions carrying exactly the same books and audiobooks.
        similar = PublishingSuggestion.objects.filter(
            books=self.books, audiobooks=self.audiobooks).exclude(pk=self.pk)
        day_before = self.created_at - timedelta(1)
        # exists() asks the database for a yes/no answer instead of loading
        # every matching row just to test the queryset for truthiness.
        if similar.filter(created_at__range=(day_before, self.created_at)).exists():
            # the same suggestion within 24h
            return True
        if similar.filter(ip=self.ip).exists():
            # the same suggestion from the same IP
            return True
        return False

    def __unicode__(self):
        """Represent the suggestion by its creation timestamp."""
        return unicode(self.created_at)