X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/b2d342589a7889a3b096e7192453d53bd28eed7d..5b312d5dfc897d291d4d4ed159fc25d7f5493d41:/src/suggest/models.py?ds=sidebyside

diff --git a/src/suggest/models.py b/src/suggest/models.py
index 87454a211..b499ee8dc 100644
--- a/src/suggest/models.py
+++ b/src/suggest/models.py
@@ -2,6 +2,9 @@
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+import re
+from datetime import timedelta
+
 from django.db import models
 from django.contrib.auth.models import User
 from django.utils.translation import ugettext_lazy as _
@@ -36,5 +39,43 @@ class PublishingSuggestion(models.Model):
         verbose_name = _('publishing suggestion')
         verbose_name_plural = _('publishing suggestions')
 
+    def is_spam(self):
+        """Heuristically decide whether this suggestion looks like spam.
+
+        Cheap text checks run first; the database is only queried when
+        none of them has already flagged the text.
+
+        Returns True when the suggestion is considered spam, else False.
+        """
+        suggestion_text = (self.books or self.audiobooks).strip(' \r\n,')
+        has_url = 'http' in suggestion_text
+
+        if re.search(r'([^\W\d_])\1\1\1', suggestion_text):
+            # the same letter four times in a row
+            return True
+        if not has_url and re.search(r'[^\W\d_]\d|\d[^\W\d_]', suggestion_text):
+            # a letter directly next to a digit, in text without any URL
+            return True
+        if re.search(r'[^\W\d_]{17}', suggestion_text):
+            # 17 letters in a row (usually gibberish)
+            return True
+        if ' ' not in suggestion_text:
+            # single word - usually spam
+            return True
+        if len(suggestion_text) < 11:
+            # too short
+            return True
+
+        similar = PublishingSuggestion.objects.filter(
+            books=self.books, audiobooks=self.audiobooks).exclude(pk=self.pk)
+        day_before = self.created_at - timedelta(days=1)
+        recent = similar.filter(
+            created_at__range=(day_before, self.created_at))
+        if recent.exists():
+            # the same suggestion within the preceding 24h
+            return True
+        # otherwise spam only if the same suggestion already came from this IP
+        return similar.filter(ip=self.ip).exists()
+
     def __unicode__(self):
         return unicode(self.created_at)