spam filter for publishing suggestions

author Jan Szejko <j-sz@o2.pl>

Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)

committer Jan Szejko <j-sz@o2.pl>

Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)
author Jan Szejko <j-sz@o2.pl>
Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)
committer Jan Szejko <j-sz@o2.pl>
Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)
diff --git a/src/suggest/forms.py b/src/suggest/forms.py

index 03e1232..4e9cf2f 100644 (file)
--- a/src/suggest/forms.py
+++ b/src/suggest/forms.py
@@ -56,20 +56,23 @@ The suggestion has been referred to the project coordinator.""") +
  
  class PublishingSuggestForm(forms.Form):
      contact = forms.CharField(label=_('Contact'), max_length=120, required=False)
-    books = forms.CharField(label=_('books'), widget=forms.Textarea, required=False)
-    audiobooks = forms.CharField(label=_('audiobooks'), widget=forms.Textarea, required=False)
+    books = forms.CharField(label=_('books'), widget=forms.Textarea, required=True)
+    ebook = forms.BooleanField(label=_('ebook'), required=False, initial=True)
+    audiobook = forms.BooleanField(label=_('audiobook'), required=False)
  
      def clean(self):
-        if not self.cleaned_data['books'] and not self.cleaned_data['audiobooks']:
-            msg = ugettext(u"One of these fields is required.")
-            self._errors["books"] = self.error_class([msg])
-            self._errors["audiobooks"] = self.error_class([msg])
+        if not self.cleaned_data['ebook'] and not self.cleaned_data['audiobook']:
+            msg = ugettext(u"One of these options is required.")
+            self._errors['ebook'] = self.error_class([msg])
+            self._errors['audiobook'] = self.error_class([msg])
          return super(PublishingSuggestForm, self).clean()
  
      def save(self, request):
          contact = self.cleaned_data['contact']
-        books = self.cleaned_data['books']
-        audiobooks = self.cleaned_data['audiobooks']
+        suggestion_text = self.cleaned_data['books'].strip(', \n\r')
+
+        books = suggestion_text if self.cleaned_data['ebook'] else ''
+        audiobooks = suggestion_text if self.cleaned_data['audiobook'] else ''
  
          suggestion = PublishingSuggestion(
              contact=contact, books=books,
@@ -78,34 +81,35 @@ class PublishingSuggestForm(forms.Form):
              suggestion.user = request.user
          suggestion.save()
  
-        mail_managers(u'Konsultacja planu wydawniczego na WolneLektury.pl', u'''\
-Zgłoszono nową sugestię nt. planu wydawniczego w serwisie WolneLektury.pl.
-%(url)s
+        if not suggestion.is_spam():
+            mail_managers(u'Konsultacja planu wydawniczego na WolneLektury.pl', u'''\
+    Zgłoszono nową sugestię nt. planu wydawniczego w serwisie WolneLektury.pl.
+    %(url)s
  
-Użytkownik: %(user)s
-Kontakt: %(contact)s
+    Użytkownik: %(user)s
+    Kontakt: %(contact)s
  
-Książki:
-%(books)s
+    Książki:
+    %(books)s
  
-Audiobooki:
-%(audiobooks)s''' % {
-            'url': request.build_absolute_uri(reverse('admin:suggest_suggestion_change', args=[suggestion.id])),
-            'user': str(request.user) if request.user.is_authenticated() else '',
-            'contact': contact,
-            'books': books,
-            'audiobooks': audiobooks,
+    Audiobooki:
+    %(audiobooks)s''' % {
+                'url': request.build_absolute_uri(reverse('admin:suggest_suggestion_change', args=[suggestion.id])),
+                'user': str(request.user) if request.user.is_authenticated() else '',
+                'contact': contact,
+                'books': books,
+                'audiobooks': audiobooks,
              }, fail_silently=True)
  
-        try:
-            validate_email(contact)
-        except ValidationError:
-            pass
-        else:
-            send_mail(
-                u'[WolneLektury] ' + ugettext(u'Thank you for your suggestion.'),
-                ugettext(u"""\
-Thank you for your comment on WolneLektury.pl.
-The suggestion has been referred to the project coordinator.""") +
-                u"\n\n-- \n" + ugettext(u'''Message sent automatically. Please do not reply.'''),
-                'no-reply@wolnelektury.pl', [contact], fail_silently=True)
+            try:
+                validate_email(contact)
+            except ValidationError:
+                pass
+            else:
+                send_mail(
+                    u'[WolneLektury] ' + ugettext(u'Thank you for your suggestion.'),
+                    ugettext(u"""\
+    Thank you for your comment on WolneLektury.pl.
+    The suggestion has been referred to the project coordinator.""") +
+                    u"\n\n-- \n" + ugettext(u'''Message sent automatically. Please do not reply.'''),
+                    'no-reply@wolnelektury.pl', [contact], fail_silently=True)
diff --git a/src/suggest/models.py b/src/suggest/models.py

index 87454a2..b499ee8 100644 (file)
--- a/src/suggest/models.py
+++ b/src/suggest/models.py
@@ -2,6 +2,9 @@
  # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
  # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
  #
+import re
+from datetime import timedelta
+
  from django.db import models
  from django.contrib.auth.models import User
  from django.utils.translation import ugettext_lazy as _
@@ -36,5 +39,35 @@ class PublishingSuggestion(models.Model):
          verbose_name = _('publishing suggestion')
          verbose_name_plural = _('publishing suggestions')
  
+    def is_spam(self):
+        suggestion_text = (self.books or self.audiobooks).strip(' \r\n,')
+        # similar = PublishingSuggestion.objects.filter(
+        #     books__in=('', suggestion_text), audiobooks__in=('', suggestion_text))
+        similar = PublishingSuggestion.objects.filter(books=self.books, audiobooks=self.audiobooks).exclude(pk=self.pk)
+        http = 'http' in suggestion_text
+        spam = False
+        if re.search(r'([^\W\d_])\1\1\1', suggestion_text):
+            # the same letter four times in a row (checked on the whole text, URLs included)
+            spam = True
+        elif re.search(r'[^\W\d_]\d|\d[^\W\d_]', suggestion_text) and not http:
+            # string of letters and digits outside URL
+            spam = True
+        elif re.search(r'[^\W\d_]{17}', suggestion_text):
+            # long string of letters (usually gibberish)
+            spam = True
+        elif ' ' not in suggestion_text:
+            # single word - usually spam
+            spam = True
+        elif len(suggestion_text) < 11:
+            # too short
+            spam = True
+        elif similar.filter(created_at__range=(self.created_at - timedelta(1), self.created_at)):
+            # the same suggestion within 24h
+            spam = True
+        elif similar.filter(ip=self.ip):
+            # the same suggestion from the same IP
+            spam = True
+        return spam
+
      def __unicode__(self):
          return unicode(self.created_at)
diff --git a/src/suggest/templates/publishing_suggest.html b/src/suggest/templates/publishing_suggest.html

index 2ac0ec2..33f95a5 100755 (executable)
--- a/src/suggest/templates/publishing_suggest.html
+++ b/src/suggest/templates/publishing_suggest.html
@@ -12,10 +12,12 @@
  
      <li>{% trans "I'd like to find in WolneLektury.pl these…" %}</li>
  
-    <li><span class="error">{{ form.books.errors }}</span><label for="id_books">{{ form.books.label }}:</label> {{ form.books }}</li>
+    <li><label for="id_books">{{ form.books.label }}:</label> {{ form.books }}</li>
  
-    <li><span class="error">{{ form.audiobooks.errors }}</span><label for="id_audiobooks">{{ form.audiobooks.label }}:</label> {{ form.audiobooks }}</li>
+    <li class="checkbox"><span class="error">{{ form.ebook.errors }}</span><label for="id_ebook">{{ form.ebook.label }}:</label> {{ form.ebook }}</li>
+    <li class="checkbox"><span class="error">{{ form.audiobook.errors }}</span><label for="id_audiobook">{{ form.audiobook.label }}:</label> {{ form.audiobook }}</li>
  
      <li><input type="submit" value="{% trans "Send report" %}"/></li>
+    <li>{% trans "Remember that we can only publish books in public domain, ie. 70 years after the death of the author!" %}</li>
  </ol>
  </form>
diff --git a/src/wolnelektury/static/scss/main/dialogs.scss b/src/wolnelektury/static/scss/main/dialogs.scss

index a2d33ab..5825979 100755 (executable)
--- a/src/wolnelektury/static/scss/main/dialogs.scss
+++ b/src/wolnelektury/static/scss/main/dialogs.scss
@@ -94,6 +94,16 @@
      .errorlist {
          color: #BF3024;
      }
+
+    .checkbox {
+        label {
+            display: inline;
+        }
+
+        input {
+            width: auto;
+        }
+    }
  }
author	Jan Szejko <j-sz@o2.pl>
	Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)
committer	Jan Szejko <j-sz@o2.pl>
	Mon, 22 Feb 2016 17:04:25 +0000 (18:04 +0100)
src/suggest/forms.py		patch | blob | history
src/suggest/models.py		patch | blob | history
src/suggest/templates/publishing_suggest.html		patch | blob | history
src/wolnelektury/static/scss/main/dialogs.scss		patch | blob | history