← Back to team overview

widelands-dev team mailing list archive

[Merge] lp:~widelands-dev/widelands-website/anti_spam_app into lp:widelands-website

 

kaputtnik has proposed merging lp:~widelands-dev/widelands-website/anti_spam_app into lp:widelands-website.

Requested reviews:
  Widelands Developers (widelands-dev)
Related bugs:
  Bug #1614403 in Widelands Website: "Ideas to prevent spammers, make their work harder"
  https://bugs.launchpad.net/widelands-website/+bug/1614403

For more details, see:
https://code.launchpad.net/~widelands-dev/widelands-website/anti_spam_app/+merge/334232

Decouple the check for spam (suspicious user content) from pybb. The main goal is to make it possible to add such checks to other apps, e.g. wiki or comments.

For pybb we already have the hidden property, so I used it. For other apps it may be possible to add a template filter or a middleware to hide suspicious content. This is work for the future...

- Implement a new app 'check_input' containing:
- A new model SuspiciousInput which is responsible for checking user input and collecting data about suspicious content. Besides the username and the suspicious text, the model stores the application's model where this suspicious text comes from, e.g. pybb/Post or pybb/Topic with the appropriate ID
- Moved management command for sending hidden_posts_mail into the new app and renamed it
- Added an admin page for the new model
- Removed the HTTP 403 response; instead, reworked the informational view about moderation of user input, which is shown to the user who wrote suspicious content.


- Make pybb use the new app, including a check when editing posts


Deleting a user also removes the entries in model SuspiciousInput.
An entry in SuspiciousInput does not get deleted when a post gets unhidden, so an admin is responsible for removing the entry in this case.

-- 
Your team Widelands Developers is requested to review the proposed merge of lp:~widelands-dev/widelands-website/anti_spam_app into lp:widelands-website.
=== added directory 'check_input'
=== added file 'check_input/__init__.py'
--- check_input/__init__.py	1970-01-01 00:00:00 +0000
+++ check_input/__init__.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,1 @@
+default_app_config = 'check_input.apps.CheckInput'

=== added file 'check_input/admin.py'
--- check_input/admin.py	1970-01-01 00:00:00 +0000
+++ check_input/admin.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,18 @@
+from check_input.models import SuspiciousInput
+from django.contrib import admin
+from django.contrib.contenttypes.models import ContentType
+
+
+class SuspiciousInputAdmin(admin.ModelAdmin):
+    list_display = ('text', 'user', 'get_app')
+    readonly_fields = ('text', 'user', 'get_app',)
+    exclude = ('content_type', 'object_id', )
+    
+    def get_app(self, obj):
+        app = ContentType.objects.get_for_id(
+            obj.content_type_id)
+
+        return '%s/%s' % (app.app_label, app.name)
+    get_app.short_description = 'Found in App/Model'
+
+admin.site.register(SuspiciousInput, SuspiciousInputAdmin)

=== added file 'check_input/apps.py'
--- check_input/apps.py	1970-01-01 00:00:00 +0000
+++ check_input/apps.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+class CheckInput(AppConfig):
+    name = 'check_input'
+    verbose_name = "Check Input"

=== added directory 'check_input/management'
=== added file 'check_input/management/__init__.py'
=== added directory 'check_input/management/commands'
=== added file 'check_input/management/commands/__init__.py'
=== renamed file 'pybb/management/commands/send_hidden_post_mail.py' => 'check_input/management/commands/send_hidden_post_mail.py'
--- pybb/management/commands/send_hidden_post_mail.py	2016-12-13 18:28:51 +0000
+++ check_input/management/commands/send_hidden_post_mail.py	2017-11-24 12:01:31 +0000
@@ -1,24 +1,26 @@
 from django.core.management.base import BaseCommand
-from pybb.models import Post
+from check_input.models import SuspiciousInput
 from django.core.mail import send_mail
 from django.conf import settings
 from django.contrib.sites.models import Site
+from django.contrib.contenttypes.models import ContentType
 
 
 class Command(BaseCommand):
-    help = 'Send emails if hidden posts are found'
+    help = 'Send email of found spam'
 
     def handle(self, *args, **options):
-        hidden_posts = Post.objects.filter(hidden=True)
-
-        if hidden_posts:
-            message = 'There were %d hidden posts found:' % len(hidden_posts)
-            for post in hidden_posts:
-                message += '\n' + post.user.username + \
-                    ': ' + post.body_text[:70]
-
-            message += '\n\nAdmin page: ' + Site.objects.get_current().domain + \
-                '/admin/pybb/post/'
+        spams = SuspiciousInput.objects.all()
+        if spams:
+            message = 'There were %d hidden posts found:' % len(spams)
+    
+            for spam in spams:
+                app = ContentType.objects.get_for_id(
+                    spam.content_type_id)
+                message += '\nIn %s/%s: ' % (app.app_label, app.model)
+                message += '\n User \'%s\' wrote: %s' % (spam.user, spam.text)
+    
+            message += '\n\nAdmin page: https://%s/admin/pybb/post/' % Site.objects.get_current().domain
             recipients = [addr[1] for addr in settings.ADMINS]
-            send_mail('Hidden posts were found', message, 'pybb@xxxxxxxxxxxxx',
+            send_mail('Hidden posts were found', message, 'admins@xxxxxxxxxxxxx',
                       recipients, fail_silently=False)

=== added directory 'check_input/migrations'
=== added file 'check_input/migrations/0001_initial.py'
--- check_input/migrations/0001_initial.py	1970-01-01 00:00:00 +0000
+++ check_input/migrations/0001_initial.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import models, migrations
+from django.conf import settings
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('contenttypes', '0002_remove_content_type_name'),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='SuspiciousInput',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('text', models.CharField(max_length=200, verbose_name=b'suspicious user input')),
+                ('object_id', models.PositiveIntegerField()),
+                ('content_type', models.ForeignKey(verbose_name=b'related model', to='contenttypes.ContentType')),
+                ('user', models.ForeignKey(verbose_name=b'related user', to=settings.AUTH_USER_MODEL)),
+            ],
+            options={
+                'ordering': ['content_type_id'],
+                'default_permissions': ('change', 'delete'),
+            },
+        ),
+    ]

=== added file 'check_input/migrations/__init__.py'
=== added file 'check_input/models.py'
--- check_input/models.py	1970-01-01 00:00:00 +0000
+++ check_input/models.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,45 @@
+from django.db import models
+from django.contrib.auth.models import User
+from django.contrib.contenttypes.models import ContentType
+from django.contrib.contenttypes.fields import GenericForeignKey
+from django.conf import settings
+import re
+
+
+class SuspiciousInput(models.Model):
+    """Model for collecting suspicios user input.
+    
+    Call directly the send function with this attributes:
+    content_object = Model instance of a saved(!) object
+    user = user
+    text = text to check for suspicious content
+    
+    Example:
+    is_suspicous = SuspiciousInput(content_type=post, user=post.user, text=post.body).send()
+    """
+    
+    text = models.CharField(max_length=200, verbose_name="suspicious user input")
+    user = models.ForeignKey(User, verbose_name="related user")
+    content_type = models.ForeignKey(ContentType, verbose_name="related model")
+    object_id = models.PositiveIntegerField()
+    content_object = GenericForeignKey('content_type', 'object_id')
+    
+    class Meta:
+        ordering = ['content_type_id']
+        default_permissions = ('change', 'delete',)
+
+    def __unicode__(self):
+        return self.text
+    
+    def is_suspicious(self):
+        if any(x in self.text.lower() for x in settings.ANTI_SPAM_KWRDS):
+            return True
+        if re.search(settings.ANTI_SPAM_PHONE_NR, self.text):
+            return True
+        return False
+
+    def save(self, *args, **kwargs):
+        is_spam = self.is_suspicious()
+        if is_spam:
+            super(SuspiciousInput, self).save(*args, **kwargs)
+        return is_spam

=== added file 'check_input/urls.py'
--- check_input/urls.py	1970-01-01 00:00:00 +0000
+++ check_input/urls.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,6 @@
+from django.conf.urls import *
+from check_input import views
+
+urlpatterns = [
+    url(r'^$', views.moderate_info, name='found_spam'),
+    ]

=== added file 'check_input/views.py'
--- check_input/views.py	1970-01-01 00:00:00 +0000
+++ check_input/views.py	2017-11-24 12:01:31 +0000
@@ -0,0 +1,36 @@
+from django.shortcuts import render
+from django.shortcuts import get_object_or_404
+from django.http import HttpResponseRedirect
+from django.conf import settings
+from django.contrib.auth import logout
+from django.contrib.auth.models import User
+from check_input.models import SuspiciousInput
+
+
+def moderate_info(request):
+    """Redirect to the moderate comments info page."""
+    
+    # We need the try to catch logged out users
+    try:
+        hidden_posts_count = SuspiciousInput.objects.filter(
+            user=request.user).count()
+    except TypeError:
+        return HttpResponseRedirect('/')
+
+    # Don't make the page accesible through browsers addressbar
+    if hidden_posts_count == 0:
+        return HttpResponseRedirect('/')
+
+    if hidden_posts_count >= settings.MAX_HIDDEN_POSTS:
+        user = get_object_or_404(User, username=request.user)
+        # Set the user inactive so he can't login
+        user.is_active = False
+        user.save()
+        # Log the user out
+        logout(request)
+    
+    context={
+        'max_count': settings.MAX_HIDDEN_POSTS,
+        'act_count': hidden_posts_count,
+    }
+    return render(request, 'check_input/moderate_info.html', context=context)

=== modified file 'pybb/forms.py'
--- pybb/forms.py	2017-04-23 21:00:01 +0000
+++ pybb/forms.py	2017-11-24 12:01:31 +0000
@@ -10,7 +10,6 @@
 from pybb.models import Topic, Post, PrivateMessage, Attachment
 from pybb import settings as pybb_settings
 from django.conf import settings
-from notification.models import send, get_observers_for
 
 
 class AddPostForm(forms.ModelForm):
@@ -62,42 +61,14 @@
             topic_is_new = False
             topic = self.topic
 
-        # Check for spam and hide the post
-        # TODO(Franku): This is currently a simple keyword search. Maybe add akismet check here
-        # could be improved...
-        # The admins get informed of hidden post(s) over
-        # a Django command. See pybb/management/commands
-        hidden = False
-        text = self.cleaned_data['body']
-        if any(x in text.lower() for x in settings.ANTI_SPAM_BODY):
-            hidden = True
-
-        if re.search(settings.ANTI_SPAM_PHONE_NR, text):
-            hidden = True
-
-        if topic_is_new:
-            text = self.cleaned_data['name']
-            if any(x in text.lower() for x in settings.ANTI_SPAM_TOPIC):
-                hidden = True
-            if re.search(settings.ANTI_SPAM_PHONE_NR, text):
-                hidden = True
-
         post = Post(topic=topic, user=self.user, user_ip=self.ip,
                     markup=self.cleaned_data['markup'],
-                    body=self.cleaned_data['body'], hidden=hidden)
+                    body=self.cleaned_data['body'])
         post.save(*args, **kwargs)
 
         if pybb_settings.ATTACHMENT_ENABLE:
             self.save_attachment(post, self.cleaned_data['attachment'])
 
-        if not hidden:
-            if topic_is_new:
-                send(get_observers_for('forum_new_topic'), 'forum_new_topic',
-                     {'topic': topic, 'post': post, 'user': topic.user}, queue = True)
-            else:
-                send(self.topic.subscribers.all(), 'forum_new_post',
-                     {'post': post, 'topic': topic, 'user': post.user}, queue = True)
-
         return post
 
     def save_attachment(self, post, memfile):

=== modified file 'pybb/views.py'
--- pybb/views.py	2017-10-27 17:30:08 +0000
+++ pybb/views.py	2017-11-24 12:01:31 +0000
@@ -11,7 +11,7 @@
 from django.db import connection
 from django.utils import translation
 from django.shortcuts import render
-from django.contrib.auth import logout
+
 
 from pybb.util import render_to, paged, build_form, quote_text, ajax, urlize
 from pybb.models import Category, Forum, Topic, Post, PrivateMessage, Attachment,\
@@ -21,6 +21,7 @@
 from pybb.orm import load_related
 
 from wl_utils import get_real_ip
+from check_input.models import SuspiciousInput
 
 try:
     from notification import models as notification
@@ -158,22 +159,34 @@
 
     if form.is_valid():
         post = form.save()
+
+        is_spam = False
+        # Check for spam in topics name for new topics
         if not topic:
-            post.topic.subscribers.add(request.user)
-
-        if post.hidden:
-            hidden_posts_count = Post.objects.filter(
-                user=request.user, hidden=True).count()
-
-            if hidden_posts_count >= settings.MAX_HIDDEN_POSTS:
-                user = get_object_or_404(User, username=request.user)
-                # Set the user inactive so he can't login
-                user.is_active = False
-                user.save()
-                # Log the user out
-                logout(request)
-                return HttpResponse(status=403)
-            return HttpResponseRedirect('pybb_moderate_info')
+            is_spam = SuspiciousInput(
+                content_object=post.topic, user=post.topic.user, text=post.topic.name).save()
+        # Check for spam in Post
+        if not is_spam:
+            is_spam = SuspiciousInput(
+                content_object=post, user=post.user, text=post.body).save()
+
+        if is_spam:
+            post.hidden = is_spam
+            post.save(update_fields=['hidden'])
+            return HttpResponseRedirect('/moderated/')
+
+        if notification:
+            if not topic:
+                # Inform subscribers of a new topic
+                notification.send(notification.get_observers_for('forum_new_topic'), 'forum_new_topic',
+                                  {'topic': post.topic, 'post': post, 'user': post.topic.user}, queue = True)
+                # Topics author is subscriber for all new posts in his topic
+                post.topic.subscribers.add(request.user)
+
+            else:
+                # Send mails about a new post to topic subscribers
+                notification.send(post.topic.subscribers.all(), 'forum_new_post',
+                                  {'post': post, 'topic': topic, 'user': post.user}, queue = True)
 
         return HttpResponseRedirect(post.get_absolute_url())
 
@@ -220,6 +233,11 @@
 
     if form.is_valid():
         post = form.save()
+        is_spam = SuspiciousInput(content_object=post, user=post.user, text=post.body).save()
+        if is_spam:
+            post.hidden = is_spam
+            post.save()
+            return HttpResponseRedirect('/moderated/')
         return HttpResponseRedirect(post.get_absolute_url())
 
     return {'form': form,

=== modified file 'settings.py'
--- settings.py	2017-09-29 21:24:37 +0000
+++ settings.py	2017-11-24 12:01:31 +0000
@@ -94,6 +94,7 @@
     'wlmaps',
     'wlscreens',
     'wlggz',
+    'check_input',
     'haystack', # search engine; see option HAYSTACK_CONNECTIONS
 
     # Modified 3rd party apps

=== added directory 'templates/check_input'
=== renamed file 'templates/pybb/pybb_moderate_info.html' => 'templates/check_input/moderate_info.html'
--- templates/pybb/pybb_moderate_info.html	2016-10-08 09:30:34 +0000
+++ templates/check_input/moderate_info.html	2017-11-24 12:01:31 +0000
@@ -1,12 +1,17 @@
-{% extends 'pybb/base.html' %}
+{% extends 'base.html' %}
 
 {% block content %}
 
 <h1>All comments have to be moderated</h1>
 
 <div class="blogEntry">
-	<p>Your comment has been saved but hidden to normal users. A moderator
-	will take a look at it and review it as soon as possible.</p>
+	<p>Your comment has to be reviewd by a moderator.</p>
+		{% if act_count == max_count|add:"-1" %}
+			<p><strong>Attention! The next time you will get logged out and deactivated!</strong></p>
+		{% endif %}
+		{% if act_count == max_count %}
+			<p><strong>You can't login anymore...</strong></p>
+		{% endif %}
 </div>
 	
 {% endblock %}

=== modified file 'urls.py'
--- urls.py	2017-08-24 16:19:01 +0000
+++ urls.py	2017-11-24 12:01:31 +0000
@@ -65,6 +65,7 @@
     url(r'^maps/', include('wlmaps.urls')),
     url(r'^screenshots/', include('wlscreens.urls')),
     url(r'^ggz/', include('wlggz.urls')),
+    url(r'^moderated/', include('check_input.urls')),
 ]
 
 try:


Follow ups