[odrs-web/oscp] Allow moderators to add per-locale banned words
- From: Richard Hughes <rhughes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [odrs-web/oscp] Allow moderators to add per-locale banned words
- Date: Wed, 3 Jul 2019 19:35:21 +0000 (UTC)
commit 0d351d694cfa52fafe83c9f86aeb84d47634be47
Author: Richard Hughes <richard hughsie com>
Date: Wed Jul 3 18:29:57 2019 +0100
Allow moderators to add per-locale banned words
This allows us to auto-report reviews at submission time.
app_data/cron.py | 56 ++++++++++++++++++-
app_data/migrations/versions/e37c745e3097_.py | 31 +++++++++++
app_data/odrs/models.py | 50 +++++++++++++++++
app_data/odrs/templates/default.html | 1 +
app_data/odrs/templates/show.html | 10 ++++
app_data/odrs/templates/taboos.html | 49 +++++++++++++++++
app_data/odrs/tests/odrs_test.py | 32 +++++++++++
app_data/odrs/util.py | 15 +++++-
app_data/odrs/views_admin.py | 77 +++++++++++++++++++++++++--
app_data/odrs/views_api.py | 5 ++
10 files changed, 320 insertions(+), 6 deletions(-)
---
diff --git a/app_data/cron.py b/app_data/cron.py
index 31fd7fc..2f87b7b 100755
--- a/app_data/cron.py
+++ b/app_data/cron.py
@@ -10,11 +10,12 @@
import json
import sys
import datetime
+import csv
from odrs import db
-from odrs.models import Review
-from odrs.util import _get_rating_for_app_id
+from odrs.models import Review, Taboo
+from odrs.util import _get_rating_for_app_id, _get_taboos_for_locale
def _auto_delete(days=31):
@@ -48,6 +49,50 @@ def _regenerate_ratings(fn):
with open(fn, 'w') as outfd:
outfd.write(json.dumps(item, sort_keys=True, indent=4, separators=(',', ': ')))
+def _taboo_check():
+    """Report every not-yet-reported review that contains a taboo.
+
+    Reviews with ``reported`` below 5 are checked against the taboos for
+    their locale; each match is printed and the review gets ``reported``
+    set to 5.
+    """
+
+    # looking up the taboos is moderately expensive, so do it once per locale
+    taboos_by_locale = {}
+    candidates = db.session.query(Review).filter(Review.reported < 5).all()
+    for review in candidates:
+        if review.locale not in taboos_by_locale:
+            taboos_by_locale[review.locale] = _get_taboos_for_locale(review.locale)
+        hits = review.matches_taboos(taboos_by_locale[review.locale])
+        if not hits:
+            continue
+        for hit in hits:
+            print(review.review_id, review.locale, hit.value)
+        review.reported = 5
+    db.session.commit()
+
+def _taboo_import(fn):
+    """Import taboos from a CSV file, adding those that are not stored yet.
+
+    Each row of *fn* is expected to hold ``locale,value,description``.
+    Blank lines are skipped, rows with any other number of fields are
+    reported and skipped, and values containing a space or an uppercase
+    character are ignored.
+    """
+
+    # get all the taboos in one database call
+    taboos = {}
+    for taboo in db.session.query(Taboo).all():
+        key = taboo.locale + ':' + taboo.value
+        taboos[key] = taboo
+
+    # add any new ones; taboos are per-locale text, so do not depend on the
+    # encoding of whatever locale the cron job happens to run under
+    with open(fn, newline='', encoding='utf-8') as csvfile:
+        for row in csv.reader(csvfile):
+            # csv.reader yields an empty list for a blank line
+            if not row:
+                continue
+            if len(row) != 3:
+                print('Ignoring malformed row', row)
+                continue
+            locale, value, description = [field.strip() for field in row]
+            key = locale + ':' + value
+            if key in taboos:
+                continue
+            # only single lowercase words are accepted
+            if ' ' in value or value.lower() != value:
+                print('Ignoring', locale, value)
+                continue
+            taboo = Taboo(locale, value, description)
+            taboos[key] = taboo
+            print('Adding', locale, value)
+            db.session.add(taboo)
+    db.session.commit()
+
if __name__ == '__main__':
if len(sys.argv) < 2:
@@ -62,6 +107,13 @@ if __name__ == '__main__':
_regenerate_ratings(sys.argv[2])
elif sys.argv[1] == 'auto-delete':
_auto_delete()
+ elif sys.argv[1] == 'taboo-check':
+ _taboo_check()
+ elif sys.argv[1] == 'taboo-import':
+ if len(sys.argv) < 3:
+ print('Usage: %s taboo-import filename' % sys.argv[0])
+ sys.exit(1)
+ _taboo_import(sys.argv[2])
else:
print("cron mode %s not known" % sys.argv[1])
sys.exit(1)
diff --git a/app_data/migrations/versions/e37c745e3097_.py b/app_data/migrations/versions/e37c745e3097_.py
new file mode 100644
index 0000000..a8bce80
--- /dev/null
+++ b/app_data/migrations/versions/e37c745e3097_.py
@@ -0,0 +1,31 @@
+"""
+
+Revision ID: e37c745e3097
+Revises: 64751cf97429
+Create Date: 2019-07-03 19:54:01.718718
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e37c745e3097'
+down_revision = '64751cf97429'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+def upgrade():
+ """Create the ``taboos`` table and the non-unique index on ``locale``."""
+ op.create_table('taboos',
+ sa.Column('taboo_id', sa.Integer(), nullable=False),
+ sa.Column('locale', sa.String(length=8), nullable=False),
+ sa.Column('value', sa.Text(), nullable=False),
+ sa.Column('description', sa.Text(), nullable=True),
+ sa.PrimaryKeyConstraint('taboo_id'),
+ sa.UniqueConstraint('taboo_id'),
+ mysql_character_set='utf8mb4'
+ )
+ op.create_index(op.f('ix_taboos_locale'), 'taboos', ['locale'], unique=False)
+
+def downgrade():
+ """Drop the ``locale`` index and then the ``taboos`` table."""
+ op.drop_index(op.f('ix_taboos_locale'), table_name='taboos')
+ op.drop_table('taboos')
diff --git a/app_data/odrs/models.py b/app_data/odrs/models.py
index 4684dda..7bb4d40 100644
--- a/app_data/odrs/models.py
+++ b/app_data/odrs/models.py
@@ -8,6 +8,7 @@
# SPDX-License-Identifier: GPL-3.0+
import datetime
+import re
from werkzeug.security import generate_password_hash, check_password_hash
@@ -43,6 +44,25 @@ class Analytic(db.Model):
def __repr__(self):
return 'Analytic object %s' % self.analytic_id
+class Taboo(db.Model):
+    """A banned word for one locale, with an optional description."""
+
+    # sqlalchemy metadata
+    __tablename__ = 'taboos'
+    __table_args__ = {'mysql_character_set': 'utf8mb4'}
+
+    taboo_id = Column(Integer, primary_key=True, nullable=False, unique=True)
+    locale = Column(String(8), nullable=False, index=True)
+    value = Column(Text, nullable=False)
+    description = Column(Text, nullable=True)
+
+    def __init__(self, locale, value, description=None):
+        """Create a taboo for *locale* banning the word *value*.
+
+        *description* is optional; it defaults to None because the column
+        is a nullable Text column.
+        """
+        self.locale = locale
+        self.value = value
+        self.description = description
+
+    def __repr__(self):
+        return 'Taboo object %s' % self.taboo_id
+
class Vote(db.Model):
# sqlalchemy metadata
@@ -93,6 +113,9 @@ class User(db.Model):
def __repr__(self):
return 'User object %s' % self.user_id
+def _tokenize(val):
+    """Split *val* into lowercased words; apostrophes stay inside a word."""
+    words = re.findall(r"[\w']+", val)
+    return [word.lower() for word in words]
+
class Review(db.Model):
# sqlalchemy metadata
@@ -135,6 +158,33 @@ class Review(db.Model):
self.rating = 0
self.reported = 0
+    def _generate_keywords(self):
+        """Return the set of lowercased tokens found in the user-supplied text.
+
+        The summary, description and user display name are tokenized; any
+        of them that is empty or unset is skipped.
+        """
+
+        # tokenize anything the user can specify, deduplicating as we go
+        keywords = set()
+        for text in (self.summary, self.description, self.user_display):
+            if text:
+                keywords.update(_tokenize(text))
+
+        # remove anything invalid
+        keywords.discard(None)
+        return keywords
+
+    def matches_taboos(self, taboos):
+        """Return the entries of *taboos* whose value is a keyword of this review.
+
+        The result is a list in the order of *taboos*; it is empty when the
+        review contains no banned keywords.
+        """
+        keywords = self._generate_keywords()
+        return [taboo for taboo in taboos if taboo.value in keywords]
+
@property
def user_addr(self):
raise AttributeError('user_addr is not a readable attribute')
diff --git a/app_data/odrs/templates/default.html b/app_data/odrs/templates/default.html
index 8a7a4a7..087a9b6 100644
--- a/app_data/odrs/templates/default.html
+++ b/app_data/odrs/templates/default.html
@@ -39,6 +39,7 @@
<li><a href="{{url_for('.admin_show_stats')}}">Statistics</a></li>
<li><a href="{{url_for('.admin_users_all')}}">Users</a></li>
<li><a href="{{url_for('.admin_moderator_show_all')}}">Moderators</a></li>
+ <li><a href="{{url_for('.admin_taboo_show_all')}}">Taboos</a></li>
<li><a href="{{url_for('.admin_distros')}}">Distributions</a></li>
<li><a href="{{url_for('.admin_graph_month')}}">Usage</a></li>
<li><a href="{{url_for('.admin_search')}}">Search</a></li>
diff --git a/app_data/odrs/templates/show.html b/app_data/odrs/templates/show.html
index 5ef094f..a1fd9c3 100644
--- a/app_data/odrs/templates/show.html
+++ b/app_data/odrs/templates/show.html
@@ -11,6 +11,16 @@
{{r.app_id}}
<a class="btn pull-right" href="{{url_for('.admin_show_app', app_id=r.app_id)}}">All</a>
</h1>
+{% if matched_taboos %}
+ <div class="alert alert-warning" role="alert">
+ <strong>Warning: Contains taboo:</strong>
+ <ul>
+{% for taboo in matched_taboos %}
+ <li>{{taboo.value}}: {{taboo.description}}</li>
+{% endfor %}
+ </ul>
+ </div>
+{% endif %}
<table class="table card-text table-borderless table-condensed">
<tr class="row">
<th class="col col-md-2">Rating</th>
diff --git a/app_data/odrs/templates/taboos.html b/app_data/odrs/templates/taboos.html
new file mode 100644
index 0000000..6097b65
--- /dev/null
+++ b/app_data/odrs/templates/taboos.html
@@ -0,0 +1,49 @@
+{% extends "default.html" %}
+{% block title %}Taboos{% endblock %}
+
+{% block content %}
+
+<h2>Taboos</h2>
+
+{% if taboos|length == 0 %}
+<p>
+  There are no taboos stored.
+</p>
+{% endif %}
+{# the add form is always shown, otherwise the first taboo could never be added from this page #}
+<form method="post" action="{{url_for('.admin_taboo_add')}}" class="form">
+<table class="table table-hover table-responsive">
+  <tr class="row">
+    <th class="col-sm-1">Locale</th>
+    <th class="col-sm-2">Value</th>
+    <th class="col-sm-7">Description</th>
+    <th class="col-sm-2">&nbsp;</th>
+  </tr>
+{% for taboo in taboos %}
+  <tr class="row">
+    <td>{{taboo.locale}}</td>
+    <td>{{taboo.value}}</td>
+    <td>{{taboo.description}}</td>
+    <td>
+      <a class="btn btn-danger btn-block" href="{{url_for('.admin_taboo_delete', taboo_id=taboo.taboo_id)}}">Delete</a>
+    </td>
+  </tr>
+{% endfor %}
+  <tr class="row">
+    <td>
+      <input type="text" class="form-control" name="locale" required/>
+    </td>
+    <td>
+      <input type="text" class="form-control" name="value" required/>
+    </td>
+    <td>
+      <input type="text" class="form-control" name="description" required/>
+    </td>
+    <td>
+      <button class="btn btn-action btn-block" type="submit">Add</button>
+    </td>
+  </tr>
+</table>
+</form>
+
+{% endblock %}
diff --git a/app_data/odrs/tests/odrs_test.py b/app_data/odrs/tests/odrs_test.py
index c757c06..ccdb6b6 100644
--- a/app_data/odrs/tests/odrs_test.py
+++ b/app_data/odrs/tests/odrs_test.py
@@ -215,6 +215,38 @@ class OdrsTest(unittest.TestCase):
rv = self.app.get('/admin/search?value=inkscape+notgoingtoexist')
assert b'Somebody Import' in rv.data, rv.data
+    def _admin_taboo_add(self, locale='en', value='inkscape', description='ola!'):
+        """POST a taboo to the admin add page and return the followed response."""
+        form = {
+            'locale': locale,
+            'value': value,
+            'description': description,
+        }
+        return self.app.post('/admin/taboo/add', data=form, follow_redirects=True)
+
+ def test_admin_taboo(self):
+ """Exercise the admin taboo pages: list, add, review flagging and delete."""
+
+ self.login()
+
+ # nothing has been added yet
+ rv = self.app.get('/admin/taboo/all')
+ assert b'There are no taboos stored' in rv.data, rv.data
+
+ # add taboos; the same locale and value twice is refused, a second locale is accepted
+ rv = self._admin_taboo_add()
+ assert b'Added taboo' in rv.data, rv.data
+ assert b'inkscape' in rv.data, rv.data
+ rv = self._admin_taboo_add()
+ assert b'Already added that taboo' in rv.data, rv.data
+ rv = self._admin_taboo_add(locale='fr_FR')
+ assert b'Added taboo' in rv.data, rv.data
+
+ # submit something, and ensure it's flagged
+ self.review_submit()
+ rv = self.app.get('/admin/review/1')
+ assert b'Somebody Important' in rv.data, rv.data
+ assert b'Contains taboo' in rv.data, rv.data
+
+ # delete; a second delete of the same ID reports that it is gone
+ rv = self.app.get('/admin/taboo/1/delete', follow_redirects=True)
+ assert b'Deleted taboo' in rv.data, rv.data
+ rv = self.app.get('/admin/taboo/1/delete', follow_redirects=True)
+ assert b'No taboo with ID' in rv.data, rv.data
+
def test_api_submit_when_banned(self):
# submit abusive review
diff --git a/app_data/odrs/util.py b/app_data/odrs/util.py
index ae5cfb8..fb6dd23 100644
--- a/app_data/odrs/util.py
+++ b/app_data/odrs/util.py
@@ -8,7 +8,7 @@
import json
import hashlib
-from sqlalchemy import text
+from sqlalchemy import text, or_
from flask import Response
@@ -94,6 +94,19 @@ def _addr_hash(value):
from odrs import app
return hashlib.sha1((app.secret_key + value).encode('utf-8')).hexdigest()
+def _get_taboos_for_locale(locale):
+    """Return the taboos that apply to *locale*.
+
+    That is every taboo stored for the exact locale, for its language part
+    when the locale has the form ``lang_REGION``, and for ``en``.
+    """
+    from .models import Taboo
+    from odrs import db
+
+    clauses = [Taboo.locale == locale]
+    if '_' in locale:
+        # e.g. fr_FR also gets the taboos stored for plain fr
+        lang = locale.split('_', maxsplit=1)[0]
+        clauses.append(Taboo.locale == lang)
+    clauses.append(Taboo.locale == 'en')
+    return db.session.query(Taboo).filter(or_(*clauses)).all()
+
def _sanitised_input(val):
# remove trailing whitespace
diff --git a/app_data/odrs/views_admin.py b/app_data/odrs/views_admin.py
index cf605f2..56b98d3 100644
--- a/app_data/odrs/views_admin.py
+++ b/app_data/odrs/views_admin.py
@@ -17,9 +17,9 @@ from flask import abort, request, flash, render_template, redirect, url_for
from flask_login import login_required, current_user
from odrs import app, db
-from .models import Review, User, Moderator, Vote
+from .models import Review, User, Moderator, Vote, Taboo
from .models import _vote_exists
-from .util import _get_datestr_from_dt
+from .util import _get_datestr_from_dt, _get_taboos_for_locale
def _get_chart_labels_months():
""" Gets the chart labels """
@@ -276,7 +276,11 @@ def admin_show_review(review_id):
else:
vote = None
- return render_template('show.html', r=review, vote_exists=vote)
+ # does the review contain any banned keywords
+ matched_taboos = review.matches_taboos(_get_taboos_for_locale(review.locale))
+ return render_template('show.html', r=review,
+ vote_exists=vote,
+ matched_taboos=matched_taboos)
@app.route('/admin/modify/<review_id>', methods=['POST'])
@login_required
@@ -622,6 +626,73 @@ def admin_moderate_delete(moderator_id):
flash('Deleted user')
return redirect(url_for('.admin_moderator_show_all'))
+@app.route('/admin/taboo/all')
+@login_required
+def admin_taboo_show_all():
+    """
+    Show all the taboos, sorted by locale and then by value [ADMIN ONLY]
+    """
+    # security check
+    if not current_user.is_admin:
+        flash('Unable to show all taboos', 'error')
+        return redirect(url_for('.odrs_index'))
+
+    query = db.session.query(Taboo).\
+        order_by(Taboo.locale.asc(), Taboo.value.asc())
+    return render_template('taboos.html', taboos=query.all())
+
+@app.route('/admin/taboo/add', methods=['GET', 'POST'])
+@login_required
+def admin_taboo_add():
+    """ Add a taboo [ADMIN ONLY]
+
+    Expects the form fields ``locale``, ``value`` and ``description``.
+    Surrounding whitespace is stripped, and the value has to be a single
+    lowercase word.
+    """
+
+    # only accept form data
+    if request.method != 'POST':
+        return redirect(url_for('.admin_taboo_show_all'))
+
+    # security check
+    if not current_user.is_admin:
+        flash('Unable to add taboo as non-admin', 'error')
+        return redirect(url_for('.odrs_index'))
+
+    for key in ['locale', 'value', 'description']:
+        if key not in request.form:
+            flash('Unable to add taboo as {} missing'.format(key), 'error')
+            return redirect(url_for('.odrs_index'))
+    locale = request.form['locale'].strip()
+    value = request.form['value'].strip()
+    description = request.form['description'].strip()
+
+    # reviews are matched against lowercased single-word tokens, so a value
+    # with a space or an uppercase character could never match anything
+    if not value or ' ' in value or value.lower() != value:
+        flash('Taboo value has to be a single lowercase word', 'warning')
+        return redirect(url_for('.admin_taboo_show_all'))
+
+    # refuse duplicates
+    if db.session.query(Taboo).\
+            filter(Taboo.locale == locale).\
+            filter(Taboo.value == value).first():
+        flash('Already added that taboo', 'warning')
+        return redirect(url_for('.admin_taboo_show_all'))
+
+    # add the new taboo
+    db.session.add(Taboo(locale, value, description))
+    db.session.commit()
+    flash('Added taboo')
+    return redirect(url_for('.admin_taboo_show_all'))
+
+@app.route('/admin/taboo/<taboo_id>/delete')
+@login_required
+def admin_taboo_delete(taboo_id):
+    """ Delete a taboo [ADMIN ONLY] """
+
+    # security check
+    if not current_user.is_admin:
+        flash('Unable to delete taboo as not admin', 'error')
+        return redirect(url_for('.odrs_index'))
+
+    # check whether exists in database
+    query = db.session.query(Taboo).filter(Taboo.taboo_id == taboo_id)
+    taboo = query.first()
+    if not taboo:
+        flash("No taboo with ID {}".format(taboo_id), 'warning')
+        return redirect(url_for('.admin_taboo_show_all'))
+
+    # remove it and go back to the list
+    db.session.delete(taboo)
+    db.session.commit()
+    flash('Deleted taboo')
+    return redirect(url_for('.admin_taboo_show_all'))
+
@app.route('/admin/vote/<review_id>/<val_str>')
@login_required
def admin_vote(review_id, val_str):
diff --git a/app_data/odrs/views_api.py b/app_data/odrs/views_api.py
index f8dc443..cfc4796 100644
--- a/app_data/odrs/views_api.py
+++ b/app_data/odrs/views_api.py
@@ -22,6 +22,7 @@ from .models import Review, User, Vote, Analytic
from .models import _vote_exists
from .util import json_success, json_error, _locale_is_compatible, _eventlog_add, _get_user_key,
_get_datestr_from_dt
from .util import _sanitised_version, _sanitised_summary, _sanitised_description, _get_rating_for_app_id
+from .util import _get_taboos_for_locale
ODRS_REPORTED_CNT = 2
@@ -130,6 +131,10 @@ def api_submit():
if item['user_display'] not in user_display_ignore:
review.user_display = item['user_display']
+ # contains taboos
+ if review.matches_taboos(_get_taboos_for_locale(review.locale)):
+ review.reported = 5
+
# log and add
_eventlog_add(_get_client_address(),
review.user_id,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]