Skip to content

Commit

Permalink
Support meta-data for suspicion reasons
Browse files Browse the repository at this point in the history
* Represent suspicion reasons with a new Reason class that, in addition
to the string identifier, supports a label and source/origin of the
reason

* Continue to store string identifiers in the
Changeset.suspicion_reasons field for backward compatibility

* Introduce a new Changeset.detailed_reasons field that has the full
Reason objects

* Use the Reason.source to track when suspicion reasons originated as
warnings generated by an editor, such as iD

* When printing suspicion reasons in the CLI, include the source of the
reason if it was not from osmcha

* Update unit tests
  • Loading branch information
nrotstan committed Dec 9, 2019
1 parent cd39be8 commit 688166d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 20 deletions.
39 changes: 24 additions & 15 deletions osmcha/changeset.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ class InvalidChangesetError(Exception):
pass


class Reason:
"""Details and meta-data for a suspicion reason."""
def __init__(self, identifier, label=None, source="osmcha"):
self.identifier = identifier
self.label = label if label else identifier
self.source = source


def get_user_details(user_id):
"""Get information about number of changesets, blocks and mapping days of a
user, using both the OSM API and the Mapbox comments APIself.
Expand All @@ -56,7 +64,7 @@ def get_user_details(user_id):
changesets = [i for i in xml_data if i.tag == 'changesets'][0]
blocks = [i for i in xml_data if i.tag == 'blocks'][0]
if int(changesets.get('count')) <= 5:
reasons.append('New mapper')
reasons.append(Reason('New mapper'))
elif int(changesets.get('count')) <= 30:
url = MAPBOX_USERS_API.format(
user_id=requests.compat.quote(user_id)
Expand All @@ -67,9 +75,9 @@ def get_user_details(user_id):
user_request.json().get('extra').get('mapping_days')
)
if mapping_days <= 5:
reasons.append('New mapper')
reasons.append(Reason('New mapper'))
if int(blocks[0].get('count')) > 1:
reasons.append('User has multiple blocks')
reasons.append(Reason('User has multiple blocks'))
except Exception as e:
message = 'Could not verify user of the changeset: {}, {}'
print(message.format(user_id, str(e)))
Expand Down Expand Up @@ -239,7 +247,6 @@ def filter(self):
if get_bounds(ch).intersects(self.area)
]


class Analyse(object):
"""Analyse a changeset and define if it is suspect."""
def __init__(self, changeset, create_threshold=200, modify_threshold=200,
Expand Down Expand Up @@ -299,6 +306,7 @@ def set_fields(self, changeset):
'%Y-%m-%dT%H:%M:%SZ'
)
self.suspicion_reasons = []
self.detailed_reasons = {}
self.is_suspect = False
self.powerfull_editor = False
self.warning_tags = [
Expand All @@ -307,7 +315,8 @@ def set_fields(self, changeset):

def label_suspicious(self, reason):
"""Add suspicion reason and set the suspicious flag."""
self.suspicion_reasons.append(reason)
self.suspicion_reasons.append(reason.identifier)
self.detailed_reasons[reason.identifier] = reason
self.is_suspect = True

def full_analysis(self):
Expand All @@ -318,12 +327,12 @@ def full_analysis(self):
self.verify_warning_tags()

if self.review_requested == 'yes':
self.label_suspicious('Review requested')
self.label_suspicious(Reason('Review requested'))

def verify_warning_tags(self):
for tag in self.warning_tags:
if tag in self.enabled_warnings.keys():
self.label_suspicious(self.enabled_warnings.get(tag))
self.label_suspicious(Reason(self.enabled_warnings.get(tag), None, self.editor))

def verify_user(self):
"""Verify if the changeset was made by a inexperienced mapper (anyone
Expand All @@ -338,21 +347,21 @@ def verify_words(self):
"""
if self.comment:
if find_words(self.comment, self.suspect_words, self.excluded_words):
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))

if self.source:
for word in self.illegal_sources:
if word in self.source.lower():
if word == 'yandex' and 'yandex panorama' in self.source.lower():
pass
else:
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))
break

if self.imagery_used:
for word in self.illegal_sources:
if word in self.imagery_used.lower():
self.label_suspicious('suspect_word')
self.label_suspicious(Reason('suspect_word'))
break

self.suspicion_reasons = list(set(self.suspicion_reasons))
Expand Down Expand Up @@ -383,10 +392,10 @@ def verify_editor(self):
'mapwith.ai'
]
if self.host.split('://')[-1].split('/')[0] not in trusted_hosts:
self.label_suspicious('Unknown iD instance')
self.label_suspicious(Reason('Unknown iD instance'))
else:
self.powerfull_editor = True
self.label_suspicious('Software editor was not declared')
self.label_suspicious(Reason('Software editor was not declared'))

def count(self):
"""Count the number of elements created, modified and deleted by the
Expand All @@ -404,14 +413,14 @@ def count(self):
if (self.create / len(actions) > self.percentage and
self.create > self.create_threshold and
(self.powerfull_editor or self.create > self.top_threshold)):
self.label_suspicious('possible import')
self.label_suspicious(Reason('possible import'))
elif (self.modify / len(actions) > self.percentage and
self.modify > self.modify_threshold):
self.label_suspicious('mass modification')
self.label_suspicious(Reason('mass modification'))
elif ((self.delete / len(actions) > self.percentage and
self.delete > self.delete_threshold) or
self.delete > self.top_threshold):
self.label_suspicious('mass deletion')
self.label_suspicious(Reason('mass deletion'))
except ZeroDivisionError:
print('It seems this changeset was redacted')

Expand Down
12 changes: 11 additions & 1 deletion osmcha/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,19 @@ def cli(id):
'Created: %s. Modified: %s. Deleted: %s' % (ch.create, ch.modify, ch.delete)
)
if ch.is_suspect:
reasonDescriptions = [formatted_reason(reason) for reason in ch.detailed_reasons.values()]
click.echo('The changeset {} is suspect! Reasons: {}'.format(
id,
', '.join(ch.suspicion_reasons)
', '.join(reasonDescriptions)
))
else:
click.echo('The changeset %s is not suspect!' % id)

def formatted_reason(reason):
"""
Generate a formatted description of the reason using its label and appending
the source of the reason if it was not from osmcha (e.g. if it originated
from an editor warning)
"""
sourceLabel = f' ({reason.source})' if reason.source != 'osmcha' else ''
return f'{reason.label}{sourceLabel}'
13 changes: 9 additions & 4 deletions tests/test_mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from shapely.geometry import Polygon

from osmcha.changeset import ChangesetList
from osmcha.changeset import Reason
from osmcha.changeset import Analyse
from osmcha.changeset import WORDS
from osmcha.changeset import find_words
Expand Down Expand Up @@ -100,8 +101,9 @@ def test_analyse_label_suspicious():
])
}
ch = Analyse(ch_dict)
ch.label_suspicious('some reason')
ch.label_suspicious(Reason('some reason'))
assert 'some reason' in ch.suspicion_reasons
assert ch.detailed_reasons['some reason'].identifier == 'some reason'
assert ch.is_suspect


Expand Down Expand Up @@ -601,10 +603,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16

# An iD changeset with warnings:
ch = Analyse(72783703)
Expand All @@ -621,10 +624,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16

# A JOSM changeset
ch = Analyse(46315321)
Expand All @@ -641,10 +645,11 @@ def test_get_dict():
assert 'is_suspect' in ch.get_dict().keys()
assert 'powerfull_editor' in ch.get_dict().keys()
assert 'suspicion_reasons' in ch.get_dict().keys()
assert 'detailed_reasons' in ch.get_dict().keys()
assert 'create' in ch.get_dict().keys()
assert 'modify' in ch.get_dict().keys()
assert 'delete' in ch.get_dict().keys()
assert len(ch.get_dict().keys()) == 15
assert len(ch.get_dict().keys()) == 16


def test_changeset_without_tags():
Expand Down

0 comments on commit 688166d

Please sign in to comment.