From 0fb7133e571038087713f015b3e1f78c91e3e41f Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 26 Feb 2024 12:07:49 -0500 Subject: [PATCH 1/2] Added a simple Blocklist test. --- tests/nameres/test_blocklist.py | 114 ++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 tests/nameres/test_blocklist.py diff --git a/tests/nameres/test_blocklist.py b/tests/nameres/test_blocklist.py new file mode 100644 index 0000000..a8e103f --- /dev/null +++ b/tests/nameres/test_blocklist.py @@ -0,0 +1,114 @@ +import csv +import io +import logging +import urllib.parse +from dataclasses import dataclass +from typing import Optional + +import requests +import pytest + + +# The Translator Blocklist is stored in a private GitHub repository; however, +# we are currently using a spreadsheet to manage "Red Team" exercises where +# multiple Translator members try out different offensive terms and log them +# into a single spreadsheet. Eventually this test will support both, but since +# my immediate need is to check the spreadsheet, I'll start with that. +@dataclass(frozen=True) +class BlocklistEntry: + """ + A single Blocklist entry. + """ + Query: Optional[str] = None + CURIE: Optional[str] = None + Blocked: str = None + Status: str = None + Issue: str = None + TreatsOnly: str = None + Submitter: str = None + Comment: str = None + + def is_blocked(self): + """ Is this term supposed to be blocked? """ + if self.Blocked is not None and self.Blocked == 'y': + return True + return False + + @staticmethod + def from_gsheet_dict(row): + """ + Given a dictionary from a row in Google Sheets, fill in the necessary fields. + + :return: A BlocklistEntry with the filled in fields. 
+ """ + + return BlocklistEntry( + Query=row.get('String (optional)', None), + CURIE=row.get('CURIE (optional)', None), + Blocked=row['Blocked?'], + Status=row['Status (Feb 21, 2024)'], + Issue=row['Blocklist issue'], + TreatsOnly=row['Block for "treats" only?'], + Submitter=row['Submitter'], + Comment=row['Comment (optional)'], + ) + + +def load_blocklist_from_gsheet(google_sheet_id): + """ + Load the Blocklist from a Google Sheet. + + :param google_sheet_id: The Google Sheet ID containing the blocklist. + :return: A list of BlocklistEntry. + """ + google_sheet_id = '1UR2eplHBvFRwaSIVOhlB44wpfNPY1z7AVzUkqzDqIWA' + csv_url = f"https://docs.google.com/spreadsheets/d/{google_sheet_id}/gviz/tq?tqx=out:csv&sheet=Tests" + + response = requests.get(csv_url) + csv_content = response.text + + rows = [] + with io.StringIO(csv_content) as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(BlocklistEntry.from_gsheet_dict(row)) + + return rows + + +def test_check_blocklist_entry(target_info): + """ + Test whether a NameRes instance has blocked every item from a blocklist. + + :param target_info: The test target information. + """ + nameres_url = target_info['NameResURL'] + nameres_url_reverse_lookup = nameres_url + 'reverse_lookup' + + blocklist_gsheet_id = target_info['BlocklistGSheetID'] + + # We only support the Google Sheet blocklist for now. + if not blocklist_gsheet_id: + return + + blocklist_entries = load_blocklist_from_gsheet(blocklist_gsheet_id) + + assert len(blocklist_entries) > 0, f"No blocklist entries found in Google Sheet {blocklist_gsheet_id}" + + for entry in blocklist_entries: + # Only "blocked" entries are considered, since most of the spreadsheet is things we decided _not_ to block. 
+ if not entry.is_blocked(): + logging.info(f"Skipping blocklist entry as it is not asserted to be blocked: {entry}") + continue + + assert entry.CURIE, f"Blocklist entry claims to be blocked, but does not have a CURIE: {entry}" + + # Someday we would like to do this with the query as well, but that would require some work. + # So we only test the CURIE for now. + response = requests.get(nameres_url_reverse_lookup, params={ + 'curies': entry.CURIE, + }) + assert response.ok + result = response.json()[entry.CURIE] + + assert result == {}, f"Expected {entry.CURIE} to be absent on {nameres_url_reverse_lookup}, but found: {result}" From de7b236ede328e06a22b93ddeca59c088574ec43 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Mon, 26 Feb 2024 12:27:30 -0500 Subject: [PATCH 2/2] Parametrize blocklist entries. --- tests/nameres/test_blocklist.py | 52 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/tests/nameres/test_blocklist.py b/tests/nameres/test_blocklist.py index a8e103f..23558ec 100644 --- a/tests/nameres/test_blocklist.py +++ b/tests/nameres/test_blocklist.py @@ -54,7 +54,7 @@ def from_gsheet_dict(row): ) -def load_blocklist_from_gsheet(google_sheet_id): +def load_blocklist_from_gsheet(): """ Load the Blocklist from a Google Sheet. @@ -75,8 +75,12 @@ def load_blocklist_from_gsheet(google_sheet_id): return rows +# Parameterize blocklist entries. +blocklist_entries = load_blocklist_from_gsheet() -def test_check_blocklist_entry(target_info): + +@pytest.mark.parametrize("blocklist_entry", blocklist_entries) +def test_check_blocklist_entry(target_info, blocklist_entry): """ Test whether a NameRes instance has blocked every item from a blocklist. @@ -85,30 +89,30 @@ def test_check_blocklist_entry(target_info): nameres_url = target_info['NameResURL'] nameres_url_reverse_lookup = nameres_url + 'reverse_lookup' - blocklist_gsheet_id = target_info['BlocklistGSheetID'] - - # We only support the Google Sheet blocklist for now. 
- if not blocklist_gsheet_id: + # Entries marked 'y' must be absent from the reverse lookup and entries marked 'n' must be present; any other value is not tested. + if blocklist_entry.Blocked == 'y': + flag_expect_present = False + elif blocklist_entry.Blocked == 'n': + flag_expect_present = True + else: + logging.info(f"Skipping blocklist entry as it is not asserted to be blocked: {blocklist_entry}") return - blocklist_entries = load_blocklist_from_gsheet(blocklist_gsheet_id) - - assert len(blocklist_entries) > 0, f"No blocklist entries found in Google Sheet {blocklist_gsheet_id}" - - for entry in blocklist_entries: - # Only "blocked" entries are considered, since most of the spreadsheet is things we decided _not_ to block. - if not entry.is_blocked(): - logging.info(f"Skipping blocklist entry as it is not asserted to be blocked: {entry}") - continue + if flag_expect_present and not blocklist_entry.CURIE: + # We've got a bunch of these, just ignore them. + pytest.skip(f"Blocklist entry expected to not be blocked, but no CURIE provided: {blocklist_entry}") - assert entry.CURIE, f"Blocklist entry claims to be blocked, but does not have a CURIE: {entry}" + assert blocklist_entry.CURIE, f"Blocklist entry claims to be blocked, but does not have a CURIE: {blocklist_entry}" - # Someday we would like to do this with the query as well, but that would require some work. - # So we only test the CURIE for now. - response = requests.get(nameres_url_reverse_lookup, params={ - 'curies': entry.CURIE, - }) - assert response.ok - result = response.json()[entry.CURIE] + # Someday we would like to do this with the query as well, but that would require some work. + # So we only test the CURIE for now. 
+ response = requests.get(nameres_url_reverse_lookup, params={ + 'curies': blocklist_entry.CURIE, + }) + assert response.ok + result = response.json()[blocklist_entry.CURIE] - assert result == {}, f"Expected {entry.CURIE} to be absent on {nameres_url_reverse_lookup}, but found: {result}" + if flag_expect_present: + assert result != {}, f"Expected {blocklist_entry.CURIE} to be present on {nameres_url_reverse_lookup}, but found: {result}" + else: + assert result == {}, f"Expected {blocklist_entry.CURIE} to be absent on {nameres_url_reverse_lookup}, but found: {result}"