From dfbc353f5bd64d0e13f11e075dc0b2e46645cf32 Mon Sep 17 00:00:00 2001
From: Travis Briggs
Date: Mon, 19 Aug 2024 12:33:06 -0700
Subject: [PATCH] Code review fixes, with test

- Pass timeout=60 to both requests.get() calls in wp1/scores.py.
- If writing the pageview file fails part-way, log the error, remove the
  partial file and raise Wp1ScoreProcessingError chained to the cause.
- Only remove the partial file if it exists, so that a failure in open()
  itself is not masked by a FileNotFoundError from the cleanup.
- Add a test that simulates a mid-stream failure and checks that the
  partial file is gone and that the chained cause is the simulated error.

---
Review note: the `+` lines were revised after the original submission, so
the blob ids in the `index` lines below refer to the original revision.

 wp1/scores.py      | 20 +++++++++++++++-----
 wp1/scores_test.py | 26 ++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/wp1/scores.py b/wp1/scores.py
index 5bd0b3a72..2d1da7cc9 100644
--- a/wp1/scores.py
+++ b/wp1/scores.py
@@ -31,6 +31,7 @@ def wiki_languages():
   r = requests.get(
       'https://wikistats.wmcloud.org/api.php?action=dump&table=wikipedias&format=csv',
       headers={'User-Agent': WP1_USER_AGENT},
+      timeout=60,
   )
   try:
     r.raise_for_status()
@@ -83,12 +84,21 @@ def download_pageviews():
     # File already downloaded
     return
 
-  with requests.get(get_pageview_url(), stream=True) as r:
+  with requests.get(get_pageview_url(), stream=True, timeout=60) as r:
     r.raise_for_status()
-    with open(cur_filepath, 'wb') as f:
-      # Read data in 8 KB chunks
-      for chunk in r.iter_content(chunk_size=8 * 1024):
-        f.write(chunk)
+    try:
+      with open(cur_filepath, 'wb') as f:
+        # Read data in 8 KB chunks
+        for chunk in r.iter_content(chunk_size=8 * 1024):
+          f.write(chunk)
+    except Exception as e:
+      logger.exception('Error downloading pageviews')
+      # Remove any partial file so it is not mistaken for a finished download.
+      # It may not exist if open() itself failed; guard so that cleanup cannot
+      # mask the original error.
+      if os.path.exists(cur_filepath):
+        os.remove(cur_filepath)
+      raise Wp1ScoreProcessingError('Error downloading pageviews') from e
 
 
 def raw_pageviews(decode=False):
diff --git a/wp1/scores_test.py b/wp1/scores_test.py
index 00c21363b..b8ca010ae 100644
--- a/wp1/scores_test.py
+++ b/wp1/scores_test.py
@@ -187,6 +187,32 @@ def test_download_pageviews_skip_existing(self, mock_get_response,
     mock_get_response.assert_not_called()
     self.assertTrue(os.path.exists(file_path))
 
+  @patch('wp1.scores.requests.get')
+  def test_download_pageviews_handle_error(self, mock_get_response):
+    # iter_content() must return an iterator of chunks, so use a generator as
+    # the side_effect. A tuple side_effect would hand back the bytes object
+    # itself, and the loop would fail with TypeError on an int "chunk" rather
+    # than with the simulated mid-stream error.
+    def partial_then_fail(*args, **kwargs):
+      # Return partial data and then raise an exception
+      yield pageview_bz2[:100]
+      raise requests.exceptions.HTTPError
+
+    context = MagicMock()
+    resp = MagicMock()
+    resp.iter_content.side_effect = partial_then_fail
+    context.__enter__.return_value = resp
+    mock_get_response.return_value = context
+
+    with self.assertRaises(Wp1ScoreProcessingError) as cm:
+      scores.download_pageviews()
+    # The wrapped cause must be the simulated error, not an incidental one
+    self.assertIsInstance(cm.exception.__cause__,
+                          requests.exceptions.HTTPError)
+
+    file_path = scores.get_cur_file_path()
+    self.assertFalse(os.path.exists(file_path))
+
   @patch('wp1.scores.get_current_datetime', return_value=datetime(2024, 5, 25))
   @patch("builtins.open", new_callable=mock_open, read_data=pageview_bz2)
   def test_raw_pageviews(self, mock_file_open, mock_datetime):