Skip to content

Commit

Permalink
Add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
audiodude committed Aug 19, 2024
1 parent 31e12d3 commit bfe4d6b
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 4 deletions.
5 changes: 5 additions & 0 deletions wp1/credentials.py.example
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ CREDENTIALS = {
'password': 'farmpass',
'hook_token': 'hook-token-abc',
}

'FILE_PATH': {
# Path where pageviews.bz2 file (~3GB) will be downloaded.
'pageviews': '/tmp/pageviews',
}
},

# EDIT: Remove the next line after you've provided actual production credentials.
Expand Down
8 changes: 4 additions & 4 deletions wp1/scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def download_pageviews():

with requests.get(get_pageview_url(), stream=True) as r:
r.raise_for_status()
with open(PAGEVIEW_FILE_NAME, 'wb') as f:
with open(cur_filepath, 'wb') as f:
# Read data in 8 KB chunks
for chunk in r.iter_content(chunk_size=8 * 1024):
f.write(chunk)
Expand Down Expand Up @@ -140,7 +140,7 @@ def pageview_components():
try:
views = int(parts[4])
except ValueError:
log.warning('Views field wasn\'t int in pageview dump: %r', line)
logger.warning('Views field wasn\'t int in pageview dump: %r', line)
continue

if (tally is not None and tally.lang == lang and tally.name == name and
Expand Down Expand Up @@ -172,7 +172,7 @@ def update_db_pageviews(wp10db, lang, article, page_id, views):
})


def update_pageviews(filter_lang=None):
def update_pageviews(filter_lang=None, commit_after=50000):
download_pageviews()

# Convert filter lang to bytes if necessary
Expand All @@ -191,7 +191,7 @@ def update_pageviews(filter_lang=None):
update_db_pageviews(wp10db, lang, article, page_id, views)

n += 1
if n >= 50000:
if n >= commit_after:
logger.debug('Committing')
wp10db.commit()
n = 0
Expand Down
134 changes: 134 additions & 0 deletions wp1/scores_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import bz2
from datetime import datetime
import os.path
import unittest
from unittest.mock import patch, MagicMock, mock_open

Expand Down Expand Up @@ -35,7 +36,33 @@
af.wikipedia 1712 753 mobile-web 2 O2
af.wikipedia 1712 753 desktop 20 E12J7U1'''

# Pageview dump fixture mixing well-formed rows with malformed ones: rows whose
# title field is '-', rows that have one field fewer than the others, and rows
# whose views field is not an integer ('X' / 'Z').
# test_pageview_components_errors expects none of the malformed rows to show
# up in the parsed output.
pageview_error_text = b'''af.wikipedia 1701 1402 desktop 4 F1
af.wikipedia 1701 1402 mobile-web 3 O2T1
af.wikipedia 1702 1404 mobile-web 3 L1O2
af.wikipedia 1702 1404 desktop 1 P1
af.wikipedia - 1405 mobile-web 3 C1O2
af.wikipedia - 1405 desktop 1 ^1
af.wikipedia 1704 1406 mobile-web 4 A1O2T1
af.wikipedia 1704 1406 desktop 2 F1
af.wikipedia 1705 1407 mobile-web 3 O3
af.wikipedia 1705 1407 desktop 1 F1
af.wikipedia 1706 desktop 8 H8
af.wikipedia 1706 mobile-web 4 C1O2Y1
af.wikipedia 1707 1409 mobile-web 2 O2
af.wikipedia 1707 1409 desktop 3 H1J1
af.wikipedia 1708 1410 desktop X V1]1
af.wikipedia 1708 1410 mobile-web Z O1
af.wikipedia 1709 1411 desktop 2 F1
af.wikipedia 1709 1411 mobile-web 2 O2
af.wikipedia \xc3\xa9\xc3\xa1\xc3\xb8 3774 mobile-web 1 A1
af.wikipedia \xc3\xa9\xc3\xa1\xc3\xb8 3774 mobile-web 2 F2
af.wikipedia 1711 752 mobile-web 4 C1O2U1
af.wikipedia 1711 752 desktop 1 K1
af.wikipedia 1712 753 mobile-web 2 O2
af.wikipedia 1712 753 desktop 20 E12J7U1'''

# bz2-compressed copies of the fixtures. The tests feed these to the code
# under test as mocked file contents (mock_open read_data) or as the mocked
# HTTP response chunk.
pageview_bz2 = bz2.compress(pageview_text)
pageview_error_bz2 = bz2.compress(pageview_error_text)


class ScoresTest(BaseWpOneDbTest):
Expand Down Expand Up @@ -105,6 +132,61 @@ def test_get_pageview_file_path(self):
actual = scores.get_pageview_file_path('pageviews-202404-user.bz2')
self.assertEqual('/tmp/pageviews/pageviews-202404-user.bz2', actual)

@patch('wp1.scores.get_current_datetime', return_value=datetime(2024, 5, 25))
@patch('wp1.scores.requests.get')
def test_download_pageviews(self, mock_get_response, mock_datetime):
    """download_pageviews() fetches the dump and writes it to the current path.

    requests.get is mocked to return a context manager whose response
    yields the compressed fixture as a single chunk, so no network access
    happens.
    """
    context = MagicMock()
    resp = MagicMock()
    resp.iter_content.return_value = (pageview_bz2,)
    context.__enter__.return_value = resp
    mock_get_response.return_value = context

    file_path = scores.get_cur_file_path()

    def remove_download():
        if os.path.exists(file_path):
            os.remove(file_path)

    # Start without a leftover file so the download path is exercised, and
    # make sure the file written by this test does not leak into other
    # tests or later runs.
    remove_download()
    self.addCleanup(remove_download)

    scores.download_pageviews()

    mock_get_response.assert_called_once()
    self.assertTrue(os.path.exists(file_path))
    # Existence alone would also be satisfied by an empty file; check that
    # the streamed chunk actually reached the disk.
    with open(file_path, 'rb') as f:
        self.assertEqual(pageview_bz2, f.read())

@patch('wp1.scores.get_current_datetime', return_value=datetime(2024, 5, 25))
@patch('wp1.scores.requests.get')
def test_download_pageviews_remove_prev(self, mock_get_response,
                                        mock_datetime):
    """download_pageviews() deletes the file at the previous file path."""
    context = MagicMock()
    resp = MagicMock()
    resp.iter_content.return_value = (pageview_bz2,)
    context.__enter__.return_value = resp
    mock_get_response.return_value = context

    file_path = scores.get_prev_file_path()
    cur_path = scores.get_cur_file_path()

    def remove_files():
        for path in (file_path, cur_path):
            if os.path.exists(path):
                os.remove(path)

    # A current-month file left behind by another test would change which
    # path through download_pageviews() runs; remove it so the outcome does
    # not depend on test order, and clean up whatever this test creates.
    if os.path.exists(cur_path):
        os.remove(cur_path)
    self.addCleanup(remove_files)

    # Create empty file
    with open(file_path, 'a'):
        pass

    scores.download_pageviews()

    self.assertFalse(os.path.exists(file_path))

@patch('wp1.scores.get_current_datetime', return_value=datetime(2024, 5, 25))
@patch('wp1.scores.requests.get')
def test_download_pageviews_skip_existing(self, mock_get_response,
                                          mock_datetime):
    """download_pageviews() makes no HTTP request if the file already exists."""
    context = MagicMock()
    resp = MagicMock()
    resp.iter_content.return_value = (pageview_bz2,)
    context.__enter__.return_value = resp
    mock_get_response.return_value = context

    file_path = scores.get_cur_file_path()

    def remove_placeholder():
        if os.path.exists(file_path):
            os.remove(file_path)

    # The empty placeholder must not outlive the test: a leftover file at
    # this path would make later calls skip the download as well.
    self.addCleanup(remove_placeholder)

    # Create empty file
    with open(file_path, 'a'):
        pass

    scores.download_pageviews()

    mock_get_response.assert_not_called()
    self.assertTrue(os.path.exists(file_path))

@patch('wp1.scores.get_current_datetime', return_value=datetime(2024, 5, 25))
@patch("builtins.open", new_callable=mock_open, read_data=pageview_bz2)
def test_raw_pageviews(self, mock_file_open, mock_datetime):
Expand Down Expand Up @@ -140,6 +222,24 @@ def test_pageview_components(self, mock_file_open):

self.assertEqual(expected, actual)

@patch("builtins.open", new_callable=mock_open, read_data=pageview_error_bz2)
def test_pageview_components_errors(self, mock_file_open):
    """Malformed rows in the dump are dropped; well-formed rows are tallied."""
    parsed = list(scores.pageview_components())

    # One tuple per well-formed article in the error fixture; the last
    # element is the sum of the views over that article's rows.
    well_formed = [
        (b'af', b'1701', b'1402', 7),
        (b'af', b'1702', b'1404', 4),
        (b'af', b'1704', b'1406', 6),
        (b'af', b'1705', b'1407', 4),
        (b'af', b'1707', b'1409', 5),
        (b'af', b'1709', b'1411', 4),
        (b'af', b'\xc3\xa9\xc3\xa1\xc3\xb8', b'3774', 3),
        (b'af', b'1711', b'752', 5),
        (b'af', b'1712', b'753', 22),
    ]
    self.assertEqual(well_formed, parsed)

def test_update_db_pageviews(self):
scores.update_db_pageviews(self.wp10db, 'en', 'Statue_of_Liberty', 1234,
100)
Expand Down Expand Up @@ -170,3 +270,37 @@ def test_update_db_pageviews_existing(self):
self.assertEqual(result['ps_article'], b'Statue_of_Liberty')
self.assertEqual(result['ps_page_id'], 1234)
self.assertEqual(result['ps_views'], 200)

@patch('wp1.scores.download_pageviews')
@patch('wp1.scores.pageview_components')
def test_update_pageviews(self, mock_components, mock_download):
    """Three component rows with commit_after=2 all end up in page_scores."""
    statue = (b'en', b'Statue_of_Liberty', 100, 100)
    tower = (b'en', b'Eiffel_Tower', 200, 200)
    monument = (b'fr', b'George-\xc3\x89tienne_Cartier_Monument', 300, 300)
    mock_components.return_value = (statue, tower, monument)

    scores.update_pageviews(commit_after=2)

    mock_download.assert_called_once()
    with self.wp10db.cursor() as cursor:
        cursor.execute('SELECT COUNT(*) as cnt FROM page_scores')
        row = cursor.fetchone()
        self.assertEqual(3, row['cnt'])

@patch('wp1.scores.download_pageviews')
@patch('wp1.scores.pageview_components')
def test_update_pageviews_filter(self, mock_components, mock_download):
    """With filter_lang='fr', only one of the three rows is stored."""
    english_rows = (
        (b'en', b'Statue_of_Liberty', 100, 100),
        (b'en', b'Eiffel_Tower', 200, 200),
    )
    french_rows = (
        (b'fr', b'George-\xc3\x89tienne_Cartier_Monument', 300, 300),
    )
    mock_components.return_value = english_rows + french_rows

    scores.update_pageviews(filter_lang='fr')

    mock_download.assert_called_once()
    with self.wp10db.cursor() as cursor:
        cursor.execute('SELECT COUNT(*) as cnt FROM page_scores')
        row = cursor.fetchone()
        self.assertEqual(1, row['cnt'])

0 comments on commit bfe4d6b

Please sign in to comment.