
Commit b5cca5a

Migrate site probes to github action
1 parent dcc4a2a commit b5cca5a

3 files changed: +114 −1 lines changed

.github/workflows/probe.yml (+51)
@@ -0,0 +1,51 @@
+# Workflow that probes the betacat.io sites
+name: Probe
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: [ "master" ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+  repository_dispatch:
+    types: [ probe-hn-sites ]
+
+  schedule:
+    - cron: "*/10 * * * *"
+
+# Allow one concurrent run per trigger type
+concurrency:
+  group: probe-event-${{ github.event_name }} # So UT won't be interrupted by cronjobs
+  cancel-in-progress: true # Avoid batch pending when one job hangs
+
+jobs:
+  # Single probe job
+  probe:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+          cache: 'pip'
+
+      - name: Install Python Dependencies
+        run: pip install --upgrade -r requirements.txt
+
+      - name: Blog
+        run: python probe.py blog
+        if: ${{ always() }}
+
+      - name: HN Summary
+        run: python probe.py hn
+        if: ${{ always() }}
+
+      - name: HN Summary ZH
+        run: python probe.py hn-zh
+        if: ${{ always() }}
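
The repository_dispatch trigger above lets the probes be kicked off from outside the repository, on demand. A minimal sketch of such a trigger call, assuming a personal access token in a GH_TOKEN environment variable and a placeholder OWNER/REPO slug — neither is part of this commit:

# Hypothetical trigger for the "probe-hn-sites" repository_dispatch event.
# OWNER/REPO and GH_TOKEN are placeholders, not taken from this commit.
import os
import requests

resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GH_TOKEN']}",
    },
    json={"event_type": "probe-hn-sites"},  # must match the types filter in probe.yml
)
resp.raise_for_status()  # GitHub returns 204 No Content on success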

config.py (+1 −1)
@@ -23,7 +23,7 @@
 logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO,
                     format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d %(funcName)s] - %(message)s',
                     handlers=log_handlers)
-logger = logging.getLogger()
+logger = logging.getLogger(__name__)


 def int_env(name, default):
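
The one-line config.py change swaps the anonymous root logger for a module-named one. A small illustration (the module names below are examples, not from this commit) of why getLogger(__name__) is usually preferable: records show which module emitted them, and individual loggers can be tuned without touching the root:

# Illustration only.
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s [%(name)s] - %(message)s',
)

logging.getLogger(__name__).info("probe starting")       # record is tagged with this module's name
logging.getLogger("urllib3").setLevel(logging.WARNING)   # quiet a chatty dependency independently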

probe.py (+62)
@@ -0,0 +1,62 @@
+# coding: utf-8
+import argparse
+import logging
+import re
+from datetime import datetime, timedelta
+
+from page_content_extractor.http import session
+
+parser = argparse.ArgumentParser(description='Probe betacat.io sites')
+parser.add_argument("site", choices=['hn', 'hn-zh', 'blog'], help="Specify site to probe")
+args = parser.parse_args()
+logger = logging.getLogger(__name__)
+
+
+def probe_hn_summary():
+    url = 'https://hackernews.betacat.io/'
+    resp = session.get(url)
+    resp.raise_for_status()
+    body = resp.text
+
+    assert "Hacker News" in body, '"Hacker News" not in response'
+    assert body.count("OpenAI") > 5, "Too few OpenAI summaries, only got %d" % body.count("OpenAI")
+    logger.info(f'OpenAI summaries {body.count("OpenAI")} times')
+
+    pattern = r'Last updated: <span>(.*?)<\/span>'
+    matches = re.search(pattern, body)
+
+    time_updated_str = matches.group(1)
+    time_updated = datetime.strptime(time_updated_str, "%Y-%m-%d %H:%M:%S %Z")
+
+    current_time = datetime.utcnow()
+
+    assert current_time <= time_updated + timedelta(hours=1), "Haven't been updated for one hour, last update: " + time_updated_str
+
+
+def probe_hn_zh():
+    url = 'https://hackernews.betacat.io/zh.html'
+    resp = session.get(url)
+    resp.raise_for_status()
+    body = resp.text
+
+    assert '摘要' in body
+
+
+def probe_blog():
+    url = 'https://blog.betacat.io/'
+    resp = session.get(url)
+    resp.raise_for_status()
+    body = resp.text
+
+    assert '喵叔没话说' in body
+
+
+if __name__ == '__main__':
+    if args.site == 'blog':
+        probe_blog()
+    elif args.site == 'hn-zh':
+        probe_hn_zh()
+    elif args.site == 'hn':
+        probe_hn_summary()
+    else:
+        assert False
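
The freshness check in probe_hn_summary() hinges on the "Last updated: <span>…</span>" marker and on strptime's %Z handling. A standalone sketch of that check, assuming the page renders the timestamp roughly like the sample string below (the exact markup is an assumption, not taken from the site):

# Standalone sketch of the freshness check; the sample body is made up.
import re
from datetime import datetime, timedelta

body = 'Last updated: <span>2023-05-01 12:34:56 UTC</span>'

match = re.search(r'Last updated: <span>(.*?)</span>', body)
time_updated = datetime.strptime(match.group(1), "%Y-%m-%d %H:%M:%S %Z")  # naive datetime; "UTC" is accepted by %Z

# The probe asserts that the page was regenerated within the last hour.
is_fresh = datetime.utcnow() <= time_updated + timedelta(hours=1)
print(is_fresh)

Both datetimes are naive and treated as UTC, so the comparison only holds as long as the page keeps emitting UTC timestamps.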
