|
| 1 | +# coding: utf-8 |
| 2 | +import argparse |
| 3 | +import logging |
| 4 | +import re |
| 5 | +from datetime import datetime, timedelta |
| 6 | + |
| 7 | +from page_content_extractor.http import session |
| 8 | + |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Command-line interface: a single positional argument selecting which site
# to probe. NOTE: the arguments are parsed here, at module level, so `args`
# is a global that the script entry point reads.
parser = argparse.ArgumentParser(description='Probe betacat.io sites')
parser.add_argument(
    "site",
    choices=['hn', 'hn-zh', 'blog'],
    help="Specify site to probe",
)
args = parser.parse_args()
| 13 | + |
| 14 | + |
def probe_hn_summary():
    """Probe the Hacker News summary front page and assert that it looks healthy.

    Checks performed, in order:

    1. The page is fetched and ``resp.raise_for_status()`` does not raise.
    2. The body contains the text ``"Hacker News"``.
    3. The text ``"OpenAI"`` occurs more than 5 times in the body.
    4. The ``Last updated: <span>...</span>`` marker is present and its
       timestamp is no more than one hour behind the current UTC time.

    Raises:
        AssertionError: if any of the content checks above fails, including
            a missing "Last updated" marker.
        ValueError: if the timestamp text does not match the expected
            ``"%Y-%m-%d %H:%M:%S %Z"`` format (raised by ``strptime``).

    Any exception raised by ``session.get`` or ``raise_for_status`` is
    propagated unchanged.
    """
    # Function-scope import: the file's top-level import only brings in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    url = 'https://hackernews.betacat.io/'
    resp = session.get(url)
    resp.raise_for_status()
    body = resp.text

    assert "Hacker News" in body, '"Hacker News" not in response'

    # Count once and reuse for the check, the error message and the log line.
    openai_count = body.count("OpenAI")
    assert openai_count > 5, "Too few OpenAI summaries, only got %d" % openai_count
    logger.info('OpenAI summaries %d times', openai_count)

    pattern = r'Last updated: <span>(.*?)<\/span>'
    matches = re.search(pattern, body)
    # re.search returns None when the marker is absent; fail with a clear
    # message instead of an AttributeError on ``None.group``.
    assert matches is not None, '"Last updated" timestamp not found in response'

    time_updated_str = matches.group(1)
    # strptime with %Z yields a naive datetime here.
    # NOTE(review): the comparison below assumes the page prints its
    # timestamp in UTC -- confirm against the page generator.
    time_updated = datetime.strptime(time_updated_str, "%Y-%m-%d %H:%M:%S %Z")

    # Naive "now" in UTC; equivalent to the deprecated datetime.utcnow().
    current_time = datetime.now(timezone.utc).replace(tzinfo=None)

    assert current_time <= time_updated + timedelta(hours=1), \
        "Haven't been updated for one hour, last update: " + time_updated_str
| 34 | + |
| 35 | + |
def probe_hn_zh():
    """Probe the Chinese Hacker News summary page.

    Fetches the page, lets ``raise_for_status()`` raise on an error
    response, and asserts that the body contains the marker text '摘要'.

    Raises:
        AssertionError: if the marker text is missing from the body.
    """
    response = session.get('https://hackernews.betacat.io/zh.html')
    response.raise_for_status()

    page_text = response.text
    assert '摘要' in page_text
| 43 | + |
| 44 | + |
def probe_blog():
    """Probe the blog front page.

    Fetches the page, lets ``raise_for_status()`` raise on an error
    response, and asserts that the body contains the marker text
    '喵叔没话说'.

    Raises:
        AssertionError: if the marker text is missing from the body.
    """
    response = session.get('https://blog.betacat.io/')
    response.raise_for_status()

    page_text = response.text
    assert '喵叔没话说' in page_text
| 52 | + |
| 53 | + |
if __name__ == '__main__':
    # Map each accepted value of the "site" argument to its probe function.
    probes = {
        'blog': probe_blog,
        'hn-zh': probe_hn_zh,
        'hn': probe_hn_summary,
    }
    probe = probes.get(args.site)
    if probe is None:
        # argparse restricts "site" to the keys above, so this branch is
        # not expected to run.
        assert False
    else:
        probe()
0 commit comments