From 7c0fca076029b1c405779850fbdc904f035ddec5 Mon Sep 17 00:00:00 2001 From: Thordata Date: Fri, 19 Dec 2025 10:31:26 +0800 Subject: [PATCH 1/2] ci: add offline E2E test and align Google News engine usage --- .github/workflows/ci.yml | 38 +++++++++ requirements-dev.txt | 4 + requirements.txt | 2 +- ...s_offline_e2e.cpython-314-pytest-9.0.2.pyc | Bin 0 -> 5551 bytes test/test_examples_scrape_news_offline_e2e.py | 74 ++++++++++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci.yml create mode 100644 requirements-dev.txt create mode 100644 test/__pycache__/test_examples_scrape_news_offline_e2e.cpython-314-pytest-9.0.2.pyc create mode 100644 test/test_examples_scrape_news_offline_e2e.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..de0d9b8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,38 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + python -m pip install -r requirements-dev.txt + + - name: Ruff + run: ruff check . + + - name: Black + run: black --check . + + - name: Pytest + run: pytest -v \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..d9d2f0b --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +pytest>=8.0.0 +pytest-httpserver>=1.1.0 +ruff>=0.1.0 +black>=23.0.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 896c341..35f2315 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -thordata-sdk>=0.3.0 +thordata-sdk>=0.5.0 python-dotenv>=1.0.0 pandas>=2.1.0 \ No newline at end of file diff --git a/test/__pycache__/test_examples_scrape_news_offline_e2e.cpython-314-pytest-9.0.2.pyc b/test/__pycache__/test_examples_scrape_news_offline_e2e.cpython-314-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01c667a5b1ac0feac21c1d91ca343d547df58e10 GIT binary patch literal 5551 zcmb^#TWlN0@s2z`bbK5|$&##xC0Vj9$|9+kBh|JXE3#|}wIz0?P;nsWdLqvf6N-1* zJ6aNp6e=2^a`WNzqlj7rsEZb;Q}n~=S3X;yK>g7IRAigPCPDI%e&kPq1ElCzXYY6< zt*S{3WXZdonVs34ot@d8y|aEF&p=7Me&QZ ztJh8i^BNm>O;s|QoM1tiKD0RDvBUFp7+%{Ij+E;n6zbwat9RTw+_lpszo@c0jAYT`{VM z^9J!LFfobpOaTeNV_4}W7sE(_Z6?Af0mL*i@xTyR2$sg34yd*`6SM3}yC@Z8qqZ6; z^%tZ494c|!a7nHmRv_j$)C?$cSR66X;BcvGtvTEVvQFe0E{(3*<1BLbc`95gw`jOM zrNpR>4wV?S-JwE#g%;I;YT^#Wp9Xm09P-c_d4b|HAU{BY0R;f6r4X(AngJ*kz=M>& zNG)uXSkQthbtkh}Z(*?p3#!zp4%MS$r~&y=BWgm&Q8Q{;JZ`neIm>nts#`1kyr^yI zB04crvZd;bJ7ZN^S&}M*@A}W;?$IfRS^w3U5_ytcX`<1w*lO{`PFPT-?qpDZyav2u zU}1B}HcY)A3Tj6kiya0Cuh>Zos??o~W!>)qai#6hNZl1q3+iOzPHTpij>*PDa@5F& zO10b#7ImP`su>;4@3O{0C%=)`ZSh9!ea_0zDqkhpQ=wZB)hgC&VU_v_Xjr6$UC-jh0%DlB;I3v_z+>spxc^kFw9`?OBXkgH_F--=bA&n1RRY)v941J#&Fo zT^Crqi+a}oF&Ch#yZ&)`PJceMUPpV>VB7~DH5B*G1*&{1RHe_+>*lreK)vYfckh35 z0jqy&KHtv&Dstd|XQQ@f?CnQ==w#e~XchS9SOwnF%Bsy(!2gVMvBAoESrmth&tZGP>cgr_w%s%phjboCJ%LvSJ^L&_q&Y;M*aI&OdQi=%$OV0Mxh!QbL)l^ z{vu0fcVvzL3FSyB2RLHKvZ?H1Id6!wmQ5k!upOB#*e#B#zQD?@4D6b=2&ApmXRvCY za~zdb#LK;a6u0ycIcO$<+^7ce$OC7n56)X5>WHu?5MiQr$Rx_y$Jgf+utS#5WazgR z1xHG=gY~SNhXm0jX`(J~>VzRqIkN(Z9pMs6QYKy_*~7AyP3szQ=qWud=e(09E-jN%RawsoR!1cQ;UFiKiDO<# zN?XLG;1wy8O5_5Ps-{y3Nlz)6=xdsi3E7})$ThPX@g|gvE@yO6-%{m}i?~;0U8I>U zrc)$#K#3FNqIGQD6$Th&bRcR}&NJ-)~tpT~O zEN3)&uAc6IvTgV`aji<3WLm}~@-2P5#BaFwAEC2p-!-byq3Yx8>XWfv|h+z~ih99@yP&{(~Un zYj_(UG;U0S;>+__ zM{{-Mk^9Q(5ckZi7(=tK+$8RdBC1%~+{!hU994@}U2EVflXHc`aD&5J#G$5gIVtA| zhcnr=92X9+q=_x7QD_w)Ig_gihZWE(m6qq}hNz~cgdB1byPUaA{0T{gyO=Bjhnm$1 zXT+CV&OltHGAj^g7%NyKELYgC&i7FzA9RA=gQ)=2w@9$({!9t@r%@2 zHU-QIYmmYa>9CK%{gl_T%dqYVSp&Pl`9tj5mPUAaQ%+=cX*n$eLC1uzC`Xs4K z5+=1GoKa~|M#ZqN!Q~%zMran7v#GS6%4o!Co?@g))K-;jI%%+qD`{m}N{bn3O(r#> zCf$;0573)=9~aH#Sku58G(Ab3fY#s`#6NDVHEej3)J;g8B?4wiOBrJ;1Q#+^CRB^* zl@iliCXP@8xR)jPIsvx9vy^t3zOK;*4^`S6b)XstY$0q zW_EDSa7C>ahCglWopk>+%Vl#HZTs5I6W<0&nq+9}aH*yn=82B&>tAs+D=@QxgI>lV z6u7oMu5I_V2Mu|yZJ!%0a5wh28xL0=aq*&XydZS$37z-4_Jz|0A-pGq?MfVSan+Q-8y5^J9P4_O;J=@7>-zy?3K`qWgTy_C&G!?Dj<7-Lc1Y7JCQt z?$dkRnc|64l-9h*wG^9xXUDt89V<2f;|~9m*K|_W_C}$5B;P&q0KGr_xa-Bgyzuw2zm63q;(7Sb#~*iL>G8-OoE&2ZugeK$M3)MqNl|mga(?B{dnwvw!3?_)v2$nZ+ z3bL1)GKroE1`YoZWH7yb;|2P9mA>eAKALFgtLfCT@uiM None: + engines_seen: list[str] = [] + + def handler(request: Request) -> Response: + body = request.get_data(as_text=True) or "" + form = parse_qs(body) + engine = (form.get("engine") or [""])[0] + engines_seen.append(engine) + + payload = { + "code": 200, + "news_results": [ + { + "title": "Example News", + "link": "https://example.com", + "source": "ExampleSource", + "snippet": "Example snippet", + "date": "Today", + } + ], + "organic": [], + } + return Response(json.dumps(payload), status=200, content_type="application/json") + + httpserver.expect_request("/request", method="POST").respond_with_handler(handler) + base_url = httpserver.url_for("/").rstrip("/").replace("localhost", "127.0.0.1") + + outfile = tmp_path / "news.csv" + + env = os.environ.copy() + env["THORDATA_SCRAPER_TOKEN"] = "dummy" + env["THORDATA_SCRAPERAPI_BASE_URL"] = base_url + env["PYTHONIOENCODING"] = "utf-8" + env["PYTHONUTF8"] = "1" + env["NO_PROXY"] = "127.0.0.1,localhost" + env["no_proxy"] = env["NO_PROXY"] + + result = subprocess.run( + [ + sys.executable, + "examples/scrape_news.py", + "--query", + "pizza", + "--num", + "1", + "--gl", + "us", + "--hl", + "en", + "--outfile", + str(outfile), + ], + env=env, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=60, + ) + + assert result.returncode == 0, (result.stdout or "") + "\n" + (result.stderr or "") + assert "google_news" in engines_seen + assert outfile.exists() \ No newline at end of file From ec1d5594e8db161e39e9c011a87238c86fbc72b3 Mon Sep 17 00:00:00 2001 From: Thordata Date: Fri, 19 Dec 2025 10:34:56 +0800 Subject: [PATCH 2/2] ci: black offline E2E test and GitHub Actions --- examples/scrape_news.py | 5 ++++- test/test_examples_scrape_news_offline_e2e.py | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/scrape_news.py b/examples/scrape_news.py index 373c2e1..904afde 100644 --- a/examples/scrape_news.py +++ b/examples/scrape_news.py @@ -82,6 +82,7 @@ def build_client() -> ThordataClient: # CLI parsing # ------------------------------------------------------------- + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Google News scraper using Thordata SERP API (engine=google_news)", @@ -172,6 +173,7 @@ def parse_args() -> argparse.Namespace: # SERP call: engine=google_news # ------------------------------------------------------------- + def search_google_news( client: ThordataClient, query: str, @@ -260,6 +262,7 @@ def search_google_news( # Main # ------------------------------------------------------------- + def main() -> None: args = parse_args() client = build_client() @@ -323,4 +326,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/test_examples_scrape_news_offline_e2e.py b/test/test_examples_scrape_news_offline_e2e.py index 96343ba..fd8d6d1 100644 --- a/test/test_examples_scrape_news_offline_e2e.py +++ b/test/test_examples_scrape_news_offline_e2e.py @@ -31,7 +31,9 @@ def handler(request: Request) -> Response: ], "organic": [], } - return Response(json.dumps(payload), status=200, content_type="application/json") + return Response( + json.dumps(payload), status=200, content_type="application/json" + ) httpserver.expect_request("/request", method="POST").respond_with_handler(handler) base_url = httpserver.url_for("/").rstrip("/").replace("localhost", "127.0.0.1") @@ -71,4 +73,4 @@ def handler(request: Request) -> Response: assert result.returncode == 0, (result.stdout or "") + "\n" + (result.stderr or "") assert "google_news" in engines_seen - assert outfile.exists() \ No newline at end of file + assert outfile.exists()