diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..de0d9b8
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,38 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
+          python -m pip install -r requirements-dev.txt
+
+      - name: Ruff
+        run: ruff check .
+
+      - name: Black
+        run: black --check .
+
+      - name: Pytest
+        run: pytest -v
\ No newline at end of file
diff --git a/examples/scrape_news.py b/examples/scrape_news.py
index 373c2e1..904afde 100644
--- a/examples/scrape_news.py
+++ b/examples/scrape_news.py
@@ -82,6 +82,7 @@ def build_client() -> ThordataClient:
 # CLI parsing
 # -------------------------------------------------------------
 
+
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Google News scraper using Thordata SERP API (engine=google_news)",
@@ -172,6 +173,7 @@ def parse_args() -> argparse.Namespace:
 # SERP call: engine=google_news
 # -------------------------------------------------------------
 
+
 def search_google_news(
     client: ThordataClient,
     query: str,
@@ -260,6 +262,7 @@ def search_google_news(
 # Main
 # -------------------------------------------------------------
 
+
 def main() -> None:
     args = parse_args()
     client = build_client()
@@ -323,4 +326,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..d9d2f0b
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,4 @@
+pytest>=8.0.0
+pytest-httpserver>=1.1.0
+ruff>=0.1.0
+black>=23.0.0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 896c341..35f2315 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-thordata-sdk>=0.3.0
+thordata-sdk>=0.5.0
 python-dotenv>=1.0.0
 pandas>=2.1.0
\ No newline at end of file
diff --git a/test/test_examples_scrape_news_offline_e2e.py b/test/test_examples_scrape_news_offline_e2e.py
new file mode 100644
index 0000000..fd8d6d1
--- /dev/null
+++ b/test/test_examples_scrape_news_offline_e2e.py
@@ -0,0 +1,80 @@
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from urllib.parse import parse_qs
+
+from pytest_httpserver import HTTPServer
+from werkzeug.wrappers import Request, Response
+
+
+def test_scrape_news_runs_offline(httpserver: HTTPServer, tmp_path: Path) -> None:
+    # Record every engine value the example sends, so we can assert on it later.
+    engines_seen: list[str] = []
+
+    def handler(request: Request) -> Response:
+        body = request.get_data(as_text=True) or ""
+        form = parse_qs(body)
+        engine = (form.get("engine") or [""])[0]
+        engines_seen.append(engine)
+
+        # Minimal SERP-style payload with just the fields the example consumes.
+        payload = {
+            "code": 200,
+            "news_results": [
+                {
+                    "title": "Example News",
+                    "link": "https://example.com",
+                    "source": "ExampleSource",
+                    "snippet": "Example snippet",
+                    "date": "Today",
+                }
+            ],
+            "organic": [],
+        }
+        return Response(
+            json.dumps(payload), status=200, content_type="application/json"
+        )
+
+    httpserver.expect_request("/request", method="POST").respond_with_handler(handler)
+    # Normalize to 127.0.0.1 so the subprocess does not depend on localhost resolution.
+    base_url = httpserver.url_for("/").rstrip("/").replace("localhost", "127.0.0.1")
+
+    outfile = tmp_path / "news.csv"
+
+    # Point the example at the mock server and keep proxy/encoding settings deterministic.
+    env = os.environ.copy()
+    env["THORDATA_SCRAPER_TOKEN"] = "dummy"
+    env["THORDATA_SCRAPERAPI_BASE_URL"] = base_url
+    env["PYTHONIOENCODING"] = "utf-8"
+    env["PYTHONUTF8"] = "1"
+    env["NO_PROXY"] = "127.0.0.1,localhost"
+    env["no_proxy"] = env["NO_PROXY"]
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "examples/scrape_news.py",
+            "--query",
+            "pizza",
+            "--num",
+            "1",
+            "--gl",
+            "us",
+            "--hl",
+            "en",
+            "--outfile",
+            str(outfile),
+        ],
+        env=env,
+        capture_output=True,
+        text=True,
+        encoding="utf-8",
+        errors="replace",
+        timeout=60,
+    )
+
+    assert result.returncode == 0, (result.stdout or "") + "\n" + (result.stderr or "")
+    assert "google_news" in engines_seen
+    assert outfile.exists()