Skip to content

Commit

Permalink
Generate _google_gl.py from Google docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 13, 2024
1 parent ed9f073 commit 1798256
Show file tree
Hide file tree
Showing 8 changed files with 586 additions and 2 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ ignore_missing_imports = true

[tool.black]
target-version = ["py38", "py39", "py310", "py311", "py312"]
force-exclude = "template.py"
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ ignore =
# First line should not be the function's "signature"
D402

exclude =
template.py

per-file-ignores =
# F401: Ignore "imported but unused" errors in __init__ files, as those
# imports are there to expose submodule functions so they can be imported
Expand Down
3 changes: 1 addition & 2 deletions tests/test_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@ def test_arg_combinations(spider_cls, args, valid):
(
# extract_from
*(
(spider_cls, *scenario)
for spider_cls in (EcommerceSpider,)
(EcommerceSpider, *scenario)
for scenario in (
(
"extract_from",
Expand Down
3 changes: 3 additions & 0 deletions utils/google-gl-updater/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
jinja2
parsel
requests
32 changes: 32 additions & 0 deletions utils/google-gl-updater/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile
#
certifi==2024.8.30
# via requests
charset-normalizer==3.4.0
# via requests
cssselect==1.2.0
# via parsel
idna==3.10
# via requests
jinja2==3.1.4
# via -r requirements.in
jmespath==1.0.1
# via parsel
lxml==5.3.0
# via parsel
markupsafe==3.0.2
# via jinja2
packaging==24.2
# via parsel
parsel==1.9.1
# via -r requirements.in
requests==2.32.3
# via -r requirements.in
urllib3==2.2.3
# via requests
w3lib==2.2.1
# via parsel
18 changes: 18 additions & 0 deletions utils/google-gl-updater/template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{#
  Jinja2 template for the generated Google "gl" (country code) module.

  update.py (same directory) renders this file with a single variable,
  `countries`: a list of dicts with the keys `code`, `keyword` and `name`.
  `keyword` is `code` with a trailing underscore appended when `code` is a
  reserved Python keyword, so that it can be used as an enum member name.

  The {% raw %} ... {% endraw %} sections hold Python source that is emitted
  verbatim; they are needed wherever literal braces in the Python code would
  otherwise be read as Jinja syntax. Only the two {% for %} loops are
  evaluated by Jinja. This comment renders to nothing, so it does not appear
  in the generated file.
#}{% raw %}# ../_geolocations.py counterpart for
# https://developers.google.com/custom-search/docs/json_api_reference#countryCodes
#
# Built automatically with ../../utils/google-gl-updater

from enum import Enum

GOOGLE_GL_OPTIONS = {{% endraw %}{% for country in countries %}
"{{ country.code }}": "{{ country.name }}",{% endfor %}{% raw %}
}
GOOGLE_GL_OPTIONS_WITH_CODE = {
code: f"{name} ({code})" for code, name in GOOGLE_GL_OPTIONS.items()
}


class GoogleGl(str, Enum):{% endraw %}{% for country in countries %}
{{ country.keyword }}: str = "{{ country.code }}"{% endfor %}

35 changes: 35 additions & 0 deletions utils/google-gl-updater/update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from keyword import iskeyword
from pathlib import Path

import jinja2
import requests
from parsel import Selector

# Regenerate zyte_spider_templates/spiders/_google_gl.py.
#
# Steps:
#   1. Download Google's Custom Search JSON API reference page.
#   2. Read the (name, code) pairs from the table that follows the
#      "country-codes" anchor.
#   3. Render template.py (a Jinja2 template in this directory) with them.
#   4. Write the result over the generated module.

countries = []

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() prevents an HTTP error page from being parsed as if
# it were the documentation.
response = requests.get(
    "https://developers.google.com/custom-search/docs/json_api_reference",
    timeout=60,
)
response.raise_for_status()

selector = Selector(text=response.text)
table = selector.xpath('//*[@id="country-codes"]/following-sibling::table[1]')
for tr in table.css("tr"):
    name = tr.xpath("td/text()").get()
    if not name:  # header
        continue
    code = tr.xpath("td/span/text()").get()
    if not code:
        # No code means there is nothing usable as a dict key or enum value;
        # rendering such a row would emit the literal string "None".
        continue
    # Codes that collide with reserved words (the check is keyword.iskeyword)
    # cannot be used as enum member names, so they get a trailing underscore.
    keyword = f"{code}_" if iskeyword(code) else code
    countries.append({"code": code, "keyword": keyword, "name": name})

if not countries:
    # Refuse to overwrite the generated module with an empty mapping and a
    # body-less class (which would not even be valid Python), e.g. when the
    # page layout has changed and the XPath no longer matches.
    raise RuntimeError(
        "No country codes found in the Google documentation page; "
        "_google_gl.py was left untouched."
    )

template_path = Path(__file__).parent / "template.py"
template_environment = jinja2.Environment()
# Explicit UTF-8 on both ends so the output does not depend on the platform's
# default locale encoding.
with template_path.open(encoding="utf-8") as f:
    template = template_environment.from_string(f.read())
output = template.render(countries=countries)
output_path = (
    Path(__file__).parent.parent.parent
    / "zyte_spider_templates"
    / "spiders"
    / "_google_gl.py"
)
with output_path.open("w", encoding="utf-8") as f:
    f.write(output)
Loading

0 comments on commit 1798256

Please sign in to comment.