From 9a08850a0168c9b35f7e5dab84751063cf06c556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Wed, 13 Nov 2024 09:09:22 +0100 Subject: [PATCH] Provide a combo-box for the Google geolocation parameter --- tests/test_serp.py | 26 ++++++++++++++++------ zyte_spider_templates/spiders/serp.py | 31 +++++++++++++++------------ 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/tests/test_serp.py b/tests/test_serp.py index 904b8ff..5c5999e 100644 --- a/tests/test_serp.py +++ b/tests/test_serp.py @@ -11,6 +11,11 @@ GEOLOCATION_OPTIONS_WITH_CODE, Geolocation, ) +from zyte_spider_templates.spiders._google_gl import ( + GOOGLE_GL_OPTIONS, + GOOGLE_GL_OPTIONS_WITH_CODE, + GoogleGl, +) from zyte_spider_templates.spiders.serp import GoogleSearchSpider from . import get_crawler @@ -286,15 +291,24 @@ def test_metadata(): "type": "integer", }, "gl": { - "default": "", + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + "default": None, "description": ( - "Google will boost results relevant to the country " - "with the specified code. For valid country codes, " - "see " - "https://developers.google.com/custom-search/docs/json_api_reference#countryCodes" + "Ask Google to boost results relevant to this country." ), + "enumMeta": { + code: { + "title": GOOGLE_GL_OPTIONS_WITH_CODE[code], + } + for code in sorted(GoogleGl) + }, "title": "Geolocation (Google)", - "type": "string", + "enum": list( + sorted(GOOGLE_GL_OPTIONS, key=GOOGLE_GL_OPTIONS.__getitem__) + ), }, "geolocation": { "anyOf": [ diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py index b8e0f3a..25b69d2 100644 --- a/zyte_spider_templates/spiders/serp.py +++ b/zyte_spider_templates/spiders/serp.py @@ -10,6 +10,7 @@ from .._geolocations import GEOLOCATION_OPTIONS_WITH_CODE, Geolocation from ..params import MaxRequestsParam from ._google_domains import GoogleDomain +from ._google_gl import GOOGLE_GL_OPTIONS_WITH_CODE, GoogleGl from .base import BaseSpider @@ -40,7 +41,7 @@ def validate_search_queries(cls, value: Union[List[str], str]) -> List[str]: return result -class SerpIPGeolocationParam(BaseModel): +class SerpGeolocationParam(BaseModel): # We use “geolocation” as parameter name (instead of e.g. “ip_geolocation”) # to reuse the implementation in BaseSpider. geolocation: Optional[Geolocation] = Field( @@ -61,17 +62,19 @@ class SerpIPGeolocationParam(BaseModel): ) -# TODO: Make it a list of options like Geolocation out of -# https://developers.google.com/custom-search/docs/json_api_reference#countryCodes -class SerpURLGeolocationParam(BaseModel): - gl: str = Field( +class GoogleGlParam(BaseModel): + gl: Optional[GoogleGl] = Field( title="Geolocation (Google)", - description=( - "Google will boost results relevant to the country with the " - "specified code. For valid country codes, see " - "https://developers.google.com/custom-search/docs/json_api_reference#countryCodes" - ), - default="", + description="Ask Google to boost results relevant to this country.", + default=None, + json_schema_extra={ + "enumMeta": { + code: { + "title": GOOGLE_GL_OPTIONS_WITH_CODE[code], + } + for code in GoogleGl + } + }, ) @@ -94,8 +97,8 @@ class GoogleDomainParam(BaseModel): class GoogleSearchSpiderParams( MaxRequestsParam, - SerpIPGeolocationParam, - SerpURLGeolocationParam, + SerpGeolocationParam, + GoogleGlParam, SerpMaxPagesParam, SearchQueriesParam, GoogleDomainParam, @@ -138,7 +141,7 @@ def update_settings(cls, settings: BaseSettings) -> None: def get_serp_request(self, url: str, *, page_number: int): if self.args.gl: - url = add_or_replace_parameter(url, "gl", self.args.gl) + url = add_or_replace_parameter(url, "gl", self.args.gl.value) return Request( url=url, callback=self.parse_serp,