Skip to content

Commit

Permalink
Provide a combo-box for the Google geolocation parameter
Browse files: browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 13, 2024
1 parent 1798256 commit 9a08850
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 20 deletions.
26 changes: 20 additions & 6 deletions tests/test_serp.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
GEOLOCATION_OPTIONS_WITH_CODE,
Geolocation,
)
from zyte_spider_templates.spiders._google_gl import (
GOOGLE_GL_OPTIONS,
GOOGLE_GL_OPTIONS_WITH_CODE,
GoogleGl,
)
from zyte_spider_templates.spiders.serp import GoogleSearchSpider

from . import get_crawler
Expand Down Expand Up @@ -286,15 +291,24 @@ def test_metadata():
"type": "integer",
},
"gl": {
"default": "",
"anyOf": [
{"type": "string"},
{"type": "null"},
],
"default": None,
"description": (
"Google will boost results relevant to the country "
"with the specified code. For valid country codes, "
"see "
"https://developers.google.com/custom-search/docs/json_api_reference#countryCodes"
"Ask Google to boost results relevant to this country."
),
"enumMeta": {
code: {
"title": GOOGLE_GL_OPTIONS_WITH_CODE[code],
}
for code in sorted(GoogleGl)
},
"title": "Geolocation (Google)",
"type": "string",
"enum": list(
sorted(GOOGLE_GL_OPTIONS, key=GOOGLE_GL_OPTIONS.__getitem__)
),
},
"geolocation": {
"anyOf": [
Expand Down
31 changes: 17 additions & 14 deletions zyte_spider_templates/spiders/serp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .._geolocations import GEOLOCATION_OPTIONS_WITH_CODE, Geolocation
from ..params import MaxRequestsParam
from ._google_domains import GoogleDomain
from ._google_gl import GOOGLE_GL_OPTIONS_WITH_CODE, GoogleGl
from .base import BaseSpider


Expand Down Expand Up @@ -40,7 +41,7 @@ def validate_search_queries(cls, value: Union[List[str], str]) -> List[str]:
return result


class SerpIPGeolocationParam(BaseModel):
class SerpGeolocationParam(BaseModel):
# We use “geolocation” as the parameter name (instead of e.g. “ip_geolocation”)
# so that the implementation in BaseSpider can be reused.
geolocation: Optional[Geolocation] = Field(
Expand All @@ -61,17 +62,19 @@ class SerpIPGeolocationParam(BaseModel):
)


# TODO: Make it a list of options like Geolocation out of
# https://developers.google.com/custom-search/docs/json_api_reference#countryCodes
class SerpURLGeolocationParam(BaseModel):
gl: str = Field(
class GoogleGlParam(BaseModel):
gl: Optional[GoogleGl] = Field(
title="Geolocation (Google)",
description=(
"Google will boost results relevant to the country with the "
"specified code. For valid country codes, see "
"https://developers.google.com/custom-search/docs/json_api_reference#countryCodes"
),
default="",
description="Ask Google to boost results relevant to this country.",
default=None,
json_schema_extra={
"enumMeta": {
code: {
"title": GOOGLE_GL_OPTIONS_WITH_CODE[code],
}
for code in GoogleGl
}
},
)


Expand All @@ -94,8 +97,8 @@ class GoogleDomainParam(BaseModel):

class GoogleSearchSpiderParams(
MaxRequestsParam,
SerpIPGeolocationParam,
SerpURLGeolocationParam,
SerpGeolocationParam,
GoogleGlParam,
SerpMaxPagesParam,
SearchQueriesParam,
GoogleDomainParam,
Expand Down Expand Up @@ -138,7 +141,7 @@ def update_settings(cls, settings: BaseSettings) -> None:

def get_serp_request(self, url: str, *, page_number: int):
if self.args.gl:
url = add_or_replace_parameter(url, "gl", self.args.gl)
url = add_or_replace_parameter(url, "gl", self.args.gl.value)
return Request(
url=url,
callback=self.parse_serp,
Expand Down

0 comments on commit 9a08850

Please sign in to comment.