Skip to content

Commit

Permalink
Use build_from_crawler on Scrapy 2.12+ (#237)
Browse files · Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Dec 27, 2024
1 parent cc3cfdd commit 654a404
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 35 deletions.
8 changes: 4 additions & 4 deletions scrapy_zyte_api/_request_fingerprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
from scrapy.settings.default_settings import (
REQUEST_FINGERPRINTER_CLASS as ScrapyRequestFingerprinter,
)
from scrapy.utils.misc import create_instance, load_object
from scrapy.utils.misc import load_object
from w3lib.url import canonicalize_url

from ._params import _REQUEST_PARAMS, _ParamParser, _uses_browser
from .utils import _build_from_crawler

class ScrapyZyteAPIRequestFingerprinter:
@classmethod
Expand All @@ -38,15 +39,14 @@ def __init__(self, crawler):
else:
self._has_poet = True
RequestFingerprinter = ScrapyPoetRequestFingerprinter
self._fallback_request_fingerprinter = create_instance(
self._fallback_request_fingerprinter = _build_from_crawler(
load_object(
settings.get(
"ZYTE_API_FALLBACK_REQUEST_FINGERPRINTER_CLASS",
RequestFingerprinter,
)
),
settings=crawler.settings,
crawler=crawler,
crawler,
)
if self._has_poet and not isinstance(
self._fallback_request_fingerprinter, cast(type, RequestFingerprinter)
Expand Down
33 changes: 5 additions & 28 deletions scrapy_zyte_api/_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,7 @@
from copy import deepcopy
from functools import partial
from logging import getLogger
from typing import (
Any,
DefaultDict,
Deque,
Dict,
List,
Optional,
Set,
Type,
TypeVar,
Union,
cast,
)
from typing import Any, DefaultDict, Deque, Dict, List, Optional, Set, Type, Union, cast
from uuid import uuid4
from weakref import WeakKeyDictionary

Expand All @@ -25,12 +13,12 @@
from scrapy.exceptions import CloseSpider, IgnoreRequest
from scrapy.http import Response
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.misc import create_instance, load_object
from scrapy.utils.misc import load_object
from scrapy.utils.python import global_object_name
from tenacity import stop_after_attempt
from zyte_api import RequestError, RetryFactory

from scrapy_zyte_api.utils import _DOWNLOAD_NEEDS_SPIDER
from .utils import _DOWNLOAD_NEEDS_SPIDER, _build_from_crawler

logger = getLogger(__name__)
SESSION_INIT_META_KEY = "_is_session_init_request"
Expand Down Expand Up @@ -169,17 +157,6 @@ def deferred_to_future(d): # type: ignore[misc]
return d.asFuture(_get_asyncio_event_loop())


try:
from scrapy.utils.misc import build_from_crawler
except ImportError:
T = TypeVar("T")

def build_from_crawler(
objcls: Type[T], crawler: Crawler, /, *args: Any, **kwargs: Any
) -> T:
return create_instance(objcls, settings=None, crawler=crawler, *args, **kwargs) # type: ignore[misc]


class PoolError(ValueError):
pass

Expand Down Expand Up @@ -216,7 +193,7 @@ def __init__(self, crawler):

checker_cls = settings.get("ZYTE_API_SESSION_CHECKER", None)
if checker_cls:
self._checker = build_from_crawler(load_object(checker_cls), crawler)
self._checker = _build_from_crawler(load_object(checker_cls), crawler)
else:
self._checker = None
self._enabled = crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False)
Expand Down Expand Up @@ -630,7 +607,7 @@ def _get_session_config(self, request: Request) -> SessionConfig:
except KeyError:
cls = session_config_registry.session_config_cls(request)
if cls not in self._session_config_map:
self._session_config_map[cls] = build_from_crawler(cls, self._crawler)
self._session_config_map[cls] = _build_from_crawler(cls, self._crawler)
self._session_config_cache[request] = self._session_config_map[cls]
return self._session_config_map[cls]

Expand Down
6 changes: 3 additions & 3 deletions scrapy_zyte_api/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import create_instance, load_object
from scrapy.utils.misc import load_object
from scrapy.utils.reactor import verify_installed_reactor
from twisted.internet.defer import Deferred, inlineCallbacks
from zyte_api import AsyncZyteAPI, RequestError
Expand All @@ -18,7 +18,7 @@

from ._params import _ParamParser
from .responses import ZyteAPIResponse, ZyteAPITextResponse, _process_response
from .utils import USER_AGENT
from .utils import USER_AGENT, _build_from_crawler

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -170,7 +170,7 @@ def _build_client(settings):

def _create_handler(self, path: Any) -> Any:
dhcls = load_object(path)
return create_instance(dhcls, settings=None, crawler=self._crawler)
return _build_from_crawler(dhcls, self._crawler)

def download_request(self, request: Request, spider: Spider) -> Deferred:
api_params = self._param_parser.parse(request)
Expand Down
15 changes: 15 additions & 0 deletions scrapy_zyte_api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,18 @@
_RESPONSE_HAS_ATTRIBUTES = _SCRAPY_VERSION >= _SCRAPY_2_6_0
_RESPONSE_HAS_IP_ADDRESS = _SCRAPY_VERSION >= _SCRAPY_2_1_0
_RESPONSE_HAS_PROTOCOL = _SCRAPY_VERSION >= _SCRAPY_2_5_0

# Compatibility shim: Scrapy 2.12 introduced build_from_crawler(); prefer it
# when available, otherwise emulate it on top of the older create_instance()
# helper so the rest of the code base can call _build_from_crawler()
# unconditionally.
try:
    from scrapy.utils.misc import build_from_crawler as _build_from_crawler
except ImportError:  # Scrapy < 2.12
    from typing import Any, TypeVar

    from scrapy.crawler import Crawler
    from scrapy.utils.misc import create_instance

    T = TypeVar("T")

    def _build_from_crawler(
        objcls: type[T], crawler: Crawler, /, *args: Any, **kwargs: Any
    ) -> T:
        """Instantiate *objcls* from *crawler*, forwarding extra arguments.

        Mirrors the signature of Scrapy 2.12's ``build_from_crawler`` (class
        and crawler are positional-only) by delegating to ``create_instance``
        with ``settings=None``, which resolves settings from the crawler.
        """
        return create_instance(objcls, None, crawler, *args, **kwargs)

0 comments on commit 654a404

Please sign in to comment.