Merge pull request #153 from scrapinghub/s211
fix for scrapy 2.11 compatibility on crawler object
pawelmhm authored Sep 20, 2023
2 parents 59dddad + e76f2b9 commit 974db1e
Showing 4 changed files with 8 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
scrapy-version: ["2.10", "2.9"]
scrapy-version: ["2.11", "2.10"]

steps:
- uses: actions/checkout@v2
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1 +1 @@
- Scrapy>=1.0.0
+ Scrapy>=2.11
7 changes: 5 additions & 2 deletions scrapyrt/core.py
@@ -39,6 +39,9 @@ def crawl(self, *args, **kwargs):
self.crawling = True
try:
self.spider = self._create_spider(*args, **kwargs)
+ if hasattr(self, "_apply_settings"):
+     self._apply_settings()
self._update_root_log_handler()
self.engine = self._create_engine()
if self.start_requests:
start_requests = iter(self.spider.start_requests())
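
The two added lines above are the core of the Scrapy 2.11 fix. Below is a minimal sketch of the guard, assuming only that newer Scrapy releases define Crawler._apply_settings() and older ones do not (the exact version boundary is inferred from this commit, not stated in the diff); the helper name is hypothetical:

def apply_settings_if_available(crawler):
    # Newer Scrapy defers per-crawler settings initialisation to
    # _apply_settings(); on older releases the attribute simply does not
    # exist, so the call is skipped and behaviour is unchanged.
    if hasattr(crawler, "_apply_settings"):
        crawler._apply_settings()

Guarding with hasattr() keeps a single code path working across Scrapy versions without parsing scrapy.__version__.
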
@@ -112,6 +115,7 @@ def __init__(self, spider_name, request_kwargs,
self.debug = app_settings.DEBUG
self.crawler_process = None
self.crawler = None
+ self.crawl_start_time = datetime.datetime.utcnow()
# callback will be added after instantiation of crawler object
# because we need to know if spider has method available
self.callback_name = request_kwargs.pop('callback', None) or 'parse'
@@ -195,9 +199,8 @@ def handle_scheduling(self, request, spider):

def limit_runtime(self, spider):
"""Stop crawl if it takes too long."""
- start_time = self.crawler.stats.get_value("start_time")
time_now = datetime.datetime.utcnow()
- if (time_now - start_time).seconds >= self.timeout_limit:
+ if (time_now - self.crawl_start_time).seconds >= self.timeout_limit:
spider.crawler.engine.close_spider(spider, reason="timeout")

def limit_requests(self, spider):
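
The limit_runtime() hunk above stops reading the "start_time" stat from the crawler's stats collector and instead compares against a timestamp recorded in __init__ (the crawl_start_time attribute added in the previous hunk); presumably that stat is not reliably available at this point under Scrapy 2.11, which is an inference from the commit title rather than something the diff states. A self-contained sketch of the resulting timeout check, reusing the attribute names from the diff inside a hypothetical stand-in class:

import datetime

class CrawlTimeoutSketch:
    # Hypothetical stand-in for the relevant slice of scrapyrt's crawl
    # manager; only the pieces needed for the timeout check are shown.
    def __init__(self, timeout_limit):
        self.timeout_limit = timeout_limit
        # Recorded up front, so there is no dependency on the stats collector.
        self.crawl_start_time = datetime.datetime.utcnow()

    def timed_out(self):
        # Mirrors limit_runtime(): .seconds matches the original code and
        # ignores whole days, which is fine for request-level timeouts.
        elapsed = datetime.datetime.utcnow() - self.crawl_start_time
        return elapsed.seconds >= self.timeout_limit
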
2 changes: 1 addition & 1 deletion setup.py
@@ -41,7 +41,7 @@
"Tracker": "https://github.com/scrapinghub/scrapyrt/issues"
},
install_requires=[
- 'Scrapy>=1.0.0'
+ 'Scrapy>=2.10'
],
package_data={
'scrapyrt': [
