From 76887a736adccd7245089d688459e3951d22a7d4 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Fri, 19 Jul 2024 00:06:36 -0400 Subject: [PATCH] chore(refactor): Access the spiderqueues via the Scheduler rather than the Poller In principle, the Scheduler is more related to "pending" jobs than the Poller. Both use the same SQLite databases by default, which support concurrency. --- scrapyd/webservice.py | 10 +++++----- scrapyd/website.py | 2 +- tests/conftest.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/scrapyd/webservice.py b/scrapyd/webservice.py index c8cd3b63..8d695fa9 100644 --- a/scrapyd/webservice.py +++ b/scrapyd/webservice.py @@ -82,7 +82,7 @@ def render_OPTIONS(self, txrequest): class DaemonStatus(WsResource): def render_GET(self, txrequest): - pending = sum(q.count() for q in self.root.poller.queues.values()) + pending = sum(q.count() for q in self.root.scheduler.queues.values()) running = len(self.root.launcher.processes) finished = len(self.root.launcher.finished) @@ -135,12 +135,12 @@ class Cancel(WsResource): # https://github.com/scrapy/scrapy/blob/06f9c28/tests/test_crawler.py#L886 @param("signal", required=False, default="INT" if sys.platform != "win32" else "BREAK") def render_POST(self, txrequest, project, job, signal): - if project not in self.root.poller.queues: + if project not in self.root.scheduler.queues: raise error.Error(code=http.OK, message=b"project '%b' not found" % project.encode()) prevstate = None - if self.root.poller.queues[project].remove(lambda x: x["_job"] == job): + if self.root.scheduler.queues[project].remove(lambda x: x["_job"] == job): prevstate = "pending" spiders = self.root.launcher.processes.values() @@ -208,7 +208,7 @@ class Status(WsResource): @param("project", required=False) def render_GET(self, txrequest, job, project): spiders = self.root.launcher.processes.values() - queues = self.root.poller.queues + queues = self.root.scheduler.queues if project is not None and project not in queues: raise error.Error(code=http.OK, message=b"project '%b' not found" % project.encode()) @@ -238,7 +238,7 @@ class ListJobs(WsResource): @param("project", required=False) def render_GET(self, txrequest, project): spiders = self.root.launcher.processes.values() - queues = self.root.poller.queues + queues = self.root.scheduler.queues if project is not None and project not in queues: raise error.Error(code=http.OK, message=b"project '%b' not found" % project.encode()) diff --git a/scrapyd/website.py b/scrapyd/website.py index 85b3dd9d..d26f3b4e 100644 --- a/scrapyd/website.py +++ b/scrapyd/website.py @@ -317,7 +317,7 @@ def prep_tab_pending(self): "Cancel": self.cancel_button(project=project, jobid=m["_job"], base_path=self.base_path), } ) - for project, queue in self.root.poller.queues.items() + for project, queue in self.root.scheduler.queues.items() for m in queue.list() ) diff --git a/tests/conftest.py b/tests/conftest.py index 151e241a..a40b3b18 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,7 @@ class FakeScheduler: def __init__(self, config): self.config = config self.calls = [] + self.queues = {} def schedule(self, project, spider_name, priority=0.0, **spider_args): self.calls.append([project, spider_name])