Skip to content

Commit

Permalink
chore(refactor): Move get_spider_list function into caller
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Jul 19, 2024
1 parent a059e83 commit 97f79af
Show file tree
Hide file tree
Showing 3 changed files with 496 additions and 379 deletions.
77 changes: 0 additions & 77 deletions scrapyd/utils.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,8 @@
import os
import sys
from subprocess import PIPE, Popen
from typing import ClassVar
from urllib.parse import urlsplit

from scrapy.utils.misc import load_object

from scrapyd.config import Config
from scrapyd.exceptions import RunnerError
from scrapyd.sqlite import JsonSqliteDict


class UtilsCache:
    """Persistent cache keyed by project name, with deferred invalidation.

    Entries are stored in a :class:`JsonSqliteDict`, so values must be
    JSON-serializable. Invalidation is lazy: projects are queued in
    ``invalid_cached_projects`` and purged on the next read.
    """

    # Projects queued for invalidation; their cached entries are deleted
    # on the next __getitem__ call. Shared across all instances (ClassVar).
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        # Backed by a JSON-encoding SQLite table named "utils_cache_manager".
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    # Invalidate the cached spider list of a given project (by name).
    @staticmethod
    def invalid_cache(project):
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        # Purge entries for all projects marked invalid, then reset the queue,
        # before performing the actual lookup (which may raise KeyError).
        for p in UtilsCache.invalid_cached_projects:
            if p in self.cache_manager:
                del self.cache_manager[p]
        UtilsCache.invalid_cached_projects[:] = []
        return self.cache_manager[key]

    def __setitem__(self, key, value):
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


def get_spider_queues(config):
"""Return a dict of Spider Queues keyed by project name"""
Expand Down Expand Up @@ -89,50 +56,6 @@ def native_stringify_dict(dct_or_tuples, encoding="utf-8", *, keys_only=True):
return d


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Results are cached per (project, version) in a :class:`UtilsCache`.

    :param project: the project name, exported as ``SCRAPY_PROJECT``
    :param runner: dotted module run via ``python -m``; defaults to the
        ``runner`` setting from :class:`Config`
    :param pythonpath: optional ``PYTHONPATH`` for the subprocess
    :param version: optional egg version, exported as ``SCRAPYD_EGG_VERSION``
    :raises RunnerError: if the runner subprocess exits with a non-zero code
    """
    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # Lazily create the cache on first call, stored as a function attribute.
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # Fall back to b"" (not ""), so that .decode() is always called on
        # bytes; the previous str fallback raised AttributeError when both
        # stderr and stdout were empty.
        msg = (err or out or b"").decode("utf-8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    try:
        project_cache = get_spider_list.cache[project]
    except KeyError:
        project_cache = {}
    project_cache[version] = spiders
    # Always write back: reading from the JSON-backed cache returns a decoded
    # copy, so mutating it in place would not persist the new version entry.
    get_spider_list.cache[project] = project_cache

    return spiders


def to_native_str(text, encoding="utf-8", errors="strict"):
if isinstance(text, str):
return text
Expand Down
79 changes: 77 additions & 2 deletions scrapyd/webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@

import functools
import json
import os
import sys
import traceback
import uuid
import zipfile
from copy import copy
from io import BytesIO
from subprocess import PIPE, Popen
from typing import ClassVar

from twisted.python import log
from twisted.web import error, http, resource

from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError
from scrapyd.config import Config
from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError, RunnerError
from scrapyd.jobstorage import job_items_url, job_log_url
from scrapyd.utils import UtilsCache, get_spider_list, native_stringify_dict
from scrapyd.sqlite import JsonSqliteDict
from scrapyd.utils import native_stringify_dict


def param(
Expand Down Expand Up @@ -53,6 +58,76 @@ def wrapper(self, txrequest, *args, **kwargs):
return decorator


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Results are cached per (project, version) in a :class:`UtilsCache`.

    :param project: the project name, exported as ``SCRAPY_PROJECT``
    :param runner: dotted module run via ``python -m``; defaults to the
        ``runner`` setting from :class:`Config`
    :param pythonpath: optional ``PYTHONPATH`` for the subprocess
    :param version: optional egg version, exported as ``SCRAPYD_EGG_VERSION``
    :raises RunnerError: if the runner subprocess exits with a non-zero code
    """
    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # Lazily create the cache on first call, stored as a function attribute.
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # Fall back to b"" (not ""), so that .decode() is always called on
        # bytes; the previous str fallback raised AttributeError when both
        # stderr and stdout were empty.
        msg = (err or out or b"").decode("utf-8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    try:
        project_cache = get_spider_list.cache[project]
    except KeyError:
        project_cache = {}
    project_cache[version] = spiders
    # Always write back: reading from the JSON-backed cache returns a decoded
    # copy, so mutating it in place would not persist the new version entry.
    get_spider_list.cache[project] = project_cache

    return spiders


class UtilsCache:
    """Persistent cache keyed by project name, with deferred invalidation.

    Values are stored in a :class:`JsonSqliteDict`, so they must be
    JSON-serializable. Invalidation is lazy: projects queued via
    :meth:`invalid_cache` are purged on the next read.
    """

    # Projects queued for invalidation, shared across all instances.
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    @staticmethod
    def invalid_cache(project):
        """Queue *project* so its cached entry is dropped on the next read."""
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        # Drain the invalidation queue before the lookup, deleting any
        # cached entry for each queued project.
        pending = UtilsCache.invalid_cached_projects
        while pending:
            project = pending.pop()
            if project in self.cache_manager:
                del self.cache_manager[project]
        return self.cache_manager[key]

    def __setitem__(self, key, value):
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


class JsonResource(resource.Resource):
json_encoder = json.JSONEncoder()

Expand Down
Loading

0 comments on commit 97f79af

Please sign in to comment.