feat: Use twisted.logger (easier testing and not legacy)
jpmckinney committed Jul 23, 2024
1 parent d9ac0a1 commit 8b947b1
Showing 9 changed files with 204 additions and 45 deletions.
7 changes: 7 additions & 0 deletions docs/news.rst
@@ -50,6 +50,13 @@ API
CLI
^^^

- Scrapyd uses ``twisted.logger`` instead of the legacy ``twisted.python.log``. The system (prefix) shown in some log messages changes:

- ``[scrapyd.basicauth#info] Basic authentication ...``, instead of ``[-] ...``
- ``[scrapyd.app#info] Scrapyd web console available at ...``, instead of ``[-] ...``
- ``[-] Unhandled Error``, instead of ``[_GenericHTTPChannelProtocol,0,127.0.0.1] ...``
- Data received on standard error, and non-zero exit status codes, are logged at the error level.

- Correct the usage message and long description.
- Remove the ``--rundir`` option, which only works if ``*_dir`` settings are absolute paths.
- Remove the ``--nodaemon`` option, which Scrapyd enables.
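The changelog entry above is the core of this commit: moving from ``twisted.python.log`` to ``twisted.logger``. As a rough standalone sketch (not Scrapyd code; the ``host`` field is a placeholder), the legacy and new call styles compare as follows. With the new API, the namespace given to ``Logger`` (or derived from the calling module) is what appears in prefixes such as ``[scrapyd.app#info]``.

# Minimal sketch contrasting the two APIs; nothing is printed unless a log
# observer has been started, so this only illustrates the call styles.
from twisted.python import log as legacy_log
from twisted.logger import Logger

# Legacy style: printf-style "format" keyword, no log levels.
legacy_log.msg(format="Scrapyd web console available at http://%(host)s/", host="127.0.0.1")

# New style: per-module Logger, str.format-style placeholders filled from
# keyword arguments, and explicit levels (info, error, ...).
log = Logger(namespace="scrapyd.app")
log.info("Scrapyd web console available at http://{host}/", host="127.0.0.1")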
12 changes: 7 additions & 5 deletions scrapyd/app.py
@@ -2,7 +2,7 @@

from twisted.application.internet import TCPServer, TimerService, UNIXServer
from twisted.application.service import Application
from twisted.python import log
from twisted.logger import Logger
from twisted.web import server

from scrapyd.basicauth import wrap_resource
@@ -11,6 +11,8 @@
from scrapyd.scheduler import SpiderScheduler
from scrapyd.utils import initialize_component

log = Logger()


def application(config):
app = Application("Scrapyd")
@@ -41,16 +43,16 @@ def application(config):
resource = server.Site(wrap_resource(webroot, config))
if bind_address and http_port:
webservice = TCPServer(http_port, resource, interface=bind_address)
log.msg(
format="Scrapyd web console available at http://%(bind_address)s:%(http_port)s/",
log.info(
"Scrapyd web console available at http://{bind_address}:{http_port}/",
bind_address=bind_address,
http_port=http_port,
)
if unix_socket_path:
unix_socket_path = os.path.abspath(unix_socket_path)
webservice = UNIXServer(unix_socket_path, resource, mode=0o660)
log.msg(
format="Scrapyd web console available at http+unix://%(unix_socket_path)s",
log.info(
"Scrapyd web console available at http+unix://{unix_socket_path}",
unix_socket_path=unix_socket_path,
)

8 changes: 5 additions & 3 deletions scrapyd/basicauth.py
@@ -4,13 +4,15 @@
from twisted.cred.checkers import ICredentialsChecker
from twisted.cred.portal import IRealm, Portal
from twisted.internet import defer
from twisted.python import log
from twisted.logger import Logger
from twisted.web.guard import BasicCredentialFactory, HTTPAuthSessionWrapper
from twisted.web.resource import IResource
from zope.interface import implementer

from scrapyd.exceptions import InvalidUsernameError

log = Logger()


# https://docs.twisted.org/en/stable/web/howto/web-in-60/http-auth.html
@implementer(IRealm)
@@ -46,11 +48,11 @@ def wrap_resource(resource, config):
raise InvalidUsernameError

if username and password:
log.msg("Basic authentication enabled")
log.info("Basic authentication enabled")
return HTTPAuthSessionWrapper(
Portal(PublicHTMLRealm(resource), [StringCredentialsChecker(username, password)]),
[BasicCredentialFactory(b"Scrapyd")],
)

log.msg("Basic authentication disabled as either `username` or `password` is unset")
log.info("Basic authentication disabled as either `username` or `password` is unset")
return resource
52 changes: 28 additions & 24 deletions scrapyd/launcher.py
@@ -1,16 +1,18 @@
import datetime
import multiprocessing
import sys
from itertools import chain
from multiprocessing import cpu_count

from twisted.application.service import Service
from twisted.internet import defer, error, protocol, reactor
from twisted.python import log
from twisted.logger import Logger

from scrapyd import __version__
from scrapyd.interfaces import IEnvironment, IJobStorage, IPoller
from scrapyd.utils import native_stringify_dict

log = Logger()


def get_crawl_args(message):
"""Return the command-line arguments to use for the scrapy crawl process
@@ -38,16 +40,16 @@ def __init__(self, config, app):

def startService(self):
for slot in range(self.max_proc):
self._wait_for_project(slot)
log.msg(
format="Scrapyd %(version)s started: max_proc=%(max_proc)r, runner=%(runner)r",
self._get_message(slot)
log.info(
"Scrapyd {version} started: max_proc={max_proc!r}, runner={runner!r}",
version=__version__,
max_proc=self.max_proc,
runner=self.runner,
system="Launcher",
log_system="Launcher",
)

def _wait_for_project(self, slot):
def _get_message(self, slot):
poller = self.app.getComponent(IPoller)
poller.next().addCallback(self._spawn_process, slot)

@@ -74,19 +76,21 @@ def _process_finished(self, _, slot):
process = self.processes.pop(slot)
process.end_time = datetime.datetime.now()
self.finished.add(process)
self._wait_for_project(slot)
self._get_message(slot)

def _get_max_proc(self, config):
max_proc = config.getint("max_proc", 0)
if not max_proc:
try:
cpus = cpu_count()
except NotImplementedError:
cpus = 1
max_proc = cpus * config.getint("max_proc_per_cpu", 4)
return max_proc
if max_proc:
return max_proc

try:
cpus = multiprocessing.cpu_count()
except NotImplementedError: # Windows 17520a3
cpus = 1
return cpus * config.getint("max_proc_per_cpu", 4)


# https://docs.twisted.org/en/stable/api/twisted.internet.protocol.ProcessProtocol.html
class ScrapyProcessProtocol(protocol.ProcessProtocol):
def __init__(self, project, spider, job, env, args):
self.pid = None
@@ -100,20 +104,21 @@ def __init__(self, project, spider, job, env, args):
self.deferred = defer.Deferred()

def outReceived(self, data):
log.msg(data.rstrip(), system=f"Launcher,{self.pid}/stdout")
log.info(data.rstrip(), log_system=f"Launcher,{self.pid}/stdout")

def errReceived(self, data):
log.msg(data.rstrip(), system=f"Launcher,{self.pid}/stderr")
log.error(data.rstrip(), log_system=f"Launcher,{self.pid}/stderr")

def connectionMade(self):
self.pid = self.transport.pid
self.log("Process started: ")
self.log("info", "Process started:")

# https://docs.twisted.org/en/stable/core/howto/process.html#things-that-can-happen-to-your-processprotocol
def processEnded(self, status):
if isinstance(status.value, error.ProcessDone):
self.log("Process finished: ")
self.log("info", "Process finished:")
else:
self.log(f"Process died: exitstatus={status.value.exitCode!r} ")
self.log("error", f"Process died: exitstatus={status.value.exitCode!r}")
self.deferred.callback(self)

def asdict(self):
@@ -125,10 +130,9 @@ def asdict(self):
"start_time": str(self.start_time),
}

def log(self, action):
fmt = "%(action)s project=%(project)r spider=%(spider)r job=%(job)r pid=%(pid)r args=%(args)r"
log.msg(
format=fmt,
def log(self, level, action):
getattr(log, level)(
"{action} project={project!r} spider={spider!r} job={job!r} pid={pid!r} args={args!r}",
action=action,
project=self.project,
spider=self.spider,
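In the launcher diff above, ``system=`` becomes ``log_system=`` and stderr output moves to the error level. A small standalone illustration (not Scrapyd code; the pid and messages are made up) of how a ``log_system`` field affects the rendered prefix, assuming the default text observer:

# Standalone illustration: with twisted.logger, the bracketed prefix is
# normally "namespace#level", but a log_system field overrides it, which is
# how "[Launcher,<pid>/stderr]"-style prefixes are kept in Scrapyd's output.
import io
from twisted.logger import Logger, textFileLogObserver

out = io.StringIO()
log = Logger(namespace="scrapyd.launcher", observer=textFileLogObserver(out))

log.info("Process started")  # rendered roughly as "... [scrapyd.launcher#info] Process started"
log.error("spider stderr output", log_system="Launcher,1234/stderr")  # "... [Launcher,1234/stderr] spider stderr output"

print(out.getvalue())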
6 changes: 4 additions & 2 deletions scrapyd/webservice.py
@@ -13,12 +13,14 @@
from subprocess import PIPE, Popen
from typing import ClassVar

from twisted.python import log
from twisted.logger import Logger
from twisted.web import error, http, resource

from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError, RunnerError
from scrapyd.utils import native_stringify_dict

log = Logger()


def param(
decoded: str,
@@ -121,7 +123,7 @@ def render(self, txrequest):
try:
obj = super().render(txrequest)
except Exception as e: # noqa: BLE001
log.err()
log.failure("")

if isinstance(e, error.Error):
txrequest.setResponseCode(int(e.status))
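In the webservice handler above, ``log.err()`` becomes ``log.failure("")``, which attaches the exception currently being handled (traceback included) to the log event. A minimal standalone sketch of the pattern (not Scrapyd code; ``render`` here is just a placeholder function):

# Minimal sketch: inside an except block, Logger.failure() captures the active
# exception as a Failure and logs it (at critical level by default).
from twisted.logger import Logger

log = Logger()

def render(txrequest):
    try:
        raise ValueError("project is invalid")
    except Exception:
        log.failure("")  # empty format string; the Failure carries the traceback
        return b"error"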
1 change: 1 addition & 0 deletions setup.py
@@ -37,6 +37,7 @@
"pytest-cov",
"pytest-twisted",
"requests",
"twisted>=19.7", # twisted.logger.capturedLogs
],
"docs": [
"furo",
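The new ``twisted>=19.7`` test requirement is pinned for ``twisted.logger.capturedLogs``, a context manager that collects emitted log events so tests can assert on them, which is the "easier testing" part of the commit message. A hypothetical test sketch (not from this commit) of how it can be used:

# Hypothetical test using twisted.logger.capturedLogs (available since Twisted 19.7).
from twisted.logger import Logger, capturedLogs

log = Logger(namespace="scrapyd.basicauth")


def test_captured_logs():
    with capturedLogs() as captured:
        log.info("Basic authentication enabled")
    # Each captured item is the raw event dict, with the unformatted message
    # under "log_format" and the emitting namespace under "log_namespace".
    assert captured[-1]["log_format"] == "Basic authentication enabled"
    assert captured[-1]["log_namespace"] == "scrapyd.basicauth"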
16 changes: 12 additions & 4 deletions tests/conftest.py
@@ -8,7 +8,6 @@

from scrapyd import Config
from scrapyd.app import application
from scrapyd.txapp import application as txapplication
from scrapyd.webservice import spider_list
from scrapyd.website import Root
from tests import root_add_version
@@ -35,9 +34,18 @@ def chdir(monkeypatch, tmp_path):
return tmp_path


@pytest.fixture()
def app(chdir):
return txapplication
@pytest.fixture(
params=[
None,
(Config.SECTION, "items_dir", "items"),
]
)
def app(request, chdir):
config = Config()
if isinstance(request.param, tuple):
config.cp.set(*request.param)

return application(config)


@pytest.fixture(
7 changes: 1 addition & 6 deletions tests/test_endpoints.py
@@ -6,7 +6,6 @@
import requests
from requests.models import Response

from scrapyd import __version__
from tests import get_egg_data
from tests.mockserver import MockScrapydServer

@@ -65,10 +64,6 @@ def test_auth():
assert f" [-] Basic authentication enabled{os.linesep}" in stdout
# scrapyd.app
assert f" [-] Scrapyd web console available at http://127.0.0.1:{server.http_port}/" in stdout
# scrapyd.launcher
assert re.search(
f" \\[Launcher\\] Scrapyd {__version__} started: max_proc=\\d+, runner='scrapyd.runner'{os.linesep}", stdout
)


def test_noauth():
@@ -89,7 +84,7 @@ def test_error():
stdout = server.stdout.decode()

# scrapyd.webservice
assert f" [_GenericHTTPChannelProtocol,0,127.0.0.1] Unhandled Error{os.linesep}" in stdout
assert f" [-] Unhandled Error{os.linesep}" in stdout
assert f"\tTraceback (most recent call last):{os.linesep}" in stdout
assert "\ttwisted.web.error.Error: 200 project is invalid: " in stdout

Expand Down
Loading

0 comments on commit 8b947b1

Please sign in to comment.