Skip to content

Commit d498ae3

Browse files
committed
chore: Use twisted.python.filepath to secure paths, instead of realpath and commonprefix
1 parent c56cf0b commit d498ae3

File tree

3 files changed

+42
-21
lines changed

3 files changed

+42
-21
lines changed

scrapyd/eggstorage.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from glob import escape, glob
55

66
from packaging.version import InvalidVersion, Version
7+
from twisted.python import filepath
78
from zope.interface import implementer
89

910
from scrapyd.exceptions import DirectoryTraversalError, EggNotFoundError, ProjectNotFoundError
@@ -72,10 +73,9 @@ def _egg_path(self, project, version):
7273
return self._get_path(project, f"{sanitized_version}.egg")
7374

7475
def _get_path(self, project, *trusted):
75-
resolvedir = os.path.realpath(self.basedir)
76-
projectdir = os.path.realpath(os.path.join(resolvedir, project))
77-
78-
if os.path.commonprefix((projectdir, resolvedir)) != resolvedir:
79-
raise DirectoryTraversalError(project)
76+
try:
77+
file = filepath.FilePath(self.basedir).child(project)
78+
except filepath.InsecurePath as e:
79+
raise DirectoryTraversalError(project) from e
8080

81-
return os.path.join(projectdir, *trusted)
81+
return os.path.join(file.path, *trusted)

scrapyd/environ.py

+11-14
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from posixpath import join as urljoin
55
from urllib.parse import urlsplit
66

7+
from twisted.python import filepath
78
from w3lib.url import path_to_file_uri
89
from zope.interface import implementer
910

@@ -57,31 +58,27 @@ def _get_feeds(self, message, extension):
5758
return parsed._replace(path=path).geturl()
5859

5960
def _get_file(self, message, directory, extension):
60-
resolvedir = os.path.realpath(directory)
6161
project = message["_project"]
6262
spider = message["_spider"]
6363
job = message["_job"]
64-
projectdir = os.path.realpath(os.path.join(resolvedir, project))
65-
spiderdir = os.path.realpath(os.path.join(projectdir, spider))
66-
jobfile = os.path.realpath(os.path.join(spiderdir, f"{job}.{extension}"))
6764

68-
if (
69-
os.path.commonprefix((projectdir, resolvedir)) != resolvedir
70-
or os.path.commonprefix((spiderdir, projectdir)) != projectdir
71-
or os.path.commonprefix((jobfile, spiderdir)) != spiderdir
72-
):
73-
raise DirectoryTraversalError(os.path.join(project, spider, f"{job}.{extension}"))
65+
# https://docs.twisted.org/en/stable/api/twisted.python.filepath.FilePath.html
66+
try:
67+
file = filepath.FilePath(directory).child(project).child(spider).child(f"{job}.{extension}")
68+
except filepath.InsecurePath as e:
69+
raise DirectoryTraversalError(os.path.join(project, spider, f"{job}.{extension}")) from e
7470

75-
if not os.path.exists(spiderdir):
76-
os.makedirs(spiderdir)
71+
parent = file.dirname() # returns a str
72+
if not os.path.exists(parent):
73+
os.makedirs(parent)
7774

7875
to_delete = sorted(
79-
(os.path.join(spiderdir, name) for name in os.listdir(spiderdir)),
76+
(os.path.join(parent, name) for name in os.listdir(parent)),
8077
key=os.path.getmtime,
8178
)[: -self.jobs_to_keep]
8279

8380
for path in to_delete:
8481
with suppress(OSError):
8582
os.remove(path)
8683

87-
return jobfile
84+
return file.path

tests/test_environ.py

+25-1
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,28 @@ def test_get_settings_secure(values, key, value):
8383
)
8484

8585

86+
def test_jobs_to_keep(chdir):
87+
config = Config(values={"jobs_to_keep": "2"})
88+
environ = Environment(config, initenv={})
89+
directory = chdir / "logs" / "p1" / "s1"
90+
91+
assert not directory.exists()
92+
93+
environ.get_settings({"_project": "p1", "_spider": "s1", "_job": "j1"})
94+
95+
assert directory.exists()
96+
97+
(directory / "j1.a").touch()
98+
(directory / "j2.b").touch()
99+
(directory / "j3.c").touch()
100+
(directory / "j4.d").touch()
101+
102+
environ.get_settings({"_project": "p1", "_spider": "s1", "_job": "j1"})
103+
104+
assert not (directory / "j1.a").exists()
105+
assert not (directory / "j2.b").exists()
106+
107+
86108
@pytest.mark.parametrize(
87109
("message", "run_only_if_has_settings"),
88110
[
@@ -91,12 +113,14 @@ def test_get_settings_secure(values, key, value):
91113
({"_project": "localproject"}, True),
92114
],
93115
)
94-
def test_get_environment(environ, message, run_only_if_has_settings):
116+
def test_get_environment(monkeypatch, environ, message, run_only_if_has_settings):
95117
if run_only_if_has_settings and not has_settings():
96118
pytest.skip("[settings] section is not set")
97119

120+
monkeypatch.setenv("CUSTOM", "value")
98121
env = environ.get_environment(message, 3)
99122

123+
assert env["CUSTOM"] == "value"
100124
assert env["SCRAPY_PROJECT"] == message["_project"]
101125

102126
if "_version" in message:

0 commit comments

Comments
 (0)