From a8d0a7395dd7f3c3874c4ae0573cc7f867c7f2b4 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:54:32 -0400 Subject: [PATCH] feat: Add "Last modified" column to the directory listings of log files and item feeds, closes #509 --- docs/news.rst | 1 + scrapyd/website.py | 118 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/docs/news.rst b/docs/news.rst index 5a6424d4..6879c863 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -11,6 +11,7 @@ Unreleased Added ~~~~~ +- Add "Last modified" column to the directory listings of log files and item feeds. - Add environment variables to override common options. See :doc:`config`. - Add documentation on how to add webservices (endpoints). See :ref:`config-services`. diff --git a/scrapyd/website.py b/scrapyd/website.py index cfcc557a..7a60120e 100644 --- a/scrapyd/website.py +++ b/scrapyd/website.py @@ -1,9 +1,11 @@ import socket from datetime import datetime, timedelta -from urllib.parse import urlparse +from html import escape +from urllib.parse import quote, urlparse from scrapy.utils.misc import load_object from twisted.application.service import IServiceCollection +from twisted.python import filepath from twisted.web import resource, static from scrapyd.interfaces import IEggStorage, IPoller, ISpiderScheduler @@ -15,6 +17,116 @@ def get_base_path(self, txrequest): return txrequest.getHeader(self.prefix_header) or '' +# Use local DirectoryLister class. +class File(static.File): + def directoryListing(self): + path = self.path + names = self.listNames() + return DirectoryLister( + path, names, self.contentTypes, self.contentEncodings, self.defaultType + ) + + +# Add "Last modified" column. +class DirectoryLister(static.DirectoryLister): + template = """ + +%(header)s + + + + +

%(header)s

+ + + + + + + + + + + + +%(tableContent)s + +
FilenameSizeLast modifiedContent typeContent encoding
+ + + +""" + + linePattern = """ + %(text)s + %(size)s + %(modified)s + %(type)s + %(encoding)s + +""" + + def _getFilesAndDirectories(self, directory): + files = [] + dirs = [] + + for path in directory: + if isinstance(path, bytes): + path = path.decode("utf8") + + url = quote(path, "/") + escapedPath = escape(path) + childPath = filepath.FilePath(self.path).child(path) + modified = datetime.fromtimestamp(childPath.getModificationTime()).strftime("%Y-%m-%d %H:%M") # NEW + + if childPath.isdir(): + dirs.append( + { + "text": escapedPath + "/", + "href": url + "/", + "size": "", + "type": "[Directory]", + "encoding": "", + "modified": modified, # NEW + } + ) + else: + mimetype, encoding = static.getTypeAndEncoding( + path, self.contentTypes, self.contentEncodings, self.defaultType + ) + try: + size = childPath.getsize() + except OSError: + continue + files.append( + { + "text": escapedPath, + "href": url, + "type": "[%s]" % mimetype, + "encoding": (encoding and "[%s]" % encoding or ""), + "size": static.formatFileSize(size), + "modified": modified, # NEW + } + ) + return dirs, files + + class Root(resource.Resource): def __init__(self, config, app): @@ -29,9 +141,9 @@ def __init__(self, config, app): self.nodename = config.get('node_name', socket.gethostname()) self.putChild(b'', Home(self, self.local_items)) if logsdir: - self.putChild(b'logs', static.File(logsdir.encode('ascii', 'ignore'), 'text/plain')) + self.putChild(b'logs', File(logsdir.encode('ascii', 'ignore'), 'text/plain')) if self.local_items: - self.putChild(b'items', static.File(itemsdir, 'text/plain')) + self.putChild(b'items', File(itemsdir, 'text/plain')) self.putChild(b'jobs', Jobs(self, self.local_items)) services = config.items('services', ()) for servName, servClsName in services: