Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenhilton committed Sep 7, 2024
0 parents commit 82ba533
Show file tree
Hide file tree
Showing 13 changed files with 840 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
root = true

[*]
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Dependabot configuration: keep the GitHub Actions used by the workflows current.
version: 2
updates:
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: monthly
    groups:
      # Bundle every minor/patch action bump into a single pull request.
      actions:
        patterns:
          - "*"
        update-types:
          - minor
          - patch
111 changes: 111 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
name: main

on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main, master]

env:
  PIP_DISABLE_PIP_VERSION_CHECK: 1
  PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"

jobs:
  # Works out what changed so the later jobs can decide whether to run.
  changes:
    runs-on: ubuntu-22.04
    permissions:
      contents: read
      pull-requests: read
    outputs:
      extractors: ${{ steps.filter.outputs.extractors }}
      extractors_files: ${{ steps.filter.outputs.extractors_files }}
      core: ${{ steps.filter.outputs.core }}
      modules_exist: ${{ steps.modules_exist.outputs.modules_exist }}
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

      # Sets modules_exist=true as soon as any module other than __init__.py
      # is found in extractor/ or test/results/, otherwise false.
      - name: Modules exist
        id: modules_exist
        run: |
          for file in extractor/*.py test/results/*.py; do
            # An unmatched glob is passed through literally; skip it so that a
            # directory without any .py files is not reported as having modules.
            [ -e "${file}" ] || continue
            filename=$(basename "${file}")
            if [ "${filename}" != "__init__.py" ]; then
              echo "modules_exist=true" >> "${GITHUB_OUTPUT}"
              exit 0
            fi
          done
          echo "modules_exist=false" >> "${GITHUB_OUTPUT}"

      # "extractors" matches individual modules, "core" matches the shared
      # __init__.py files and the test runner. list-files makes the matched
      # paths available as a JSON array (extractors_files).
      - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36
        id: filter
        with:
          list-files: json
          filters: |
            extractors:
              - added|modified: 'extractor/!(__init__).py'
              - added|modified: 'test/results/!(__init__).py'
            core:
              - added|modified: '**/__init__.py'
              - added|modified: 'test/test_results.py'

  # Runs only when an extractor module or a core file changed.
  lint:
    runs-on: ubuntu-22.04
    needs: changes
    if: needs.changes.outputs.extractors == 'true' || needs.changes.outputs.core == 'true'
    permissions:
      contents: read
      pull-requests: read
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

      - uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc

      - name: Lint
        run: hatch fmt --check

  test:
    runs-on: ubuntu-22.04
    needs: [changes, lint]
    if: needs.changes.outputs.modules_exist == 'true'
    permissions:
      contents: read
      pull-requests: read
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9"]
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

      - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3
        with:
          python-version: ${{ matrix.python-version }}

      - uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc

      # A core change can affect every extractor, so run the whole suite.
      - name: Test all
        if: needs.changes.outputs.core == 'true'
        run: hatch test --python "${{ matrix.python-version }}"

      # Only individual modules changed: test just those.
      - name: Test individual
        if: needs.changes.outputs.extractors == 'true' && needs.changes.outputs.core == 'false'
        env:
          # The changed-file list is derived from pull request contents, so it
          # is untrusted. Hand it to the script through the environment rather
          # than expanding it inside the script text, where a crafted file
          # name could break out of the quoting and run arbitrary commands.
          EXTRACTORS_FILES: ${{ needs.changes.outputs.extractors_files }}
        run: |
          # Get unique extractor module names from the array of added/modified file paths
          modules=($( jq -r 'map(match("/([^/]+)\\.py$").captures[0].string) | unique | join(" ")' <<< "${EXTRACTORS_FILES}" ))
          for module in "${modules[@]}"; do
            src_file="./extractor/${module}.py"
            test_file="./test/results/${module}.py"
            if [ ! -f "${src_file}" ]; then
              echo "Error: Source file ${src_file} not found."
              exit 1
            fi
            if [ ! -f "${test_file}" ]; then
              echo "Error: Test file ${test_file} not found."
              exit 1
            fi
            hatch test --python "${{ matrix.python-version }}" "${module}"
          done
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
*cache/
__pycache__/
*.pyc
.coverage*

dist/

env
.env
venv
.venv
7 changes: 7 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Copyright 2024 bradenhilton

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!-- markdownlint-disable MD033 -->

# gdl-extractors

Custom extractor modules for [gallery-dl](https://github.com/mikf/gallery-dl).

## Usage

Download extractor module `.py` files from [`extractor/`](extractor) to a directory of your choosing such as `~/.config/gallery-dl/modules`, then provide this directory as a [module source](https://gdl-org.github.io/docs/configuration.html#extractor-module-sources) in your gallery-dl config. The `null` entry in the example below stands for gallery-dl's built-in extractors; keep it so they remain available alongside the custom modules, e.g.:

<details open><summary><code>config.json</code></summary>

```json
{
"extractor": {
"module-sources": [
"~/.config/gallery-dl/modules",
null
]
}
}
```

</details>
79 changes: 79 additions & 0 deletions extractor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Adapted from https://github.com/mikf/gallery-dl/blob/master/gallery_dl/extractor/__init__.py

import re
import sys
from pathlib import Path

# Directory that contains this package file.
__dirname__ = Path(__file__).parent

# Directory entries that are never treated as extractor modules.
ignore = {"__init__.py", "__pycache__"}

# Module names (file stems) of every remaining ".py" file in this directory.
modules = [
    entry.stem
    for entry in __dirname__.iterdir()
    if entry.suffix == ".py" and entry.name not in ignore
]


def find(url):
    """Return an extractor instance for the first class whose pattern matches 'url'.

    Classes are tried in the order produced by _list_classes(). The class is
    instantiated with the match object. None is returned when nothing matches.
    """
    for extractor_cls in _list_classes():
        hit = extractor_cls.pattern.match(url)
        if not hit:
            continue
        return extractor_cls(hit)
    return None


def add(cls):
    """Register the extractor class 'cls' and return it.

    The class's 'pattern' attribute is replaced by its compiled form before
    the class is appended to the extractor cache.
    """
    compiled = re.compile(cls.pattern)
    cls.pattern = compiled
    _cache.append(cls)
    return cls


def add_module(module):
    """Register every extractor class found in 'module'.

    Each class's 'pattern' is compiled in place. Only after all of them have
    compiled are the classes appended to the extractor cache. Returns the
    list of classes that were added.
    """
    found = _get_classes(module)
    for extractor_cls in found:
        extractor_cls.pattern = re.compile(extractor_cls.pattern)
    _cache.extend(found)
    return found


def extractors():
    """Return a new list of all available extractor classes, sorted by class name"""
    classes = list(_list_classes())
    classes.sort(key=lambda extractor_cls: extractor_cls.__name__)
    return classes


# --------------------------------------------------------------------
# internals


def _list_classes():
    """Yield available extractor classes, importing modules on demand.

    Already-registered classes come first. After that, the remaining modules
    from _module_iter are imported one at a time and their classes yielded.
    """
    for known in _cache:
        yield known

    for pending in _module_iter:
        for fresh in add_module(pending):
            yield fresh

    # Reached only when the module iterator has been fully consumed: from now
    # on every class is in the cache, so rebind this name to a plain accessor.
    globals()["_list_classes"] = lambda: _cache


def _modules_internal():
    """Lazily import each name in 'modules' relative to this package and yield it."""
    package_globals = globals()
    yield from (
        # level=1 makes this a relative import from the current package.
        __import__(stem, package_globals, None, (), 1)
        for stem in modules
    )


def _modules_path(path, files):
sys.path.insert(0, path)
try:
return [__import__(name[:-3]) for name in files if name.endswith(".py")]
finally:
del sys.path[0]


def _get_classes(module):
"""Return a list of all extractor classes in a module"""
return [cls for cls in module.__dict__.values() if (hasattr(cls, "pattern") and cls.__module__ == module.__name__)]


# Extractor classes registered so far, via add() or add_module().
_cache = list()
# Generator over this package's modules; each one is imported when reached.
_module_iter = _modules_internal()
33 changes: 33 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Build with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "gdl-extractors"
version = "0.0.1"
description = "Custom extractors for gallery-dl"
requires-python = ">=3.8"
license = { file = "LICENSE" }
readme = "README.md"
keywords = ["gallery-dl", "extractor"]
authors = [{ name = "bradenhilton" }]
# NOTE(review): presumably this classifier is here to block accidental uploads
# to a package index - confirm.
classifiers = ["Private :: Do Not Upload"]
dependencies = ["gallery-dl<2"]

# The wheel ships the "extractor" directory as its only package.
[tool.hatch.build.targets.wheel]
packages = ["extractor"]

[tool.hatch.envs.hatch-test]
default-args = []

# "run" and "run-cov" both execute test/test_results.py with the arguments that
# were passed on; the coverage combine/report commands are set to empty strings.
[tool.hatch.envs.hatch-test.scripts]
run = "hatch run python ./test/test_results.py {args}"
run-cov = "hatch run python ./test/test_results.py {args}"
cov-combine = ""
cov-report = ""

# Both the hatch static-analysis environment and ruff itself are pointed at
# ruff.toml, which keeps the lint/format settings in a single file.
[tool.hatch.envs.hatch-static-analysis]
config-path = "ruff.toml"

[tool.ruff]
extend = "ruff.toml"
77 changes: 77 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Maximum line length for source files.
line-length = 120

# Package __init__ files and the test runner are excluded from ruff.
exclude = ["**/__init__.py", "test/test_results.py"]

# Code examples inside docstrings are formatted too, at a narrower width than
# the surrounding source.
[format]
docstring-code-format = true
docstring-code-line-length = 80

[lint]
# Rule selectors to enable. The list mixes short prefixes (e.g. "E"), longer
# prefixes (e.g. "RUF00", "S1") and complete rule codes (e.g. "COM818").
select = [
"A",
"ARG",
"ASYNC",
"B",
"BLE",
"C",
"COM818",
"COM819",
"DTZ",
"E",
"EM",
"EXE",
"F",
"FA",
"FBT",
"FLY",
"G",
"I",
"ICN",
"INT",
"ISC002",
"ISC003",
"LOG",
"N",
"PERF",
"PGH",
"PIE",
"PLC",
"PLE",
"PLR",
"PLW",
"PT",
"PYI",
"RET",
"RSE",
"RUF00",
"RUF01",
"RUF020",
"RUF100",
"S1",
"S6",
"SIM",
"SLF",
"T10",
"T20",
"TCH",
"TD004",
"TD005",
"TD006",
"TD007",
"TID",
"TRY",
"UP",
"W2",
"W5",
"W6",
]

# Relative imports are banned everywhere that ruff checks.
[lint.flake8-tidy-imports]
ban-relative-imports = "all"

# NOTE(review): pyproject.toml builds the package as "extractor" and nothing
# named gdl_extractors appears in this commit - confirm that this is the
# intended first-party name.
[lint.isort]
known-first-party = ["gdl_extractors"]

# pytest fixture and mark decorators are written without empty parentheses.
[lint.flake8-pytest-style]
fixture-parentheses = false
mark-parentheses = false
Empty file added test/__init__.py
Empty file.
23 changes: 23 additions & 0 deletions test/results/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Adapted from https://github.com/mikf/gallery-dl/blob/master/test/results/__init__.py

import functools
from pathlib import Path

# Directory containing this file; all() scans it for results modules.
__directory__ = Path(__file__).parent


@functools.lru_cache(maxsize=None)
def tests(name):
    """Import the sibling module 'name' and return its '__tests__' attribute.

    The result is cached per name without a size limit, so each module's
    '__tests__' is looked up only once.
    """
    # level=1 makes this a relative import from the current package.
    return __import__(name, globals(), None, (), 1).__tests__


def all():
    """Yield the tests of every ".py" module in this directory except __init__.py."""
    skipped = {"__init__.py", "__pycache__"}
    for entry in __directory__.iterdir():
        if entry.suffix != ".py" or entry.name in skipped:
            continue
        yield from tests(entry.stem)


def category(category):
    """Return the tests for 'category'; its dots are removed to form the module name."""
    module_name = category.replace(".", "")
    return tests(module_name)
Loading

0 comments on commit 82ba533

Please sign in to comment.