Skip to content

Commit

Permalink
Merge pull request #3 from MarcAbonce/split_base_extractor_file
Browse files Browse the repository at this point in the history
Move base classes into separate files
  • Loading branch information
MarcAbonce authored Nov 4, 2024
2 parents b4905c2 + 3e4231c commit 27f04d6
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 76 deletions.
7 changes: 4 additions & 3 deletions thumbframes_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .extractors import * # noqa: F401, F403
from .utils import logger, ExtractorError # noqa: F401
from .version import __version__ # noqa: F401
# flake8: noqa F401
from .extractors import * # noqa: F403
from .utils import logger, ExtractorError
from .version import __version__
3 changes: 2 additions & 1 deletion thumbframes_dl/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .youtube import YouTubeFrames # noqa: F401
# flake8: noqa F401
from .youtube import YouTubeFrames
4 changes: 4 additions & 0 deletions thumbframes_dl/extractors/base/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# flake8: noqa F401
from .format import ThumbFramesFormat
from .frames import WebsiteFrames
from .image import ThumbFramesImage
39 changes: 39 additions & 0 deletions thumbframes_dl/extractors/base/format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from functools import reduce, total_ordering
from typing import List, Optional

from .image import ThumbFramesImage


@total_ordering
class ThumbFramesFormat(object):
    """
    Basic metadata to show the qualities of each set of ThumbFramesImages.
    Useful when there's more than one list of images per video.
    Can be compared and sorted to get the frames with the highest resolution.

    Equality and ordering only look at frame_size (the pixel area of a single frame).
    Comparing against anything that is not a ThumbFramesFormat returns NotImplemented,
    so `fmt == None` is simply False instead of raising AttributeError.
    """

    def __init__(self, format_id: Optional[str], thumbframes: List["ThumbFramesImage"]):
        """
        format_id: identifier of this set of images (may be None).
        thumbframes: images that make up this format; must not be empty.

        Raises an IndexError if thumbframes is empty.
        """
        self.format_id = format_id
        # The size of a single frame is derived from the first image only.
        first_image = thumbframes[0]
        self.frame_width = first_image.width // first_image.cols
        self.frame_height = first_image.height // first_image.rows
        self.total_frames = sum(image.n_frames for image in thumbframes)
        self.total_images = len(thumbframes)

    def __hash__(self):
        # NOTE(review): the hash is based on format_id while equality is based on frame_size,
        # so two formats that compare equal may hash differently.
        # Left unchanged to preserve how existing sets and dicts behave.
        return hash(self.format_id)

    @property
    def frame_size(self):
        """Area of a single frame: frame_width * frame_height."""
        return self.frame_width * self.frame_height

    def __eq__(self, other):
        if not isinstance(other, ThumbFramesFormat):
            return NotImplemented
        return self.frame_size == other.frame_size

    def __lt__(self, other):
        if not isinstance(other, ThumbFramesFormat):
            return NotImplemented
        return self.frame_size < other.frame_size

    def __repr__(self):
        return "<%s %s: %s %sx%s frames in %s images>" % (
            self.__class__.__name__,
            self.format_id, self.total_frames, self.frame_width, self.frame_height, self.total_images
        )
Original file line number Diff line number Diff line change
@@ -1,81 +1,13 @@
import abc
from functools import reduce, total_ordering
from typing import Dict, List, Optional, Sequence, Union

from youtube_dl.YoutubeDL import YoutubeDL
from youtube_dl.extractor.common import InfoExtractor

from thumbframes_dl.utils import logger


class ThumbFramesImage(InfoExtractor):
    """
    Each ThumbFramesImage represents a single image with n_frames frames arranged in a cols*rows grid.
    Note that different images may have different sizes and number of frames even if they're from the same video.
    """

    def __init__(self, url: str, width: int, height: int, cols: int, rows: int, n_frames: int):
        # Attach a downloader that logs through this package's logger.
        # Presumably required by the request made in get_image() - confirm against youtube_dl.
        self.set_downloader(YoutubeDL({'source_address': '0.0.0.0', 'logger': logger}))
        self.url = url
        self.width = width
        self.height = height
        self.cols = cols
        self.rows = rows
        self.n_frames = n_frames
        # Filled in by get_image() from the response's Content-Type header.
        self.mime_type = None  # type: Optional[str]
        # Cache for the downloaded bytes; None until get_image() succeeds.
        self._image = None  # type: Optional[bytes]

    def get_image(self) -> bytes:
        """
        The raw image as bytes.
        Raises an ExtractorError if download fails.

        The image is requested on the first call only; later calls return the cached bytes.
        As a side effect, mime_type is set to the subtype of the Content-Type header
        (e.g. 'jpeg' for 'image/jpeg'), or to None if the header is missing or has no subtype.
        """
        if self._image is None:
            resp = self._request_webpage(self.url, self.url, fatal=True)
            raw_image = resp.read()
            # Drop any parameters after ';' and keep the part that follows the first '/'.
            # A missing or malformed header must not raise and discard a successful download.
            media_type_parts = resp.headers.get('Content-Type', '').split(';')[0].split('/')
            self.mime_type = media_type_parts[1] if len(media_type_parts) > 1 else None
            self._image = raw_image
        return self._image

    def __repr__(self):
        return "<%s: %sx%s image in a %sx%s grid>" % (
            self.__class__.__name__, self.width, self.height, self.cols, self.rows
        )


@total_ordering
class ThumbFramesFormat(object):
    """
    Basic metadata to show the qualities of each set of ThumbFramesImages.
    Useful when there's more than one list of images per video.
    Can be compared and sorted to get the frames with the highest resolution.

    Equality and ordering only look at frame_size (the pixel area of a single frame).
    Comparing against anything that is not a ThumbFramesFormat returns NotImplemented,
    so `fmt == None` is simply False instead of raising AttributeError.
    """

    def __init__(self, format_id: Optional[str], thumbframes: List["ThumbFramesImage"]):
        """
        format_id: identifier of this set of images (may be None).
        thumbframes: images that make up this format; must not be empty.

        Raises an IndexError if thumbframes is empty.
        """
        self.format_id = format_id
        # The size of a single frame is derived from the first image only.
        first_image = thumbframes[0]
        self.frame_width = first_image.width // first_image.cols
        self.frame_height = first_image.height // first_image.rows
        self.total_frames = sum(image.n_frames for image in thumbframes)
        self.total_images = len(thumbframes)

    def __hash__(self):
        # NOTE(review): the hash is based on format_id while equality is based on frame_size,
        # so two formats that compare equal may hash differently.
        # Left unchanged to preserve how existing sets and dicts behave.
        return hash(self.format_id)

    @property
    def frame_size(self):
        """Area of a single frame: frame_width * frame_height."""
        return self.frame_width * self.frame_height

    def __eq__(self, other):
        if not isinstance(other, ThumbFramesFormat):
            return NotImplemented
        return self.frame_size == other.frame_size

    def __lt__(self, other):
        if not isinstance(other, ThumbFramesFormat):
            return NotImplemented
        return self.frame_size < other.frame_size

    def __repr__(self):
        return "<%s %s: %s %sx%s frames in %s images>" % (
            self.__class__.__name__,
            self.format_id, self.total_frames, self.frame_width, self.frame_height, self.total_images
        )
from .format import ThumbFramesFormat
from .image import ThumbFramesImage


class WebsiteFrames(abc.ABC, InfoExtractor):
Expand Down
41 changes: 41 additions & 0 deletions thumbframes_dl/extractors/base/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import Optional

from youtube_dl.YoutubeDL import YoutubeDL
from youtube_dl.extractor.common import InfoExtractor

from thumbframes_dl.utils import logger


class ThumbFramesImage(InfoExtractor):
    """
    Each ThumbFramesImage represents a single image with n_frames frames arranged in a cols*rows grid.
    Note that different images may have different sizes and number of frames even if they're from the same video.
    """

    def __init__(self, url: str, width: int, height: int, cols: int, rows: int, n_frames: int):
        # Attach a downloader that logs through this package's logger.
        # Presumably required by the request made in get_image() - confirm against youtube_dl.
        self.set_downloader(YoutubeDL({'source_address': '0.0.0.0', 'logger': logger}))
        self.url = url
        self.width = width
        self.height = height
        self.cols = cols
        self.rows = rows
        self.n_frames = n_frames
        # Filled in by get_image() from the response's Content-Type header.
        self.mime_type: Optional[str] = None
        # Cache for the downloaded bytes; None until get_image() succeeds.
        self._image: Optional[bytes] = None

    def get_image(self) -> bytes:
        """
        The raw image as bytes.
        Raises an ExtractorError if download fails.

        The image is requested on the first call only; later calls return the cached bytes.
        As a side effect, mime_type is set to the subtype of the Content-Type header
        (e.g. 'jpeg' for 'image/jpeg'), or to None if the header is missing or has no subtype.
        """
        if self._image is None:
            resp = self._request_webpage(self.url, self.url, fatal=True)
            raw_image = resp.read()
            # Drop any parameters after ';' and keep the part that follows the first '/'.
            # A missing or malformed header must not raise and discard a successful download.
            media_type_parts = resp.headers.get('Content-Type', '').split(';')[0].split('/')
            self.mime_type = media_type_parts[1] if len(media_type_parts) > 1 else None
            self._image = raw_image
        return self._image

    def __repr__(self):
        return "<%s: %sx%s image in a %sx%s grid>" % (
            self.__class__.__name__, self.width, self.height, self.cols, self.rows
        )
3 changes: 2 additions & 1 deletion thumbframes_dl/extractors/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from youtube_dl.utils import try_get, int_or_none
from youtube_dl.extractor.youtube import YoutubeIE

from ._base import WebsiteFrames, ThumbFramesImage
from thumbframes_dl.utils import logger

from .base import WebsiteFrames, ThumbFramesImage


class YouTubeFrames(WebsiteFrames, YoutubeIE):
_YOUTUBE_URL = 'https://www.youtube.com'
Expand Down
2 changes: 1 addition & 1 deletion thumbframes_dl/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.13.0"
__version__ = "0.14.0"

0 comments on commit 27f04d6

Please sign in to comment.