diff --git a/libs/community/google/youtube/garf/community/google/youtube/__init__.py b/libs/community/google/youtube/garf/community/google/youtube/__init__.py index ff244be..22d5ffe 100644 --- a/libs/community/google/youtube/garf/community/google/youtube/__init__.py +++ b/libs/community/google/youtube/garf/community/google/youtube/__init__.py @@ -32,4 +32,4 @@ 'YouTubeAnalyticsApiReportFetcher', ] -__version__ = '1.0.0' +__version__ = '1.1.0' diff --git a/libs/community/google/youtube/garf/community/google/youtube/api_clients.py b/libs/community/google/youtube/garf/community/google/youtube/api_clients.py index a086f88..49045d7 100644 --- a/libs/community/google/youtube/garf/community/google/youtube/api_clients.py +++ b/libs/community/google/youtube/garf/community/google/youtube/api_clients.py @@ -20,7 +20,7 @@ import os import warnings from collections import defaultdict -from typing import Any +from typing import Any, Final import dateutil import pydantic @@ -34,6 +34,48 @@ logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) +ALLOWED_PARAMETERS: Final[tuple[str, ...]] = ( + 'id', + 'forHandle', + 'forUsername', + 'managedByName', + 'mine', + 'chart', + 'myRating', + 'maxHeight', + 'maxWidth', + 'onBehalfOfContentOwner', + 'regionCode', + 'videoCategoryId', + 'parentId', + 'textFormat', + 'videoId', + 'order', + 'moderationStatus', + 'searchTerms', + 'playlistId', + 'channelId', + 'channelType', + 'location', + 'publishedAfter', + 'publishedBefore', + 'q', + 'maxResults', + 'relevanceLanguage', + 'safeSearch', + 'topicId', + 'type', + 'videoCaption', + 'videoDuration', + 'videoEmbeddable', + 'videoLicence', + 'videoPaidProductPlacement', + 'videoSyndicated', + 'videoType', + 'mode', + 'filterByMemberChannelId', +) + class YouTubeDataApiClientError(exceptions.GarfYouTubeDataApiError): """API client specific exception.""" @@ -75,8 +117,17 @@ def service(self): return build('youtube', self.api_version, developerKey=self.api_key) def get_types(self, request): + resource_mapping = { + 'videos': 'Video', + 'channels': 'Channel', + 'playlistItems': 'PlaylistItem', + } + if not (name := resource_mapping.get(request.resource_name)): + raise YouTubeDataApiClientError( + f'Unsupported resource for interring types: {request.resource_name}' + ) fields = {field.split('.')[0] for field in request.fields} - return self.infer_types('Video', fields) + return self.infer_types(name, fields) def infer_types(self, name, fields): results = {} @@ -124,13 +175,17 @@ def get_response( self, request: query_editor.BaseQueryElements, **kwargs: str ) -> api_clients.GarfApiResponse: span = trace.get_current_span() + api_parameters = {} for k, v in kwargs.items(): - span.set_attribute(f'youtube_data_api.kwargs.{k}', v) + if k in ALLOWED_PARAMETERS: + span.set_attribute(f'youtube_data_api.kwargs.{k}', v) + api_parameters[k] = v + fields = {field.split('.')[0] for field in request.fields} sub_service = getattr(self.service, request.resource_name)() part_str = ','.join(fields) - result = self._list(sub_service, part=part_str, **kwargs) + result = self._list(sub_service, part=part_str, **api_parameters) results = [] if data := result.get('items'): results.extend(data) @@ -139,7 +194,7 @@ def get_response( sub_service, part=part_str, next_page_token=result.get('nextPageToken'), - **kwargs, + **api_parameters, ) if data := result.get('items'): results.extend(data) diff --git a/libs/community/google/youtube/garf/community/google/youtube/builtins/channel_videos.py b/libs/community/google/youtube/garf/community/google/youtube/builtins/channel_videos.py index 6ec500f..beaae03 100644 --- a/libs/community/google/youtube/garf/community/google/youtube/builtins/channel_videos.py +++ b/libs/community/google/youtube/garf/community/google/youtube/builtins/channel_videos.py @@ -34,7 +34,9 @@ def get_youtube_channel_videos( """ videos = report_fetcher.fetch( channel_videos_query, - playlistId=videos_playlist.to_list(row_type='scalar', distinct=True), + playlistId=','.join( + videos_playlist.to_list(row_type='scalar', distinct=True) + ), maxResults=50, ).to_list(row_type='scalar', distinct=True) diff --git a/libs/community/google/youtube/garf/community/google/youtube/report_fetcher.py b/libs/community/google/youtube/garf/community/google/youtube/report_fetcher.py index 1750234..ca47cc2 100644 --- a/libs/community/google/youtube/garf/community/google/youtube/report_fetcher.py +++ b/libs/community/google/youtube/garf/community/google/youtube/report_fetcher.py @@ -15,26 +15,23 @@ import functools import itertools +import logging import operator from collections.abc import Iterable, MutableSequence -from typing import Any, Final +from typing import Final -from garf.community.google.youtube import api_clients, builtins, query_editor +import garf.core.query_editor +from garf.community.google.youtube import ( + api_clients, + builtins, + exceptions, + query_editor, + simulator, +) from garf.core import parsers, report, report_fetcher from typing_extensions import override -ALLOWED_FILTERS: Final[tuple[str, ...]] = ( - 'id', - 'regionCode', - 'videoCategoryId', - 'chart', - 'forHandle', - 'forUsername', - 'onBehalfOfContentOwner', - 'playlistId', - 'videoId', - 'channelId', -) +logger = logging.getLogger('garf.community.google.youtube.report_fetcher') MAX_BATCH_SIZE: Final[int] = 50 @@ -45,6 +42,10 @@ def _batched(iterable: Iterable[str], chunk_size: int): yield chunk +class YouTubeDataApiReportFetcherError(exceptions.GarfYouTubeApiError): + """YouTubeDataApiReportFetcher specific error.""" + + class YouTubeDataApiReportFetcher(report_fetcher.ApiReportFetcher): """Defines report fetcher for YouTube Data API.""" @@ -68,31 +69,58 @@ def __init__( @override def fetch( self, - query_specification, - args: dict[str, Any] = None, + query_specification: str | query_editor.YouTubeDataApiQuery, + args: garf.core.query_editor.GarfQueryParameters | None = None, + title: str | None = None, **kwargs, ) -> report.GarfReport: results = [] filter_identifier = list( - set(ALLOWED_FILTERS).intersection(set(kwargs.keys())) + set(api_clients.ALLOWED_PARAMETERS).intersection(set(kwargs.keys())) ) if len(filter_identifier) == 1: name = filter_identifier[0] ids = kwargs.pop(name) if not isinstance(ids, MutableSequence): ids = ids.split(',') + if not ids: + logger.warning('No values provided for %s parameter', name) + placeholder_report = simulator.YouTubeDataApiReportSimulator( + api_client=self.api_client, + parser=self.parser, + query_spec=self.query_specification_builder, + ).simulate( + query_specification=query_specification, args=args, title=title + ) + return report.GarfReport( + placeholder_results=placeholder_report.results, + column_names=placeholder_report.column_names, + ) else: - return super().fetch(query_specification, args, **kwargs) + return super().fetch( + query_specification=query_specification, + args=args, + title=title, + **kwargs, + ) if name == 'id': for batch in _batched(ids, MAX_BATCH_SIZE): res = super().fetch( - query_specification, args, **{name: batch}, **kwargs + query_specification=query_specification, + args=args, + title=title, + **{name: batch}, + **kwargs, ) results.append(res) else: for element in ids: res = super().fetch( - query_specification, args, **{name: element}, **kwargs + query_specification=query_specification, + args=args, + title=title, + **{name: element}, + **kwargs, ) results.append(res) res = functools.reduce(operator.add, results) diff --git a/libs/community/google/youtube/tests/e2e/test_lib.py b/libs/community/google/youtube/tests/e2e/test_lib.py index 20ca798..a3bae05 100644 --- a/libs/community/google/youtube/tests/e2e/test_lib.py +++ b/libs/community/google/youtube/tests/e2e/test_lib.py @@ -71,29 +71,22 @@ def test_builtin_channel_videos_with_enhanced_attributes(): FROM builtin.channelVideos """ - # Use a test channel ID from environment - test_channel_id = os.getenv('YOUTUBE_CHANNEL_ID', 'UC_x5XG1OV2P6uZZ5FSM9Ttw') + test_channel_id = os.getenv('YOUTUBE_CHANNEL_ID', 'UCmMowAX5A4tYd4Utq1Lymog') fetched_report = fetcher.fetch(query, id=[test_channel_id]) - # Verify we got results - assert fetched_report, 'No videos returned from channelVideos' - assert len(fetched_report) > 0, 'channelVideos returned empty list' - - # Verify the enhanced attributes are present in the first video - first_video = fetched_report[0] - assert hasattr(first_video, 'channel_id'), 'Missing channel_id attribute' - assert hasattr(first_video, 'video_id'), 'Missing video_id attribute' - assert hasattr(first_video, 'title'), 'Missing title attribute' - assert hasattr(first_video, 'view_count'), 'Missing view_count attribute' - assert hasattr(first_video, 'duration'), 'Missing duration attribute' - - # Verify the values are not None/empty - assert first_video.video_id, 'video_id is empty' - assert first_video.title, 'title is empty' - - print(f'✓ channelVideos test passed! Found {len(fetched_report)} videos') - print(f' First video: {first_video.title}') - print(f' Video ID: {first_video.video_id}') - if hasattr(first_video, 'view_count') and first_video.view_count: - print(f' Views: {first_video.view_count}') + assert fetched_report + assert fetched_report.column_names == [ + 'channel_id', + 'video_id', + 'title', + 'description', + 'published_at', + 'view_count', + 'like_count', + 'comment_count', + 'duration', + 'privacy_status', + ] + + assert fetched_report[0].channel_id == test_channel_id