Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@
'YouTubeAnalyticsApiReportFetcher',
]

__version__ = '1.0.0'
__version__ = '1.1.0'
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import os
import warnings
from collections import defaultdict
from typing import Any
from typing import Any, Final

import dateutil
import pydantic
Expand All @@ -34,6 +34,48 @@

logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)

# Query parameters that may be forwarded to YouTube Data API list() calls.
# Any kwarg not in this allow-list is silently dropped before the request
# is built, so every officially supported filter/selector must appear here.
ALLOWED_PARAMETERS: Final[tuple[str, ...]] = (
  'id',
  'forHandle',
  'forUsername',
  'managedByName',
  'mine',
  'chart',
  'myRating',
  'maxHeight',
  'maxWidth',
  'onBehalfOfContentOwner',
  'regionCode',
  'videoCategoryId',
  'parentId',
  'textFormat',
  'videoId',
  'order',
  'moderationStatus',
  'searchTerms',
  'playlistId',
  'channelId',
  'channelType',
  'location',
  'publishedAfter',
  'publishedBefore',
  'q',
  'maxResults',
  'relevanceLanguage',
  'safeSearch',
  'topicId',
  'type',
  'videoCaption',
  'videoDuration',
  'videoEmbeddable',
  # Correct spelling per the YouTube Data API search.list reference.
  'videoLicense',
  # Misspelled legacy entry kept for backward compatibility with callers
  # that already pass it; the API itself only accepts 'videoLicense'.
  'videoLicence',
  'videoPaidProductPlacement',
  'videoSyndicated',
  'videoType',
  'mode',
  'filterByMemberChannelId',
)


class YouTubeDataApiClientError(exceptions.GarfYouTubeDataApiError):
  """Raised for failures specific to the YouTube Data API client."""
Expand Down Expand Up @@ -75,8 +117,17 @@ def service(self):
return build('youtube', self.api_version, developerKey=self.api_key)

def get_types(self, request):
  """Infers field types for a supported YouTube Data API resource.

  Args:
    request: Query elements exposing `resource_name` and `fields`.

  Returns:
    Result of `infer_types` for the resource's discovery schema name.

  Raises:
    YouTubeDataApiClientError: If the resource has no known schema mapping.
  """
  # Maps API resource collection names to their discovery schema names.
  resource_mapping = {
    'videos': 'Video',
    'channels': 'Channel',
    'playlistItems': 'PlaylistItem',
  }
  if not (name := resource_mapping.get(request.resource_name)):
    # Fixed typo in user-facing message: 'interring' -> 'inferring'.
    raise YouTubeDataApiClientError(
      f'Unsupported resource for inferring types: {request.resource_name}'
    )
  # Only the top-level part of each field matters for schema resolution
  # (e.g. 'snippet.title' -> 'snippet').
  fields = {field.split('.')[0] for field in request.fields}
  return self.infer_types(name, fields)

def infer_types(self, name, fields):
results = {}
Expand Down Expand Up @@ -124,13 +175,17 @@ def get_response(
self, request: query_editor.BaseQueryElements, **kwargs: str
) -> api_clients.GarfApiResponse:
span = trace.get_current_span()
api_parameters = {}
for k, v in kwargs.items():
span.set_attribute(f'youtube_data_api.kwargs.{k}', v)
if k in ALLOWED_PARAMETERS:
span.set_attribute(f'youtube_data_api.kwargs.{k}', v)
api_parameters[k] = v

fields = {field.split('.')[0] for field in request.fields}
sub_service = getattr(self.service, request.resource_name)()
part_str = ','.join(fields)

result = self._list(sub_service, part=part_str, **kwargs)
result = self._list(sub_service, part=part_str, **api_parameters)
results = []
if data := result.get('items'):
results.extend(data)
Expand All @@ -139,7 +194,7 @@ def get_response(
sub_service,
part=part_str,
next_page_token=result.get('nextPageToken'),
**kwargs,
**api_parameters,
)
if data := result.get('items'):
results.extend(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def get_youtube_channel_videos(
"""
videos = report_fetcher.fetch(
channel_videos_query,
playlistId=videos_playlist.to_list(row_type='scalar', distinct=True),
playlistId=','.join(
videos_playlist.to_list(row_type='scalar', distinct=True)
),
maxResults=50,
).to_list(row_type='scalar', distinct=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,23 @@

import functools
import itertools
import logging
import operator
from collections.abc import Iterable, MutableSequence
from typing import Any, Final
from typing import Final

from garf.community.google.youtube import api_clients, builtins, query_editor
import garf.core.query_editor
from garf.community.google.youtube import (
api_clients,
builtins,
exceptions,
query_editor,
simulator,
)
from garf.core import parsers, report, report_fetcher
from typing_extensions import override

ALLOWED_FILTERS: Final[tuple[str, ...]] = (
'id',
'regionCode',
'videoCategoryId',
'chart',
'forHandle',
'forUsername',
'onBehalfOfContentOwner',
'playlistId',
'videoId',
'channelId',
)
logger = logging.getLogger('garf.community.google.youtube.report_fetcher')

MAX_BATCH_SIZE: Final[int] = 50

Expand All @@ -45,6 +42,10 @@ def _batched(iterable: Iterable[str], chunk_size: int):
yield chunk


class YouTubeDataApiReportFetcherError(exceptions.GarfYouTubeApiError):
  """Raised for failures specific to YouTubeDataApiReportFetcher."""


class YouTubeDataApiReportFetcher(report_fetcher.ApiReportFetcher):
"""Defines report fetcher for YouTube Data API."""

Expand All @@ -68,31 +69,58 @@ def __init__(
@override
def fetch(
self,
query_specification,
args: dict[str, Any] = None,
query_specification: str | query_editor.YouTubeDataApiQuery,
args: garf.core.query_editor.GarfQueryParameters | None = None,
title: str | None = None,
**kwargs,
) -> report.GarfReport:
results = []
filter_identifier = list(
set(ALLOWED_FILTERS).intersection(set(kwargs.keys()))
set(api_clients.ALLOWED_PARAMETERS).intersection(set(kwargs.keys()))
)
if len(filter_identifier) == 1:
name = filter_identifier[0]
ids = kwargs.pop(name)
if not isinstance(ids, MutableSequence):
ids = ids.split(',')
if not ids:
logger.warning('No values provided for %s parameter', name)
placeholder_report = simulator.YouTubeDataApiReportSimulator(
api_client=self.api_client,
parser=self.parser,
query_spec=self.query_specification_builder,
).simulate(
query_specification=query_specification, args=args, title=title
)
return report.GarfReport(
placeholder_results=placeholder_report.results,
column_names=placeholder_report.column_names,
)
else:
return super().fetch(query_specification, args, **kwargs)
return super().fetch(
query_specification=query_specification,
args=args,
title=title,
**kwargs,
)
if name == 'id':
for batch in _batched(ids, MAX_BATCH_SIZE):
res = super().fetch(
query_specification, args, **{name: batch}, **kwargs
query_specification=query_specification,
args=args,
title=title,
**{name: batch},
**kwargs,
)
results.append(res)
else:
for element in ids:
res = super().fetch(
query_specification, args, **{name: element}, **kwargs
query_specification=query_specification,
args=args,
title=title,
**{name: element},
**kwargs,
)
results.append(res)
res = functools.reduce(operator.add, results)
Expand Down
39 changes: 16 additions & 23 deletions libs/community/google/youtube/tests/e2e/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,29 +71,22 @@ def test_builtin_channel_videos_with_enhanced_attributes():
FROM builtin.channelVideos
"""

# Use a test channel ID from environment
test_channel_id = os.getenv('YOUTUBE_CHANNEL_ID', 'UC_x5XG1OV2P6uZZ5FSM9Ttw')
test_channel_id = os.getenv('YOUTUBE_CHANNEL_ID', 'UCmMowAX5A4tYd4Utq1Lymog')

fetched_report = fetcher.fetch(query, id=[test_channel_id])

# Verify we got results
assert fetched_report, 'No videos returned from channelVideos'
assert len(fetched_report) > 0, 'channelVideos returned empty list'

# Verify the enhanced attributes are present in the first video
first_video = fetched_report[0]
assert hasattr(first_video, 'channel_id'), 'Missing channel_id attribute'
assert hasattr(first_video, 'video_id'), 'Missing video_id attribute'
assert hasattr(first_video, 'title'), 'Missing title attribute'
assert hasattr(first_video, 'view_count'), 'Missing view_count attribute'
assert hasattr(first_video, 'duration'), 'Missing duration attribute'

# Verify the values are not None/empty
assert first_video.video_id, 'video_id is empty'
assert first_video.title, 'title is empty'

print(f'✓ channelVideos test passed! Found {len(fetched_report)} videos')
print(f' First video: {first_video.title}')
print(f' Video ID: {first_video.video_id}')
if hasattr(first_video, 'view_count') and first_video.view_count:
print(f' Views: {first_video.view_count}')
assert fetched_report
assert fetched_report.column_names == [
'channel_id',
'video_id',
'title',
'description',
'published_at',
'view_count',
'like_count',
'comment_count',
'duration',
'privacy_status',
]

assert fetched_report[0].channel_id == test_channel_id