diff --git a/yt_fts/db_utils.py b/yt_fts/db_utils.py index c045dc9..3169795 100644 --- a/yt_fts/db_utils.py +++ b/yt_fts/db_utils.py @@ -87,9 +87,10 @@ def add_video(channel_id, video_id, video_title, video_url, video_date): (video_id,)).fetchone() if existing_video is None: - cur.execute( - "INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id) VALUES (?, ?, ?, ?, ?)", - (video_id, video_title, video_url, video_date, channel_id)) + cur.execute(""" + INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id) + VALUES (?, ?, ?, ?, ?) + """,(video_id, video_title, video_url, video_date, channel_id)) conn.commit() else: @@ -358,6 +359,7 @@ def delete_channel_from_chroma(channel_id): where={"channel_id": channel_id} ) + def get_channel_id_from_rowid(rowid): db = Database(get_db_path()) diff --git a/yt_fts/summarize.py b/yt_fts/summarize.py index 7ac8464..484a8db 100644 --- a/yt_fts/summarize.py +++ b/yt_fts/summarize.py @@ -1,7 +1,9 @@ import sys +import sqlite3 from urllib.parse import urlparse, parse_qs from rich.console import Console +from .config import get_db_path # determine if input_video is url or video id # if it's a url get the video id @@ -10,6 +12,11 @@ # if the video id is not in the database download the transcript # feed the transcript to an llm and print the summary +# https://www.youtube.com/watch?v=Xjk6d5fPs_k +# https://youtu.be/Xjk6d5fPs_k?si=BBb2URutUT2gG4th +# https://youtu.be/Xjk6d5fPs_k +# https://www.youtube.com/watch?v=Xjk6d5fPs_k&si=BBb2URutUT2gG4th + class SummarizeHandler: def __init__(self, openai_client, input_video): @@ -23,20 +30,77 @@ def __init__(self, openai_client, input_video): else: self.video_id = input_video - - def summarize_video(self): console = self.console - input_video = self.input_video + video_id = self.video_id - + if self.video_in_database(video_id): - def get_video_id_from_url(self, video_url): - # https://www.youtube.com/watch?v=Xjk6d5fPs_k - # https://youtu.be/Xjk6d5fPs_k?si=BBb2URutUT2gG4th - # https://youtu.be/Xjk6d5fPs_k - # https://www.youtube.com/watch?v=Xjk6d5fPs_k&si=BBb2URutUT2gG4th + transcript_text = self.get_transcript_from_database(video_id) + + + + + def get_transcript_from_database(self, video_id) -> str: + + console = self.console + try: + conn = sqlite3.connect(get_db_path()) + curr = conn.cursor() + curr.execute( + """ + SELECT + start_time, text + FROM + Subtitles + WHERE + video_id = ? + """, (video_id,) + ) + res = curr.fetchall() + transcript = "" + for row in res: + start_time, text = row + text = text.strip() + if len(text) == 0: + continue + transcript += f"{start_time[:-4]}: {text}\n" + conn.close() + return transcript + except Exception as e: + console.print(f"[red]Error:[/red] {e}") + sys.exit(1) + finally: + conn.close() + def video_in_database(self, video_id) -> bool: + console = self.console + try: + conn = sqlite3.connect(get_db_path()) + curr = conn.cursor() + curr.execute( + """ + SELECT + count(*) + FROM + Videos + WHERE + video_id = ? + """, (video_id,) + ) + count = curr.fetchone()[0] + conn.close() + if count > 0: + return True + return False + except Exception as e: + console.print(f"[red]Error:[/red] {e}") + sys.exit(1) + finally: + conn.close() + + + def get_video_id_from_url(self, video_url): console = self.console video_url = video_url.strip('/') parsed = urlparse(video_url)