Skip to content

Commit

Permalink
get transcript from db method
Browse files Browse the repository at this point in the history
  • Loading branch information
NotJoeMartinez committed Sep 6, 2024
1 parent f8dfc30 commit 9547660
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 12 deletions.
8 changes: 5 additions & 3 deletions yt_fts/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ def add_video(channel_id, video_id, video_title, video_url, video_date):
(video_id,)).fetchone()

if existing_video is None:
cur.execute(
"INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id) VALUES (?, ?, ?, ?, ?)",
(video_id, video_title, video_url, video_date, channel_id))
cur.execute("""
INSERT INTO Videos (video_id, video_title, video_url, video_date, channel_id)
VALUES (?, ?, ?, ?, ?)
""",(video_id, video_title, video_url, video_date, channel_id))
conn.commit()

else:
Expand Down Expand Up @@ -358,6 +359,7 @@ def delete_channel_from_chroma(channel_id):
where={"channel_id": channel_id}
)


def get_channel_id_from_rowid(rowid):
db = Database(get_db_path())

Expand Down
82 changes: 73 additions & 9 deletions yt_fts/summarize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import sys
import sqlite3
from urllib.parse import urlparse, parse_qs

from rich.console import Console
from .config import get_db_path

# determine if input_video is url or video id
# if it's a url get the video id
Expand All @@ -10,6 +12,11 @@
# if the video id is not in the database download the transcript
# feed the transcript to an llm and print the summary

# https://www.youtube.com/watch?v=Xjk6d5fPs_k
# https://youtu.be/Xjk6d5fPs_k?si=BBb2URutUT2gG4th
# https://youtu.be/Xjk6d5fPs_k
# https://www.youtube.com/watch?v=Xjk6d5fPs_k&si=BBb2URutUT2gG4th


class SummarizeHandler:
def __init__(self, openai_client, input_video):
Expand All @@ -23,20 +30,77 @@ def __init__(self, openai_client, input_video):
else:
self.video_id = input_video



def summarize_video(self):
console = self.console
input_video = self.input_video
video_id = self.video_id

if self.video_in_database(video_id):

def get_video_id_from_url(self, video_url):
# https://www.youtube.com/watch?v=Xjk6d5fPs_k
# https://youtu.be/Xjk6d5fPs_k?si=BBb2URutUT2gG4th
# https://youtu.be/Xjk6d5fPs_k
# https://www.youtube.com/watch?v=Xjk6d5fPs_k&si=BBb2URutUT2gG4th
transcript_text = self.get_transcript_from_database(video_id)




def get_transcript_from_database(self, video_id) -> str:
    """Return the stored transcript for *video_id* as one string.

    Reads every ``(start_time, text)`` row for the video from the
    ``Subtitles`` table and renders each non-empty subtitle as a
    ``"<start_time>: <text>"`` line.

    Args:
        video_id: Value matched against ``Subtitles.video_id``.

    Returns:
        The newline-terminated transcript lines joined together, or an
        empty string when no non-empty subtitles are stored.

    Raises:
        SystemExit: With status 1 after printing the error to the
            console if opening or querying the database fails.
    """
    console = self.console
    # Bind before the ``try`` so the ``finally`` clause never touches an
    # unassigned name when ``sqlite3.connect`` itself fails; otherwise the
    # resulting UnboundLocalError would replace the intended SystemExit.
    conn = None
    try:
        conn = sqlite3.connect(get_db_path())
        curr = conn.cursor()
        # NOTE(review): there is no ORDER BY, so row order is whatever
        # SQLite returns -- confirm this matches playback order.
        curr.execute(
            """
            SELECT
                start_time, text
            FROM
                Subtitles
            WHERE
                video_id = ?
            """, (video_id,)
        )

        lines = []
        for start_time, text in curr.fetchall():
            text = text.strip()
            if not text:
                # Skip subtitles that are empty or whitespace-only.
                continue
            # Drops the last four characters of the timestamp --
            # presumably the ".mmm" millisecond suffix; TODO confirm.
            lines.append(f"{start_time[:-4]}: {text}\n")
        return "".join(lines)
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    finally:
        if conn is not None:
            conn.close()

def video_in_database(self, video_id) -> bool:
    """Report whether *video_id* has a row in the ``Videos`` table.

    Args:
        video_id: Value matched against ``Videos.video_id``.

    Returns:
        ``True`` if at least one matching row exists, otherwise ``False``.

    Raises:
        SystemExit: With status 1 after printing the error to the
            console if opening or querying the database fails.
    """
    console = self.console
    # Bind before the ``try`` so the ``finally`` clause never touches an
    # unassigned name when ``sqlite3.connect`` itself fails; otherwise the
    # resulting UnboundLocalError would replace the intended SystemExit.
    conn = None
    try:
        conn = sqlite3.connect(get_db_path())
        curr = conn.cursor()
        curr.execute(
            """
            SELECT
                count(*)
            FROM
                Videos
            WHERE
                video_id = ?
            """, (video_id,)
        )
        # count(*) always yields exactly one row, so fetchone() is not None.
        count = curr.fetchone()[0]
        return count > 0
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)
    finally:
        if conn is not None:
            conn.close()


def get_video_id_from_url(self, video_url):
console = self.console
video_url = video_url.strip('/')
parsed = urlparse(video_url)
Expand Down

0 comments on commit 9547660

Please sign in to comment.