From 9f1a3a4e7684fe22ee33bde1407ef16d99e523a7 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Mon, 17 Apr 2023 23:39:25 +0530
Subject: [PATCH 01/10] feat: added deepgram for transcription

---
 app/application.py | 48 ++++++++++++++++++++++++++++++++++++++--------
 requirements.txt   |  5 +++--
 2 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/app/application.py b/app/application.py
index 57b6918..1a80707 100644
--- a/app/application.py
+++ b/app/application.py
@@ -5,7 +5,6 @@
 from clint.textui import progress
 import pytube
 from moviepy.editor import VideoFileClip
-import whisper
 import os
 import static_ffmpeg
 from app import __version__
@@ -15,6 +14,8 @@
 import time
 from dotenv import dotenv_values
 import yt_dlp
+from deepgram import Deepgram
+import mimetypes
 
 
 def download_video(url):
@@ -165,6 +166,7 @@ def get_playlist_videos(url):
         print(e)
         return
 
+
 def get_audio_file(url, title):
     print("URL: " + url)
     print("downloading audio file")
@@ -183,14 +185,42 @@ def get_audio_file(url, title):
         return
 
 
+def decimal_to_sexagesimal(dec):
+    sec = int(dec % 60)
+    minu = int((dec // 60) % 60)
+    hrs = int((dec // 60) // 60)
+
+    return f'{hrs}:{minu}:{sec}'
+
+
 def process_mp3(filename, model):
     print("Transcribing audio to text...")
     try:
-        mymodel = whisper.load_model(model)
-        result = mymodel.transcribe(filename[:-4] + ".mp3")
-        result = result["text"]
-        print("Removed video and audio files")
-        return result
+        config = dotenv_values(".env")
+        dg_client = Deepgram(config["DEEPGRAM_API_KEY"])
+
+        with open(filename, "rb") as audio:
+            mimeType = mimetypes.MimeTypes().guess_type(filename)[0]
+            source = {'buffer': audio, 'mimetype': mimeType}
+            response = dg_client.transcription.sync_prerecorded(source, {'punctuate': True, 'speaker_labels': True,
+                                                                         'diarize': True, 'smart_formatting': True})
+            para = ""
+            string = ""
+            curr_speaker = None
+            for word in response["results"]["channels"][0]["alternatives"][0]["words"]:
+                if word["speaker"] != curr_speaker:
+                    if para != "":
+                        para = para.strip(" ")
+                        string = string + para + "\n\n"
+                    para = ""
+                    string = string + f'Speaker {word["speaker"]}: {decimal_to_sexagesimal(word["start"])}'
+                    curr_speaker = word["speaker"]
+                    string = string + '\n\n'
+
+                para = para + " " + word["punctuated_word"]
+            para = para.strip(" ")
+            string = string + para
+            return string
     except Exception as e:
         print("Error transcribing audio to text")
         print(e)
@@ -265,7 +295,8 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
         print(e)
 
 
-def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title, test,
+def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title,
+                     test,
                      pr):
     try:
         print("writing .md file")
@@ -350,7 +381,8 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
             result = test
         else:
             result = process_mp3(abs_path, model)
-        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date, tags=tags,
+        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
+                                         tags=tags,
                                          category=category, speakers=speakers, username=username, local=local,
                                          video_title=filename[:-4], test=test, pr=pr)
 
diff --git a/requirements.txt b/requirements.txt
index 089bf23..1e0993a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
-openai-whisper==20230314
 Click==7.0
 clint==0.5.1
+deepgram_sdk==2.4.0
 moviepy==1.0.3
+openai_whisper==20230314
 pytest==7.2.1
 python-dotenv==1.0.0
 pytube==12.1.2
 requests==2.28.2
-setuptools==45.2.0
+setuptools==67.6.1
 static_ffmpeg==2.3
 yt_dlp==2023.3.4

From eb288eccde8ed4c163ff8badaf9abdafabd028a1 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Tue, 18 Apr 2023 00:06:25 +0530
Subject: [PATCH 02/10] fix: removed the option to select different models

---
 app/application.py | 29 ++++++++++++++---------------
 test/test_audio.py |  6 +++---
 test/test_video.py |  6 +++---
 transcriber.py     |  6 +-----
 4 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/app/application.py b/app/application.py
index 1a80707..19dcd98 100644
--- a/app/application.py
+++ b/app/application.py
@@ -193,7 +193,7 @@ def decimal_to_sexagesimal(dec):
     return f'{hrs}:{minu}:{sec}'
 
 
-def process_mp3(filename, model):
+def process_mp3(filename):
     print("Transcribing audio to text...")
     try:
         config = dotenv_values(".env")
@@ -352,7 +352,7 @@ def check_source_type(source):
         return None
 
 
-def process_audio(source, title, event_date, tags, category, speakers, loc, model, username, local,
+def process_audio(source, title, event_date, tags, category, speakers, loc, username, local,
                   created_files, test, pr):
     try:
         print("audio file detected")
@@ -380,7 +380,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
         if test:
             result = test
         else:
-            result = process_mp3(abs_path, model)
+            result = process_mp3(abs_path)
         absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
                                          tags=tags,
                                          category=category, speakers=speakers, username=username, local=local,
@@ -397,7 +397,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
         print(e)
 
 
-def process_videos(source, title, event_date, tags, category, speakers, loc, model, username, created_files,
+def process_videos(source, title, event_date, tags, category, speakers, loc, username, created_files,
                    chapters, pr):
     try:
         print("Playlist detected")
@@ -411,12 +411,11 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, mod
             print("Playlist is empty")
             return
 
-        selected_model = model + '.en'
         filename = ""
 
         for video in videos:
             filename = process_video(video=video, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, model=selected_model, username=username,
+                                     speakers=speakers, loc=loc, username=username,
                                      pr=pr, created_files=created_files, chapters=chapters, test=False)
             if filename is None:
                 return None
@@ -426,7 +425,7 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, mod
         print(e)
 
 
-def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
+def process_video(video, title, event_date, tags, category, speakers, loc, username, created_files,
                   chapters, test, pr, local=False):
     try:
         result = ""
@@ -472,7 +471,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
                     if file is None:
                         print("File not found")
                         return None
-                    temp_res = process_mp3(filename=temp_filename, model=model)
+                    temp_res = process_mp3(filename=temp_filename)
                     created_files.append(temp_filename[:-4] + ".mp3")
                 else:
                     temp_res = ""
@@ -490,7 +489,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
             if not test:
                 convert_video_to_mp3(abs_path)
                 created_files.append(abs_path[:-4] + '.mp3')
-                result = process_mp3(abs_path[:-4] + '.mp3', model)
+                result = process_mp3(abs_path[:-4] + '.mp3')
                 created_files.append(abs_path[:-4] + ".mp3")
             else:
                 result = ""
@@ -511,7 +510,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
         print(e)
 
 
-def process_source(source, title, event_date, tags, category, speakers, loc, model, username, source_type,
+def process_source(source, title, event_date, tags, category, speakers, loc, username, source_type,
                    created_files, chapters, local=False, test=None, pr=False):
     try:
         if not os.path.isdir("tmp"):
@@ -522,24 +521,24 @@ def process_source(source, title, event_date, tags, category, speakers, loc, mod
 
         if source_type == 'audio':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, model=model, username=username,
+                                     speakers=speakers, loc=loc, username=username,
                                      local=local, created_files=created_files, test=test, pr=pr)
         elif source_type == 'audio-local':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, model=model, username=username,
+                                     speakers=speakers, loc=loc, username=username,
                                      local=True, created_files=created_files, test=test, pr=pr)
         elif source_type == 'playlist':
             filename = process_videos(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                      speakers=speakers, loc=loc, model=model, username=username,
+                                      speakers=speakers, loc=loc, username=username,
                                       created_files=created_files, chapters=chapters, pr=pr)
         elif source_type == 'video-local':
             filename = process_video(video=source, title=title, event_date=event_date,
-                                     tags=tags, category=category, speakers=speakers, loc=loc, model=model,
+                                     tags=tags, category=category, speakers=speakers, loc=loc,
                                      username=username, created_files=created_files, local=True,
                                      chapters=chapters, test=test, pr=pr)
         else:
             filename = process_video(video=source, title=title, event_date=event_date,
-                                     tags=tags, category=category, speakers=speakers, loc=loc, model=model,
+                                     tags=tags, category=category, speakers=speakers, loc=loc,
                                      username=username, created_files=created_files, local=local,
                                      chapters=chapters, test=test, pr=pr)
         return filename
diff --git a/test/test_audio.py b/test/test_audio.py
index 731a4ed..24aac52 100644
--- a/test/test_audio.py
+++ b/test/test_audio.py
@@ -49,7 +49,7 @@ def test_audio_with_title():
     username = "username"
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=None, tags=None, category=None,
-                                          speakers=None, loc="yada/yada", model="tiny", username=username,
+                                          speakers=None, loc="yada/yada", username=username,
                                           source_type="audio", local=True, test=result, chapters=False, pr=False,
                                           created_files=created_files)
     assert os.path.isfile(filename)
@@ -68,7 +68,7 @@ def test_audio_without_title():
     created_files = []
     title = None
     filename = application.process_source(source=source, title=title, event_date=None, tags=None, category=None,
-                                          speakers=None, loc="yada/yada", model="tiny", username=username, pr=False,
+                                          speakers=None, loc="yada/yada", username=username, pr=False,
                                           source_type="audio", local=True, created_files=created_files, test=result,
                                           chapters=False)
     assert filename is None
@@ -91,7 +91,7 @@ def test_audio_with_all_data():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
+                                          speakers=speakers, loc="yada/yada", username=username,
                                           source_type="audio", local=True, test=result, chapters=False,
                                           created_files=created_files, pr=False)
     category = [cat.strip() for cat in category.split(",")]
diff --git a/test/test_video.py b/test/test_video.py
index 2c65a6c..8ec747c 100644
--- a/test/test_video.py
+++ b/test/test_video.py
@@ -81,7 +81,7 @@ def test_video_with_title():
     date = None
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
+                                          speakers=speakers, loc="yada/yada", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=result, chapters=False)
     assert os.path.isfile(filename)
@@ -107,7 +107,7 @@ def test_video_with_all_options():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
+                                          speakers=speakers, loc="yada/yada", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=True, chapters=False)
     assert os.path.isfile(filename)
@@ -137,7 +137,7 @@ def test_video_with_chapters():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
+                                          speakers=speakers, loc="yada/yada", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=result, chapters=True, pr=True)
     chapter_names = []
diff --git a/transcriber.py b/transcriber.py
index 5c068ca..5ec110a 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -26,9 +26,6 @@ def print_help(ctx, param, value):
 @click.command()
 @click.argument('source', nargs=1)
 @click.argument('loc', nargs=1)
-@click.option('-m', '--model', type=click.Choice(['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v2']), default='tiny.en',
-              help='Options for transcription model'
-              )
 @click.option('-t', '--title', type=str,
               help="Supply transcribed file title in 'quotes', title is mandatory in case of audio files")
 @click.option('-d', '--date', type=str, help="Supply the event date in format 'yyyy-mm-dd'")
@@ -47,7 +44,6 @@ def print_help(ctx, param, value):
 def add(
         source: str,
         loc: str,
-        model: str,
         title: str,
         date: str,
         tags: str,
@@ -76,7 +72,7 @@ def add(
             print("Invalid source")
             return
         filename = application.process_source(source=source, title=title, event_date=event_date, tags=tags,
-                                              category=category, speakers=speakers, loc=loc, model=model,
+                                              category=category, speakers=speakers, loc=loc,
                                               username=username, chapters=chapters, pr=pr,
                                               source_type=source_type, created_files=created_files)
         if filename:

From 9a726e502d31b1aa67b9300f1dc1039a8eddd1e3 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Thu, 20 Apr 2023 23:04:12 +0530
Subject: [PATCH 03/10] Revert "fix: removed the option to select different
 models"

This reverts commit eb288eccde8ed4c163ff8badaf9abdafabd028a1.
---
 app/application.py | 29 +++++++++++++++--------------
 test/test_audio.py |  6 +++---
 test/test_video.py |  6 +++---
 transcriber.py     |  6 +++++-
 4 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/app/application.py b/app/application.py
index 19dcd98..1a80707 100644
--- a/app/application.py
+++ b/app/application.py
@@ -193,7 +193,7 @@ def decimal_to_sexagesimal(dec):
     return f'{hrs}:{minu}:{sec}'
 
 
-def process_mp3(filename):
+def process_mp3(filename, model):
     print("Transcribing audio to text...")
     try:
         config = dotenv_values(".env")
@@ -352,7 +352,7 @@ def check_source_type(source):
         return None
 
 
-def process_audio(source, title, event_date, tags, category, speakers, loc, username, local,
+def process_audio(source, title, event_date, tags, category, speakers, loc, model, username, local,
                   created_files, test, pr):
     try:
         print("audio file detected")
@@ -380,7 +380,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, user
         if test:
             result = test
         else:
-            result = process_mp3(abs_path)
+            result = process_mp3(abs_path, model)
         absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
                                          tags=tags,
                                          category=category, speakers=speakers, username=username, local=local,
@@ -397,7 +397,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, user
         print(e)
 
 
-def process_videos(source, title, event_date, tags, category, speakers, loc, username, created_files,
+def process_videos(source, title, event_date, tags, category, speakers, loc, model, username, created_files,
                    chapters, pr):
     try:
         print("Playlist detected")
@@ -411,11 +411,12 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, use
             print("Playlist is empty")
             return
 
+        selected_model = model + '.en'
         filename = ""
 
         for video in videos:
             filename = process_video(video=video, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, username=username,
+                                     speakers=speakers, loc=loc, model=selected_model, username=username,
                                      pr=pr, created_files=created_files, chapters=chapters, test=False)
             if filename is None:
                 return None
@@ -425,7 +426,7 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, use
         print(e)
 
 
-def process_video(video, title, event_date, tags, category, speakers, loc, username, created_files,
+def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
                   chapters, test, pr, local=False):
     try:
         result = ""
@@ -471,7 +472,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, usern
                     if file is None:
                         print("File not found")
                         return None
-                    temp_res = process_mp3(filename=temp_filename)
+                    temp_res = process_mp3(filename=temp_filename, model=model)
                     created_files.append(temp_filename[:-4] + ".mp3")
                 else:
                     temp_res = ""
@@ -489,7 +490,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, usern
             if not test:
                 convert_video_to_mp3(abs_path)
                 created_files.append(abs_path[:-4] + '.mp3')
-                result = process_mp3(abs_path[:-4] + '.mp3')
+                result = process_mp3(abs_path[:-4] + '.mp3', model)
                 created_files.append(abs_path[:-4] + ".mp3")
             else:
                 result = ""
@@ -510,7 +511,7 @@ def process_video(video, title, event_date, tags, category, speakers, loc, usern
         print(e)
 
 
-def process_source(source, title, event_date, tags, category, speakers, loc, username, source_type,
+def process_source(source, title, event_date, tags, category, speakers, loc, model, username, source_type,
                    created_files, chapters, local=False, test=None, pr=False):
     try:
         if not os.path.isdir("tmp"):
@@ -521,24 +522,24 @@ def process_source(source, title, event_date, tags, category, speakers, loc, use
 
         if source_type == 'audio':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, username=username,
+                                     speakers=speakers, loc=loc, model=model, username=username,
                                      local=local, created_files=created_files, test=test, pr=pr)
         elif source_type == 'audio-local':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, username=username,
+                                     speakers=speakers, loc=loc, model=model, username=username,
                                      local=True, created_files=created_files, test=test, pr=pr)
         elif source_type == 'playlist':
             filename = process_videos(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                      speakers=speakers, loc=loc, username=username,
+                                      speakers=speakers, loc=loc, model=model, username=username,
                                       created_files=created_files, chapters=chapters, pr=pr)
         elif source_type == 'video-local':
             filename = process_video(video=source, title=title, event_date=event_date,
-                                     tags=tags, category=category, speakers=speakers, loc=loc,
+                                     tags=tags, category=category, speakers=speakers, loc=loc, model=model,
                                      username=username, created_files=created_files, local=True,
                                      chapters=chapters, test=test, pr=pr)
         else:
             filename = process_video(video=source, title=title, event_date=event_date,
-                                     tags=tags, category=category, speakers=speakers, loc=loc,
+                                     tags=tags, category=category, speakers=speakers, loc=loc, model=model,
                                      username=username, created_files=created_files, local=local,
                                      chapters=chapters, test=test, pr=pr)
         return filename
diff --git a/test/test_audio.py b/test/test_audio.py
index 24aac52..731a4ed 100644
--- a/test/test_audio.py
+++ b/test/test_audio.py
@@ -49,7 +49,7 @@ def test_audio_with_title():
     username = "username"
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=None, tags=None, category=None,
-                                          speakers=None, loc="yada/yada", username=username,
+                                          speakers=None, loc="yada/yada", model="tiny", username=username,
                                           source_type="audio", local=True, test=result, chapters=False, pr=False,
                                           created_files=created_files)
     assert os.path.isfile(filename)
@@ -68,7 +68,7 @@ def test_audio_without_title():
     created_files = []
     title = None
     filename = application.process_source(source=source, title=title, event_date=None, tags=None, category=None,
-                                          speakers=None, loc="yada/yada", username=username, pr=False,
+                                          speakers=None, loc="yada/yada", model="tiny", username=username, pr=False,
                                           source_type="audio", local=True, created_files=created_files, test=result,
                                           chapters=False)
     assert filename is None
@@ -91,7 +91,7 @@ def test_audio_with_all_data():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", username=username,
+                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
                                           source_type="audio", local=True, test=result, chapters=False,
                                           created_files=created_files, pr=False)
     category = [cat.strip() for cat in category.split(",")]
diff --git a/test/test_video.py b/test/test_video.py
index 8ec747c..2c65a6c 100644
--- a/test/test_video.py
+++ b/test/test_video.py
@@ -81,7 +81,7 @@ def test_video_with_title():
     date = None
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", username=username,
+                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=result, chapters=False)
     assert os.path.isfile(filename)
@@ -107,7 +107,7 @@ def test_video_with_all_options():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", username=username,
+                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=True, chapters=False)
     assert os.path.isfile(filename)
@@ -137,7 +137,7 @@ def test_video_with_chapters():
     date = datetime.strptime(date, '%Y-%m-%d').date()
     created_files = []
     filename = application.process_source(source=source, title=title, event_date=date, tags=tags, category=category,
-                                          speakers=speakers, loc="yada/yada", username=username,
+                                          speakers=speakers, loc="yada/yada", model="tiny", username=username,
                                           source_type="video", local=True,
                                           created_files=created_files, test=result, chapters=True, pr=True)
     chapter_names = []
diff --git a/transcriber.py b/transcriber.py
index 5ec110a..5c068ca 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -26,6 +26,9 @@ def print_help(ctx, param, value):
 @click.command()
 @click.argument('source', nargs=1)
 @click.argument('loc', nargs=1)
+@click.option('-m', '--model', type=click.Choice(['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v2']), default='tiny.en',
+              help='Options for transcription model'
+              )
 @click.option('-t', '--title', type=str,
               help="Supply transcribed file title in 'quotes', title is mandatory in case of audio files")
 @click.option('-d', '--date', type=str, help="Supply the event date in format 'yyyy-mm-dd'")
@@ -44,6 +47,7 @@ def print_help(ctx, param, value):
 def add(
         source: str,
         loc: str,
+        model: str,
         title: str,
         date: str,
         tags: str,
@@ -72,7 +76,7 @@ def add(
             print("Invalid source")
             return
         filename = application.process_source(source=source, title=title, event_date=event_date, tags=tags,
-                                              category=category, speakers=speakers, loc=loc,
+                                              category=category, speakers=speakers, loc=loc, model=model,
                                               username=username, chapters=chapters, pr=pr,
                                               source_type=source_type, created_files=created_files)
         if filename:

From 8708491c966c2d256ad6d61126060f9c98b03bea Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Thu, 20 Apr 2023 23:04:13 +0530
Subject: [PATCH 04/10] Revert "feat: added deepgram for transcription"

This reverts commit 9f1a3a4e7684fe22ee33bde1407ef16d99e523a7.
---
 app/application.py | 48 ++++++++--------------------------------------
 requirements.txt   |  5 ++---
 2 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/app/application.py b/app/application.py
index 1a80707..57b6918 100644
--- a/app/application.py
+++ b/app/application.py
@@ -5,6 +5,7 @@
 from clint.textui import progress
 import pytube
 from moviepy.editor import VideoFileClip
+import whisper
 import os
 import static_ffmpeg
 from app import __version__
@@ -14,8 +15,6 @@
 import time
 from dotenv import dotenv_values
 import yt_dlp
-from deepgram import Deepgram
-import mimetypes
 
 
 def download_video(url):
@@ -166,7 +165,6 @@ def get_playlist_videos(url):
         print(e)
         return
 
-
 def get_audio_file(url, title):
     print("URL: " + url)
     print("downloading audio file")
@@ -185,42 +183,14 @@ def get_audio_file(url, title):
         return
 
 
-def decimal_to_sexagesimal(dec):
-    sec = int(dec % 60)
-    minu = int((dec // 60) % 60)
-    hrs = int((dec // 60) // 60)
-
-    return f'{hrs}:{minu}:{sec}'
-
-
 def process_mp3(filename, model):
     print("Transcribing audio to text...")
     try:
-        config = dotenv_values(".env")
-        dg_client = Deepgram(config["DEEPGRAM_API_KEY"])
-
-        with open(filename, "rb") as audio:
-            mimeType = mimetypes.MimeTypes().guess_type(filename)[0]
-            source = {'buffer': audio, 'mimetype': mimeType}
-            response = dg_client.transcription.sync_prerecorded(source, {'punctuate': True, 'speaker_labels': True,
-                                                                         'diarize': True, 'smart_formatting': True})
-            para = ""
-            string = ""
-            curr_speaker = None
-            for word in response["results"]["channels"][0]["alternatives"][0]["words"]:
-                if word["speaker"] != curr_speaker:
-                    if para != "":
-                        para = para.strip(" ")
-                        string = string + para + "\n\n"
-                    para = ""
-                    string = string + f'Speaker {word["speaker"]}: {decimal_to_sexagesimal(word["start"])}'
-                    curr_speaker = word["speaker"]
-                    string = string + '\n\n'
-
-                para = para + " " + word["punctuated_word"]
-            para = para.strip(" ")
-            string = string + para
-            return string
+        mymodel = whisper.load_model(model)
+        result = mymodel.transcribe(filename[:-4] + ".mp3")
+        result = result["text"]
+        print("Removed video and audio files")
+        return result
     except Exception as e:
         print("Error transcribing audio to text")
         print(e)
@@ -295,8 +265,7 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
         print(e)
 
 
-def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title,
-                     test,
+def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title, test,
                      pr):
     try:
         print("writing .md file")
@@ -381,8 +350,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
             result = test
         else:
             result = process_mp3(abs_path, model)
-        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
-                                         tags=tags,
+        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date, tags=tags,
                                          category=category, speakers=speakers, username=username, local=local,
                                          video_title=filename[:-4], test=test, pr=pr)
 
diff --git a/requirements.txt b/requirements.txt
index 1e0993a..089bf23 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,11 @@
+openai-whisper==20230314
 Click==7.0
 clint==0.5.1
-deepgram_sdk==2.4.0
 moviepy==1.0.3
-openai_whisper==20230314
 pytest==7.2.1
 python-dotenv==1.0.0
 pytube==12.1.2
 requests==2.28.2
-setuptools==67.6.1
+setuptools==45.2.0
 static_ffmpeg==2.3
 yt_dlp==2023.3.4

From 6a941dae0590d3ec3e9ef3425c11fa1427e0db27 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Sat, 22 Apr 2023 23:56:18 +0530
Subject: [PATCH 05/10] fix: added chapters without splitting the original file

---
 app/application.py | 128 +++++++++++++++++++++------------------------
 test/test_cli.py   |  11 ----
 2 files changed, 59 insertions(+), 80 deletions(-)

diff --git a/app/application.py b/app/application.py
index 57b6918..85a14c6 100644
--- a/app/application.py
+++ b/app/application.py
@@ -55,19 +55,13 @@ def read_description(prefix):
             return list_of_chapters
         for index, x in enumerate(info['chapters']):
             name = x['title']
-
             start = x['start_time']
-            m, s = divmod(start, 60)
-            h, m = divmod(m, 60)
-            current_dur = ':'.join([str(int(h)), str(int(m)), str(s)])
-            start = current_dur
-
-            list_of_chapters.append((str(index), str(start), str(name)))
+            list_of_chapters.append((str(index), start, str(name)))
 
         return list_of_chapters
     except Exception as e:
         print("Error reading description")
-        return list_of_chapters
+        return []
 
 
 def write_chapters_file(chapter_file: str, chapter_list: list) -> None:
@@ -85,33 +79,6 @@ def write_chapters_file(chapter_file: str, chapter_list: list) -> None:
         print(e)
 
 
-def split_mp4(chapters: list, download_filename: str, download_name: str) -> None:
-    try:
-        current_duration_pretext = subprocess.run(['ffprobe', '-i', download_filename,
-                                                   '-show_entries', 'format=duration',
-                                                   '-v', 'quiet'],
-                                                  capture_output=True, encoding='UTF8')
-        current_duration = float(current_duration_pretext.stdout[18:-13])
-        m, s = divmod(current_duration, 60)
-        h, m = divmod(m, 60)
-        current_dur = ':'.join([str(int(h)), str(int(m)), str(s)])
-        for current_index, current_chapter in enumerate(chapters):
-            # current_chapter will be a tuple: position, timecode, name
-            next_index = current_index + 1
-            start_time = current_chapter[1]
-            try:
-                end_time = chapters[next_index][1]
-            except:
-                end_time = current_dur
-            output_name = f'{download_name} - ({current_index}).mp4'
-            subprocess.run(["ffmpeg", "-ss", start_time, "-to", end_time,
-                            "-i", download_filename, "-acodec", "copy",
-                            "-vcodec", "copy", output_name, "-loglevel", "quiet"])
-    except Exception as e:
-        print("Error splitting mp4")
-        print(e)
-
-
 def convert_video_to_mp3(filename):
     try:
         clip = VideoFileClip(filename)
@@ -165,6 +132,7 @@ def get_playlist_videos(url):
         print(e)
         return
 
+
 def get_audio_file(url, title):
     print("URL: " + url)
     print("downloading audio file")
@@ -186,17 +154,27 @@ def get_audio_file(url, title):
 def process_mp3(filename, model):
     print("Transcribing audio to text...")
     try:
-        mymodel = whisper.load_model(model)
-        result = mymodel.transcribe(filename[:-4] + ".mp3")
-        result = result["text"]
+        my_model = whisper.load_model(model)
+        result = my_model.transcribe(filename)
+        data = []
+        for x in result["segments"]:
+            data.append(tuple((x["start"], x["end"], x["text"])))
         print("Removed video and audio files")
-        return result
+        return data
     except Exception as e:
         print("Error transcribing audio to text")
         print(e)
         return
 
 
+def create_transcript(data):
+    result = ""
+    for x in data:
+        result = result + x[2] + " "
+
+    return result
+
+
 def initialize():
     try:
         print('''
@@ -265,7 +243,8 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
         print(e)
 
 
-def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title, test,
+def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title,
+                     test,
                      pr):
     try:
         print("writing .md file")
@@ -334,7 +313,9 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
         # process audio file
         if not local:
             filename = get_audio_file(url=source, title=title)
-            abs_path = os.path.abspath(path=filename)
+            abs_path = os.path.abspath(path="tmp/" + filename)
+            print("filename", filename)
+            print("abs_path", abs_path)
             created_files.append(abs_path)
         else:
             filename = source.split("/")[-1]
@@ -344,13 +325,16 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
             print("File not found")
             return
         if filename.endswith('wav'):
+            initialize()
             abs_path = convert_wav_to_mp3(abs_path=abs_path, filename=filename)
             created_files.append(abs_path)
         if test:
             result = test
         else:
             result = process_mp3(abs_path, model)
-        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date, tags=tags,
+            result = create_transcript(result)
+        absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
+                                         tags=tags,
                                          category=category, speakers=speakers, username=username, local=local,
                                          video_title=filename[:-4], test=test, pr=pr)
 
@@ -394,10 +378,34 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, mod
         print(e)
 
 
+def combine_chapter(chapters, transcript):
+    chapters_pointer = 0
+    transcript_pointer = 0
+    result = ""
+    # chapters index, start time, name
+    # transcript start time, end time, text
+
+    while chapters_pointer < len(chapters) and transcript_pointer < len(transcript):
+        if chapters[chapters_pointer][1] <= transcript[transcript_pointer][0]:
+            result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
+            chapters_pointer += 1
+        else:
+            result = result + transcript[transcript_pointer][2]
+            transcript_pointer += 1
+
+    while transcript_pointer < len(transcript):
+        result = result + transcript[transcript_pointer][2]
+        transcript_pointer += 1
+
+    with open("result.md", "w") as file:
+        file.write(result)
+
+    return result
+
+
 def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
                   chapters, test, pr, local=False):
     try:
-        result = ""
         curr_time = str(round(time.time() * 1000))
         if not local:
             if "watch?v=" in video:
@@ -422,49 +430,31 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
         print()
         print()
 
+        initialize()
         if chapters and not test:
             chapters = read_description("tmp/")
         elif test:
             chapters = read_description("test/testAssets/")
+        convert_video_to_mp3(abs_path[:-4] + '.mp4')
+        result = process_mp3(abs_path[:-4] + ".mp3", model)
+        created_files.append(abs_path[:-4] + ".mp3")
         if chapters and len(chapters) > 0:
             print("Chapters detected")
             write_chapters_file(abs_path[:-4] + '.chapters', chapters)
             created_files.append(abs_path[:-4] + '.chapters')
-            split_mp4(chapters=chapters, download_filename=abs_path, download_name=abs_path[:-4])
-            initialize()
-            for current_index, chapter in enumerate(chapters):
-                print(f"Processing chapter {chapter[2]} {current_index + 1} of {len(chapters)}")
-                temp_filename = f'{abs_path[:-4]} - ({current_index}).mp4'
-                if not test:
-                    file = convert_video_to_mp3(filename=temp_filename)
-                    if file is None:
-                        print("File not found")
-                        return None
-                    temp_res = process_mp3(filename=temp_filename, model=model)
-                    created_files.append(temp_filename[:-4] + ".mp3")
-                else:
-                    temp_res = ""
-                created_files.append(temp_filename)
-
-                if chapter[2].startswith("<Untitled Chapter "):
-                    result = result + "\n\n" + temp_res + "\n\n"
-                else:
-                    result = result + "## " + chapter[2] + "\n\n" + temp_res + "\n\n"
-                print()
+            result = combine_chapter(chapters=chapters, transcript=result)
             if not local:
                 created_files.append(abs_path)
             created_files.append("tmp/" + filename[:-4] + '.chapters')
         else:
             if not test:
-                convert_video_to_mp3(abs_path)
-                created_files.append(abs_path[:-4] + '.mp3')
-                result = process_mp3(abs_path[:-4] + '.mp3', model)
-                created_files.append(abs_path[:-4] + ".mp3")
+                result = create_transcript(result)
             else:
                 result = ""
         if not title:
             title = filename[:-4]
-        absolute_path = get_md_file_path(result=result, loc=loc, video=video, title=title, event_date=event_date, tags=tags,
+        absolute_path = get_md_file_path(result=result, loc=loc, video=video, title=title, event_date=event_date,
+                                         tags=tags,
                                          category=category, speakers=speakers, username=username,
                                          video_title=filename[:-4], local=local, pr=pr, test=test)
         created_files.append("tmp/" + filename[:-4] + '.description')
diff --git a/test/test_cli.py b/test/test_cli.py
index 0b50fe6..ec3fe05 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -59,17 +59,6 @@ def test_download_video_file():
     shutil.rmtree('tmp')
 
 
-@pytest.mark.main
-def test_split_video():
-    chapters = application.read_description("test/testAssets/")
-    application.split_mp4(chapters, "test/testAssets/test_video.mp4", "test/testAssets/test_video")
-    is_pass = True
-    for i in range(3):
-        is_pass = is_pass and os.path.isfile("test/testAssets/test_video - (" + str(i) + ").mp4")
-        os.remove("test/testAssets/test_video - (" + str(i) + ").mp4")
-    assert is_pass
-
-
 @pytest.mark.main
 def test_convert_video_to_audio():
     application.convert_video_to_mp3("test/testAssets/test_video.mp4")

From 1e6df96a5a2aa7624ec378ddc7a6c9bebe36a980 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Tue, 9 May 2023 02:28:18 +0530
Subject: [PATCH 06/10] feat: added options for diarization and summary using
 deepgram (diarization doesn't work with chapters)

---
 app/application.py | 153 +++++++++++++++++++++++++++++++++++----------
 requirements.txt   |   1 +
 transcriber.py     |  20 ++++--
 3 files changed, 135 insertions(+), 39 deletions(-)

diff --git a/app/application.py b/app/application.py
index 85a14c6..ded61a9 100644
--- a/app/application.py
+++ b/app/application.py
@@ -15,6 +15,8 @@
 import time
 from dotenv import dotenv_values
 import yt_dlp
+from deepgram import Deepgram
+import mimetypes
 
 
 def download_video(url):
@@ -152,7 +154,7 @@ def get_audio_file(url, title):
 
 
 def process_mp3(filename, model):
-    print("Transcribing audio to text...")
+    print("Transcribing audio to text using whisper ...")
     try:
         my_model = whisper.load_model(model)
         result = my_model.transcribe(filename)
@@ -167,6 +169,69 @@ def process_mp3(filename, model):
         return
 
 
+def decimal_to_sexagesimal(dec):
+    sec = int(dec % 60)
+    minu = int((dec // 60) % 60)
+    hrs = int((dec // 60) // 60)
+
+    return f'{hrs}:{minu}:{sec}'
+
+
+def get_deepgram_transcript(deepgram_data, diarize):
+    if diarize:
+        para = ""
+        string = ""
+        curr_speaker = None
+        for word in deepgram_data["results"]["channels"][0]["alternatives"][0]["words"]:
+            if word["speaker"] != curr_speaker:
+                if para != "":
+                    para = para.strip(" ")
+                    string = string + para + "\n\n"
+                para = ""
+                string = string + f'Speaker {word["speaker"]}: {decimal_to_sexagesimal(word["start"])}'
+                curr_speaker = word["speaker"]
+                string = string + '\n\n'
+
+            para = para + " " + word["punctuated_word"]
+        para = para.strip(" ")
+        string = string + para
+        return string
+    else:
+        return deepgram_data["results"]["channels"][0]["alternatives"][0]["transcript"]
+
+
+def get_deepgram_summary(deepgram_data):
+    try:
+        summaries = deepgram_data["results"]["channels"][0]["alternatives"][0]["summaries"]
+        summary = ""
+        for x in summaries:
+            summary = summary + " " + x["summary"]
+        return summary.strip(" ")
+    except Exception as e:
+        print("Error getting summary")
+        print(e)
+
+
+def process_mp3_deepgram(filename, summarize, diarize):
+    print("Transcribing audio to text using deepgram...")
+    try:
+        config = dotenv_values(".env")
+        dg_client = Deepgram(config["DEEPGRAM_API_KEY"])
+
+        with open(filename, "rb") as audio:
+            mimeType = mimetypes.MimeTypes().guess_type(filename)[0]
+            source = {'buffer': audio, 'mimetype': mimeType}
+            response = dg_client.transcription.sync_prerecorded(source, {'punctuate': True, 'speaker_labels': True,
+                                                                         'diarize': diarize, 'smart_formatting': True,
+                                                                         'summarize': summarize})
+            audio.close()
+        return response
+    except Exception as e:
+        print("Error transcribing audio to text")
+        print(e)
+        return
+
+
 def create_transcript(data):
     result = ""
     for x in data:
@@ -189,7 +254,8 @@ def initialize():
         print(e)
 
 
-def write_to_file(result, loc, url, title, date, tags, category, speakers, video_title, username, local, test, pr):
+def write_to_file(result, loc, url, title, date, tags, category, speakers, video_title, username, local, test, pr,
+                  summary):
     try:
         transcribed_text = result
         if title:
@@ -219,6 +285,8 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
             for i in range(len(category)):
                 category[i] = category[i].strip()
             meta_data += f'categories: {category}\n'
+        if summary:
+            meta_data += f'summary: {summary}\n'
 
         file_name = video_title.replace(' ', '-')
         file_name_with_ext = "tmp/" + file_name + '.md'
@@ -244,12 +312,11 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
 
 
 def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title,
-                     test,
-                     pr):
+                     test, pr, summary=None):
     try:
         print("writing .md file")
         file_name_with_ext = write_to_file(result, loc, video, title, event_date, tags, category, speakers, video_title,
-                                           username, local, test, pr)
+                                           username, local, test, pr, summary)
         print("wrote .md file")
 
         absolute_path = os.path.abspath(file_name_with_ext)
@@ -301,7 +368,7 @@ def check_source_type(source):
 
 
 def process_audio(source, title, event_date, tags, category, speakers, loc, model, username, local,
-                  created_files, test, pr):
+                  created_files, test, pr, deepgram, summarize, diarize):
     try:
         print("audio file detected")
         curr_time = str(round(time.time() * 1000))
@@ -311,6 +378,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
             print("Error: Please supply a title for the audio file")
             return None
         # process audio file
+        summary = None
         if not local:
             filename = get_audio_file(url=source, title=title)
             abs_path = os.path.abspath(path="tmp/" + filename)
@@ -331,12 +399,17 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
         if test:
             result = test
         else:
-            result = process_mp3(abs_path, model)
-            result = create_transcript(result)
+            if deepgram or summarize:
+                deepgram_resp = process_mp3_deepgram(filename=abs_path, summarize=summarize, diarize=diarize)
+                result = get_deepgram_transcript(deepgram_data=deepgram_resp, diarize=diarize)
+                if summarize:
+                    summary = get_deepgram_summary(deepgram_data=deepgram_resp)
+            if not deepgram:
+                result = process_mp3(abs_path, model)
+                result = create_transcript(result)
         absolute_path = get_md_file_path(result=result, loc=loc, video=source, title=title, event_date=event_date,
-                                         tags=tags,
-                                         category=category, speakers=speakers, username=username, local=local,
-                                         video_title=filename[:-4], test=test, pr=pr)
+                                         tags=tags, category=category, speakers=speakers, username=username,
+                                         local=local, video_title=filename[:-4], test=test, pr=pr, summary=summary)
 
         created_files.append(absolute_path)
         if pr:
@@ -350,7 +423,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
 
 
 def process_videos(source, title, event_date, tags, category, speakers, loc, model, username, created_files,
-                   chapters, pr):
+                   chapters, pr, deepgram, summarize, diarize):
     try:
         print("Playlist detected")
         if source.startswith("http") or source.startswith("www"):
@@ -369,7 +442,8 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, mod
         for video in videos:
             filename = process_video(video=video, title=title, event_date=event_date, tags=tags, category=category,
                                      speakers=speakers, loc=loc, model=selected_model, username=username,
-                                     pr=pr, created_files=created_files, chapters=chapters, test=False)
+                                     pr=pr, created_files=created_files, chapters=chapters, test=False, diarize=diarize,
+                                     deepgram=deepgram, summarize=summarize)
             if filename is None:
                 return None
         return filename
@@ -404,7 +478,7 @@ def combine_chapter(chapters, transcript):
 
 
 def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
-                  chapters, test, pr, local=False):
+                  chapters, test, pr, local=False, deepgram=False, summarize=False, diarize=False):
     try:
         curr_time = str(round(time.time() * 1000))
         if not local:
@@ -431,12 +505,21 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
         print()
 
         initialize()
+        summary = None
         if chapters and not test:
             chapters = read_description("tmp/")
         elif test:
             chapters = read_description("test/testAssets/")
         convert_video_to_mp3(abs_path[:-4] + '.mp4')
-        result = process_mp3(abs_path[:-4] + ".mp3", model)
+        if deepgram or summarize:
+            deepgram_data = process_mp3_deepgram(abs_path[:-4] + ".mp3", summarize=summarize, diarize=diarize)
+            result = get_deepgram_transcript(deepgram_data=deepgram_data, diarize=diarize)
+            if summarize:
+                print("Summarizing")
+                summary = get_deepgram_summary(deepgram_data=deepgram_data)
+                print(summary)
+        if not deepgram:
+            result = process_mp3(abs_path[:-4] + ".mp3", model)
         created_files.append(abs_path[:-4] + ".mp3")
         if chapters and len(chapters) > 0:
             print("Chapters detected")
@@ -447,16 +530,16 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
                 created_files.append(abs_path)
             created_files.append("tmp/" + filename[:-4] + '.chapters')
         else:
-            if not test:
+            if not test and not deepgram:
                 result = create_transcript(result)
-            else:
+            elif not deepgram:
                 result = ""
         if not title:
             title = filename[:-4]
+        print("Creating markdown file")
         absolute_path = get_md_file_path(result=result, loc=loc, video=video, title=title, event_date=event_date,
-                                         tags=tags,
-                                         category=category, speakers=speakers, username=username,
-                                         video_title=filename[:-4], local=local, pr=pr, test=test)
+                                         tags=tags, summary=summary, category=category, speakers=speakers,
+                                         username=username, video_title=filename[:-4], local=local, pr=pr, test=test)
         created_files.append("tmp/" + filename[:-4] + '.description')
         if not test:
             if pr:
@@ -470,7 +553,8 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
 
 
 def process_source(source, title, event_date, tags, category, speakers, loc, model, username, source_type,
-                   created_files, chapters, local=False, test=None, pr=False):
+                   created_files, chapters, local=False, test=None, pr=False, deepgram=False, summarize=False,
+                   diarize=False):
     try:
         if not os.path.isdir("tmp"):
             os.mkdir("tmp")
@@ -480,26 +564,29 @@ def process_source(source, title, event_date, tags, category, speakers, loc, mod
 
         if source_type == 'audio':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, model=model, username=username,
-                                     local=local, created_files=created_files, test=test, pr=pr)
+                                     speakers=speakers, loc=loc, model=model, username=username, summarize=summarize,
+                                     local=local, created_files=created_files, test=test, pr=pr, deepgram=deepgram,
+                                     diarize=diarize)
         elif source_type == 'audio-local':
             filename = process_audio(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                     speakers=speakers, loc=loc, model=model, username=username,
-                                     local=True, created_files=created_files, test=test, pr=pr)
+                                     speakers=speakers, loc=loc, model=model, username=username, summarize=summarize,
+                                     local=True, created_files=created_files, test=test, pr=pr, deepgram=deepgram,
+                                     diarize=diarize)
         elif source_type == 'playlist':
             filename = process_videos(source=source, title=title, event_date=event_date, tags=tags, category=category,
-                                      speakers=speakers, loc=loc, model=model, username=username,
-                                      created_files=created_files, chapters=chapters, pr=pr)
+                                      speakers=speakers, loc=loc, model=model, username=username, summarize=summarize,
+                                      created_files=created_files, chapters=chapters, pr=pr, deepgram=deepgram,
+                                      diarize=diarize)
         elif source_type == 'video-local':
-            filename = process_video(video=source, title=title, event_date=event_date,
+            filename = process_video(video=source, title=title, event_date=event_date, summarize=summarize,
                                      tags=tags, category=category, speakers=speakers, loc=loc, model=model,
-                                     username=username, created_files=created_files, local=True,
-                                     chapters=chapters, test=test, pr=pr)
+                                     username=username, created_files=created_files, local=True, diarize=diarize,
+                                     chapters=chapters, test=test, pr=pr, deepgram=deepgram)
         else:
-            filename = process_video(video=source, title=title, event_date=event_date,
+            filename = process_video(video=source, title=title, event_date=event_date, summarize=summarize,
                                      tags=tags, category=category, speakers=speakers, loc=loc, model=model,
-                                     username=username, created_files=created_files, local=local,
-                                     chapters=chapters, test=test, pr=pr)
+                                     username=username, created_files=created_files, local=local, diarize=diarize,
+                                     chapters=chapters, test=test, pr=pr, deepgram=deepgram)
         return filename
     except Exception as e:
         print("Error processing source")
diff --git a/requirements.txt b/requirements.txt
index 089bf23..fb8dafd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ requests==2.28.2
 setuptools==45.2.0
 static_ffmpeg==2.3
 yt_dlp==2023.3.4
+deepgram-sdk==2.4.0
diff --git a/transcriber.py b/transcriber.py
index 5c068ca..f8aab5f 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -26,9 +26,9 @@ def print_help(ctx, param, value):
 @click.command()
 @click.argument('source', nargs=1)
 @click.argument('loc', nargs=1)
-@click.option('-m', '--model', type=click.Choice(['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v2']), default='tiny.en',
-              help='Options for transcription model'
-              )
+@click.option('-m', '--model', type=click.Choice(
+    ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v2']), default='tiny.en',
+              help='Options for transcription model')
 @click.option('-t', '--title', type=str,
               help="Supply transcribed file title in 'quotes', title is mandatory in case of audio files")
 @click.option('-d', '--date', type=str, help="Supply the event date in format 'yyyy-mm-dd'")
@@ -44,6 +44,10 @@ def print_help(ctx, param, value):
 @click.option('-h', '--help', is_flag=True, callback=print_help, expose_value=False, is_eager=True,
               help="Show the application's help and exit.")
 @click.option('-p', '--PR', is_flag=True, default=False, help="Supply this flag if you want to generate a payload")
+@click.option('-D', '--deepgram', is_flag=True, default=False, help="Supply this flag if you want to use deepgram")
+@click.option('-S', '--summarize', is_flag=True, default=False,
+              help="Supply this flag if you want to summarize the content")
+@click.option('--diarize', is_flag=True, default=False, help="Supply this flag if you want to diarize the content")
 def add(
         source: str,
         loc: str,
@@ -54,7 +58,10 @@ def add(
         speakers: str,
         category: str,
         chapters: bool,
-        pr: bool
+        pr: bool,
+        deepgram: bool,
+        summarize: bool,
+        diarize: bool
 ) -> None:
     """Supply a YouTube video id and directory for transcription. \n
        Note: The https links need to be wrapped in quotes when running the command on zsh
@@ -77,8 +84,9 @@ def add(
             return
         filename = application.process_source(source=source, title=title, event_date=event_date, tags=tags,
                                               category=category, speakers=speakers, loc=loc, model=model,
-                                              username=username, chapters=chapters, pr=pr,
-                                              source_type=source_type, created_files=created_files)
+                                              username=username, chapters=chapters, pr=pr, summarize=summarize,
+                                              source_type=source_type, created_files=created_files, deepgram=deepgram,
+                                              diarize=diarize)
         if filename:
             """ INITIALIZE GIT AND OPEN A PR"""
             print("Transcription complete")

From 8662914150e160fa7ea1809bf16ff5d54d1d39b0 Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Wed, 10 May 2023 03:00:46 +0530
Subject: [PATCH 07/10] feat: added chapters support to deepgram

---
 app/application.py | 72 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 21 deletions(-)

diff --git a/app/application.py b/app/application.py
index ded61a9..c0327a7 100644
--- a/app/application.py
+++ b/app/application.py
@@ -292,7 +292,7 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
         file_name_with_ext = "tmp/" + file_name + '.md'
 
         if date:
-            meta_data = meta_data + f'date: {date}\n'
+            meta_data += f'date: {date}\n'
 
         meta_data += '---\n'
         if test is not None or pr:
@@ -312,7 +312,7 @@ def write_to_file(result, loc, url, title, date, tags, category, speakers, video
 
 
 def get_md_file_path(result, loc, video, title, event_date, tags, category, speakers, username, local, video_title,
-                     test, pr, summary=None):
+                     test, pr, summary=""):
     try:
         print("writing .md file")
         file_name_with_ext = write_to_file(result, loc, video, title, event_date, tags, category, speakers, video_title,
@@ -379,6 +379,7 @@ def process_audio(source, title, event_date, tags, category, speakers, loc, mode
             return None
         # process audio file
         summary = None
+        result = None
         if not local:
             filename = get_audio_file(url=source, title=title)
             abs_path = os.path.abspath(path="tmp/" + filename)
@@ -452,29 +453,54 @@ def process_videos(source, title, event_date, tags, category, speakers, loc, mod
         print(e)
 
 
+def combine_deepgram_with_chapters(deepgram_data, chapters):
+    try:
+        chapters_pointer = 0
+        words_pointer = 0
+        result = ""
+        words = deepgram_data["results"]["channels"][0]["alternatives"][0]["words"]
+        # each chapter is indexed as (index, start time, name); [1] and [2] are read below
+        # each word is a dict; its "end" time and "punctuated_word" are read below
+        while chapters_pointer < len(chapters) and words_pointer < len(words):
+            if chapters[chapters_pointer][1] <= words[words_pointer]["end"]:
+                result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
+                chapters_pointer += 1
+            else:
+                result = result + words[words_pointer]["punctuated_word"] + " "
+                words_pointer += 1
+        return result
+    except Exception as e:
+        print("Error combining deepgram with chapters")
+        print(e)
+
+
 def combine_chapter(chapters, transcript):
-    chapters_pointer = 0
-    transcript_pointer = 0
-    result = ""
-    # chapters index, start time, name
-    # transcript start time, end time, text
+    try:
+        chapters_pointer = 0
+        transcript_pointer = 0
+        result = ""
+        # chapters index, start time, name
+        # transcript start time, end time, text
+
+        while chapters_pointer < len(chapters) and transcript_pointer < len(transcript):
+            if chapters[chapters_pointer][1] <= transcript[transcript_pointer][0]:
+                result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
+                chapters_pointer += 1
+            else:
+                result = result + transcript[transcript_pointer][2]
+                transcript_pointer += 1
 
-    while chapters_pointer < len(chapters) and transcript_pointer < len(transcript):
-        if chapters[chapters_pointer][1] <= transcript[transcript_pointer][0]:
-            result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
-            chapters_pointer += 1
-        else:
+        while transcript_pointer < len(transcript):
             result = result + transcript[transcript_pointer][2]
             transcript_pointer += 1
 
-    while transcript_pointer < len(transcript):
-        result = result + transcript[transcript_pointer][2]
-        transcript_pointer += 1
+        with open("result.md", "w") as file:
+            file.write(result)
 
-    with open("result.md", "w") as file:
-        file.write(result)
-
-    return result
+        return result
+    except Exception as e:
+        print("Error combining chapters")
+        print(e)
 
 
 def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
@@ -506,6 +532,8 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
 
         initialize()
         summary = None
+        result = ""
+        deepgram_data = None
         if chapters and not test:
             chapters = read_description("tmp/")
         elif test:
@@ -517,7 +545,6 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
             if summarize:
                 print("Summarizing")
                 summary = get_deepgram_summary(deepgram_data=deepgram_data)
-                print(summary)
         if not deepgram:
             result = process_mp3(abs_path[:-4] + ".mp3", model)
         created_files.append(abs_path[:-4] + ".mp3")
@@ -525,7 +552,10 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
             print("Chapters detected")
             write_chapters_file(abs_path[:-4] + '.chapters', chapters)
             created_files.append(abs_path[:-4] + '.chapters')
-            result = combine_chapter(chapters=chapters, transcript=result)
+            if deepgram:
+                result = combine_deepgram_with_chapters(deepgram_data=deepgram_data, chapters=chapters)
+            else:
+                result = combine_chapter(chapters=chapters, transcript=result)
             if not local:
                 created_files.append(abs_path)
             created_files.append("tmp/" + filename[:-4] + '.chapters')

From 3d87efea0e606aa14d0d3d31ee1ace3ac0a1b2cd Mon Sep 17 00:00:00 2001
From: Jonas <jonas@chaincode.com>
Date: Thu, 11 May 2023 20:48:33 -0400
Subject: [PATCH 08/10] add -M flag for diarize and double-digit timestamps

---
 app/application.py | 2 +-
 transcriber.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/application.py b/app/application.py
index c0327a7..3d77378 100644
--- a/app/application.py
+++ b/app/application.py
@@ -174,7 +174,7 @@ def decimal_to_sexagesimal(dec):
     minu = int((dec // 60) % 60)
     hrs = int((dec // 60) // 60)
 
-    return f'{hrs}:{minu}:{sec}'
+    return f'{hrs:02d}:{minu:02d}:{sec:02d}'
 
 
 def get_deepgram_transcript(deepgram_data, diarize):
diff --git a/transcriber.py b/transcriber.py
index f8aab5f..4d165c8 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -47,7 +47,7 @@ def print_help(ctx, param, value):
 @click.option('-D', '--deepgram', is_flag=True, default=False, help="Supply this flag if you want to use deepgram")
 @click.option('-S', '--summarize', is_flag=True, default=False,
               help="Supply this flag if you want to summarize the content")
-@click.option('--diarize', is_flag=True, default=False, help="Supply this flag if you want to diarize the content")
+@click.option('-M', '--diarize', is_flag=True, default=False, help="Supply this flag if you have multiple speakers AKA want to diarize the content")
 def add(
         source: str,
         loc: str,

From 025e51979e420f68d76d0c20b54a22bda4e5d2bc Mon Sep 17 00:00:00 2001
From: Jonas <jonas@chaincode.com>
Date: Thu, 11 May 2023 21:01:29 -0400
Subject: [PATCH 09/10] add transcription after final chapter header

---
 app/application.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/app/application.py b/app/application.py
index 3d77378..942a969 100644
--- a/app/application.py
+++ b/app/application.py
@@ -468,6 +468,15 @@ def combine_deepgram_with_chapters(deepgram_data, chapters):
             else:
                 result = result + words[words_pointer]["punctuated_word"] + " "
                 words_pointer += 1
+
+        # Append the final chapter heading and remaining content
+        while chapters_pointer < len(chapters):
+            result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
+            chapters_pointer += 1
+        while words_pointer < len(words):
+            result = result + words[words_pointer]["punctuated_word"] + " "
+            words_pointer += 1
+
         return result
     except Exception as e:
         print("Error combining deepgram with chapters")

From 5d96b2e191ec87eae016cb9eb16747d02c29905f Mon Sep 17 00:00:00 2001
From: masterchief164 <63920595+masterchief164@users.noreply.github.com>
Date: Fri, 12 May 2023 16:17:19 +0530
Subject: [PATCH 10/10] feat: added chapters support to deepgram with
 diarization

---
 app/application.py | 113 +++++++++++++++++++++++++++++++++------------
 transcriber.py     |   3 +-
 2 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/app/application.py b/app/application.py
index 942a969..5f1c6c3 100644
--- a/app/application.py
+++ b/app/application.py
@@ -177,6 +177,85 @@ def decimal_to_sexagesimal(dec):
     return f'{hrs:02d}:{minu:02d}:{sec:02d}'
 
 
+def combine_chapter(chapters, transcript):
+    try:
+        chapters_pointer = 0
+        transcript_pointer = 0
+        result = ""
+        # each chapter is indexed as (index, start time, name); [1] and [2] are read below
+        # each transcript entry is indexed as (start time, end time, text); [0] and [2] are read below
+
+        while chapters_pointer < len(chapters) and transcript_pointer < len(transcript):
+            if chapters[chapters_pointer][1] <= transcript[transcript_pointer][0]:
+                result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
+                chapters_pointer += 1
+            else:
+                result = result + transcript[transcript_pointer][2]
+                transcript_pointer += 1
+        # chapters used up: append the remaining text (headings of any leftover chapters are not written)
+        while transcript_pointer < len(transcript):
+            result = result + transcript[transcript_pointer][2]
+            transcript_pointer += 1
+        # NOTE(review): result.md is opened relative to the current working directory on every call
+        with open("result.md", "w") as file:
+            file.write(result)
+
+        return result
+    except Exception as e:
+        print("Error combining chapters")
+        print(e)
+
+
+def combine_deepgram_chapters_with_diarization(deepgram_data, chapters):
+    """Merge chapter headings into a speaker-labelled Deepgram transcript.
+
+    Words are read from the first alternative of the first channel of
+    ``deepgram_data``. A ``## <name>`` heading is written before the first
+    word whose "start" is at or after the chapter's start time
+    (``chapter[1]``; ``chapter[2]`` is the name), and a
+    ``Speaker <n>: HH:MM:SS`` line is written whenever the speaker changes.
+    Headings for chapters that start after the last word are appended at
+    the end, as combine_deepgram_with_chapters does.
+
+    Returns the markdown string, or None (after printing the error) when
+    the data cannot be processed.
+    """
+    try:
+        words = deepgram_data["results"]["channels"][0]["alternatives"][0]["words"]
+        pieces = []  # finished headings, speaker lines and paragraphs
+        para = []  # words of the paragraph currently being built
+        curr_speaker = None
+        chapters_pointer = 0
+
+        def close_para():
+            # Flush the open paragraph before a heading or speaker line.
+            if para:
+                pieces.append(" ".join(para) + "\n\n")
+                para.clear()
+
+        for word in words:
+            # Emit every chapter that has started by the time of this word.
+            while chapters_pointer < len(chapters) and chapters[chapters_pointer][1] <= word["start"]:
+                close_para()
+                pieces.append(f'## {chapters[chapters_pointer][2]}\n\n')
+                chapters_pointer += 1
+            if word["speaker"] != curr_speaker:
+                close_para()
+                curr_speaker = word["speaker"]
+                pieces.append(f'Speaker {curr_speaker}: {decimal_to_sexagesimal(word["start"])}\n\n')
+            para.append(word["punctuated_word"])
+        # Chapters starting after the last word used to be dropped silently.
+        while chapters_pointer < len(chapters):
+            close_para()
+            pieces.append(f'## {chapters[chapters_pointer][2]}\n\n')
+            chapters_pointer += 1
+        pieces.append(" ".join(para))
+        return "".join(pieces)
+    except Exception as e:
+        print("Error combining deepgram chapters")
+        print(e)
+
+
 def get_deepgram_transcript(deepgram_data, diarize):
     if diarize:
         para = ""
@@ -483,35 +562,6 @@ def combine_deepgram_with_chapters(deepgram_data, chapters):
         print(e)
 
 
-def combine_chapter(chapters, transcript):
-    try:
-        chapters_pointer = 0
-        transcript_pointer = 0
-        result = ""
-        # chapters index, start time, name
-        # transcript start time, end time, text
-
-        while chapters_pointer < len(chapters) and transcript_pointer < len(transcript):
-            if chapters[chapters_pointer][1] <= transcript[transcript_pointer][0]:
-                result = result + "\n\n## " + chapters[chapters_pointer][2] + "\n\n"
-                chapters_pointer += 1
-            else:
-                result = result + transcript[transcript_pointer][2]
-                transcript_pointer += 1
-
-        while transcript_pointer < len(transcript):
-            result = result + transcript[transcript_pointer][2]
-            transcript_pointer += 1
-
-        with open("result.md", "w") as file:
-            file.write(result)
-
-        return result
-    except Exception as e:
-        print("Error combining chapters")
-        print(e)
-
-
 def process_video(video, title, event_date, tags, category, speakers, loc, model, username, created_files,
                   chapters, test, pr, local=False, deepgram=False, summarize=False, diarize=False):
     try:
@@ -562,7 +612,10 @@ def process_video(video, title, event_date, tags, category, speakers, loc, model
             write_chapters_file(abs_path[:-4] + '.chapters', chapters)
             created_files.append(abs_path[:-4] + '.chapters')
             if deepgram:
-                result = combine_deepgram_with_chapters(deepgram_data=deepgram_data, chapters=chapters)
+                if diarize:
+                    result = combine_deepgram_chapters_with_diarization(deepgram_data=deepgram_data, chapters=chapters)
+                else:
+                    result = combine_deepgram_with_chapters(deepgram_data=deepgram_data, chapters=chapters)
             else:
                 result = combine_chapter(chapters=chapters, transcript=result)
             if not local:
diff --git a/transcriber.py b/transcriber.py
index 4d165c8..1b037d5 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -47,7 +47,8 @@ def print_help(ctx, param, value):
 @click.option('-D', '--deepgram', is_flag=True, default=False, help="Supply this flag if you want to use deepgram")
 @click.option('-S', '--summarize', is_flag=True, default=False,
               help="Supply this flag if you want to summarize the content")
-@click.option('-M', '--diarize', is_flag=True, default=False, help="Supply this flag if you have multiple speakers AKA want to diarize the content")
+@click.option('-M', '--diarize', is_flag=True, default=False, help="Supply this flag if you have multiple speakers AKA "
+                                                                   "want to diarize the content")
 def add(
         source: str,
         loc: str,