From 7159d95b9f179512807681a174f3e1ea3136fd70 Mon Sep 17 00:00:00 2001 From: kouloumos Date: Sat, 4 Nov 2023 17:38:39 +0200 Subject: [PATCH] configure logging in separate module - move logging configuration in a separate module - alongside console logging, always log to a file in the workdir --- app/application.py | 15 ++++++++------- app/logging.py | 28 ++++++++++++++++++++++++++++ app/transcript.py | 5 +++-- app/transcription.py | 3 ++- transcriber.py | 21 +++------------------ 5 files changed, 44 insertions(+), 28 deletions(-) create mode 100644 app/logging.py diff --git a/app/application.py b/app/application.py index b0c2365..1362930 100644 --- a/app/application.py +++ b/app/application.py @@ -26,6 +26,9 @@ from app import __app_name__, __version__ from app.utils import write_to_json +from app.logging import get_logger + +logger = get_logger() def convert_wav_to_mp3(abs_path, filename, working_dir="tmp/"): @@ -85,7 +88,7 @@ def combine_chapter(chapters, transcript, working_dir="tmp/"): def combine_deepgram_chapters_with_diarization(deepgram_data, chapters): - logger = logging.getLogger(__app_name__) + logger.info("(deepgram) Combining transcript with detected chapters...") try: para = "" string = "" @@ -146,7 +149,6 @@ def get_deepgram_transcript(deepgram_data, diarize, title, upload, model_output_ logger = logging.getLogger(__app_name__) def save_local_json(json_data, title, model_output_dir): - logger.info(f"Saving Locally...") time_in_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") if not os.path.isdir(model_output_dir): os.makedirs(model_output_dir) @@ -155,7 +157,7 @@ def save_local_json(json_data, title, model_output_dir): ) with open(file_path, "w") as json_file: json.dump(json_data, json_file, indent=4) - logger.info(f"Model stored at path {file_path}") + logger.info(f"(deepgram) Model stored at: {file_path}") return file_path try: data_path = write_to_json( @@ -164,6 +166,7 @@ def save_local_json(json_data, title, model_output_dir): if upload: upload_file_to_s3(data_path) if diarize: + logger.info(f"(deepgram) Processing diarization...") para = "" string = "" curr_speaker = None @@ -258,7 +261,7 @@ def create_pr(absolute_path, loc, username, curr_time, title): def combine_deepgram_with_chapters(deepgram_data, chapters): - logger = logging.getLogger(__app_name__) + logger.info("(deepgram) Combining transcript with detected chapters...") try: chapters_pointer = 0 words_pointer = 0 @@ -301,15 +304,13 @@ def clean_up(tmp_dir): def generate_srt(data, filename, model_output_dir): - logger = logging.getLogger(__app_name__) - logger.info("Saving Locally...") time_in_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") if not os.path.isdir(model_output_dir): os.makedirs(model_output_dir) output_file = os.path.join( model_output_dir, filename + "_" + time_in_str + ".srt" ) - logger.debug(f"Writing srt to {output_file}") + logger.info(f"Writing srt to {output_file}...") with open(output_file, "w") as f: for index, segment in enumerate(data): start_time, end_time, text = segment diff --git a/app/logging.py b/app/logging.py new file mode 100644 index 0000000..c4cb077 --- /dev/null +++ b/app/logging.py @@ -0,0 +1,28 @@ +import logging +from pathlib import Path +import sys + +from app import __app_name__ + + +def configure_logger(log_level, working_dir=None): + logger = get_logger() + sh = logging.StreamHandler() + sh_log_fmt = '%(asctime)s [%(levelname)s] %(message)s' + sh.setLevel(log_level) + sh.setFormatter(logging.Formatter(sh_log_fmt)) + + # Always log debug out to a file in the workdir + if working_dir is not None: + filehandler = logging.FileHandler(Path(working_dir) / "tstbtc.log") + filehandler.setLevel(logging.DEBUG) + file_log_fmt = '%(asctime)s %(name)s [%(levelname)s] %(message)s' + filehandler.setFormatter(logging.Formatter(file_log_fmt)) + logger.addHandler(filehandler) + + logger.addHandler(sh) + logger.setLevel(logging.DEBUG) + + +def get_logger(): + return logging.getLogger(__app_name__) diff --git a/app/transcript.py b/app/transcript.py index 92130d1..3cd8cd0 100644 --- a/app/transcript.py +++ b/app/transcript.py @@ -15,6 +15,7 @@ from moviepy.editor import VideoFileClip from app import __app_name__, __version__, application +from app.logging import get_logger from app.utils import slugify logger = get_logger() @@ -24,7 +25,7 @@ class Transcript: def __init__(self, source, test_mode=False): self.source = source self.test_mode = test_mode - self.logger = logging.getLogger(__app_name__) + self.logger = get_logger() def create_transcript(self): result = "" @@ -202,7 +203,7 @@ def save_source(self, source_file, local, title, date, tags, category, speakers, self.tags = tags self.category = category self.speakers = speakers - self.logger = logging.getLogger(__app_name__) + self.logger = get_logger() self.preprocess = preprocess def __config_event_date(self, date): diff --git a/app/transcription.py b/app/transcription.py index 6ac2137..2d0dcba 100644 --- a/app/transcription.py +++ b/app/transcription.py @@ -15,6 +15,7 @@ from app.transcript import Transcript, Source, Audio, Video, Playlist from app import __app_name__, __version__, application from app.utils import write_to_json +from app.logging import get_logger class Transcription: @@ -37,7 +38,7 @@ def __init__(self, loc="test/test", model="tiny", chapters=False, pr=False, summ # during testing we need to create the markdown for validation purposes self.markdown = markdown or test_mode self.test_mode = test_mode - self.logger = logging.getLogger(__app_name__) + self.logger = get_logger() self.tmp_dir = working_dir if working_dir is not None else tempfile.mkdtemp() self.logger.info(f"Temp directory: {self.tmp_dir}") diff --git a/transcriber.py b/transcriber.py index d67ab01..500397f 100644 --- a/transcriber.py +++ b/transcriber.py @@ -6,19 +6,9 @@ from app import __app_name__, __version__, application from app.transcript import Transcript from app.transcription import Transcription +from app.logging import configure_logger, get_logger - -def setup_logger(): - logger = logging.getLogger(__app_name__) - console_handler = logging.StreamHandler() - console_handler.setLevel( - logging.DEBUG - ) # Set the desired log level for console output in the submodule - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - console_handler.setFormatter(formatter) - logger.addHandler(console_handler) +logger = get_logger() @click.group() @@ -207,13 +197,8 @@ def add( Note: The https links need to be wrapped in quotes when running the command on zsh """ - setup_logger() - logger = logging.getLogger(__app_name__) - if verbose: - logger.setLevel(logging.DEBUG) - else: - logger.setLevel(logging.WARNING) tmp_dir = tempfile.mkdtemp() + configure_logger(logging.DEBUG if verbose else logging.INFO, tmp_dir) logger.info( "This tool will convert Youtube videos to mp3 files and then "