Skip to content

Commit

Permalink
Merge pull request #8 from ThisIsntTheWay/master
Browse files Browse the repository at this point in the history
Add audio file naming sanity check
  • Loading branch information
kanjieater authored May 17, 2024
2 parents 34e2afa + 3281c58 commit cc329dc
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 10 deletions.
6 changes: 3 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ Primarily I'm using this for syncing audiobooks to their book script. So while y

1. `git clone https://github.com/kanjieater/AudiobookTextSync.git`
1. Make sure you run any commands that start with `./` from the project root, eg after you clone you can run `cd ./AudiobookTextSync`
1. Setup the folder. Create a folder to hold a single media file (like an audiobook). Name it whatever you name your media file, eg `Arslan Senki 7`, this is what should go anywhere you see me write `<name>`
1. Setup the folder. Create a folder to hold a single media file (like an audiobook). Name it whatever you name your media file, eg `Arslan Senki 7`, this is what should go anywhere you see me write `<name>`.
1. Get the book script as text from a digital copy. Put the script at: `./<name>/script.txt`. Everything in this file will show up in your subtitles. So it's important you trim out excess (table of contents, character bios that aren't in the audiobook etc)
1. Single media file should be in `./<name>/<name>.m4b`. If you have the split audiobook as m4b,mp3, or mp4's you can run `./merge.sh "<full folder path>"`,
1. Single media file should be in `./<name>/<name>.m4b`. If you have the split audiobook as m4b, mp3, or mp4's you can run `./merge.sh "<full folder path>"`,
eg `./merge.sh "/mnt/d/Editing/Audiobooks/medium霊媒探偵城塚翡翠"`. The split files must be in `./<name>/<name>_merge/`. This will merge your file into a single file so it can be processed.
6. If you have the `script.txt` and `./<name>/<name>.m4b`, you can now run the GPU-intensive, time-intensive, and occasionally CPU-intensive part of the script: `python run.py -d "<full folder path>"`, e.g. `python run.py -d "/mnt/d/Editing/Audiobooks/かがみの孤城/"`. This runs each file to get a word-level transcript. It then creates a subtitle format that can be matched to the `script.txt`. Each word-level subtitle is merged into phrase-level subtitles, and your result should be a `<name>.srt` file that can be watched with `MPV`, showing the audio in time with the full book as subtitles.
7. From there, use a [texthooker](https://github.com/Renji-XD/texthooker-ui) with something like [mpv_websocket](https://github.com/kuroahna/mpv_websocket) and enjoy Immersion Reading.
Expand All @@ -58,7 +58,7 @@ Primarily I'm using this for syncing audiobooks to their book script. So while y
# Single File

You can also run for a single file. Beware if it's over 1GB/19hr you need as much as 8GB of RAM available.
You need your`m4b`, `mp3`, or `mp4` audiobook file to be inside the folder: "<full folder path>", with a `txt` file in the same folder. The `txt` file can be named anything as long as it has a `txt` extension.
You need your audio file to be inside a folder with the **same name as the audio file**, in addition to a `txt` file in the same folder. The `txt` file can be named anything as long as it has a `txt` extension.
The `-d` parameter can take multiple audiobooks to process, like: `python run.py -d "/mnt/d/sync/Harry Potter 1/" "/mnt/d/sync/Harry Potter 2 The Spooky Sequel/"`
```bash
/sync/
Expand Down
14 changes: 12 additions & 2 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from tqdm.contrib.concurrent import process_map
from tqdm import tqdm
from pprint import pprint
from utils import read_vtt, write_sub, grab_files
from utils import read_vtt, write_sub, grab_files, check_workdir_content
from split_sentences import split_sentences
import align
import traceback
Expand All @@ -19,7 +19,7 @@

def get_model(model_type='large-v2'):
    """Load and return the ``stable_whisper`` model identified by *model_type*."""
    # Alternative loader, kept for reference:
    # stable_whisper.load_faster_whisper(model_type)
    model = stable_whisper.load_model(model_type)
    return model

def generate_transcript_from_audio(audio_file, full_timings_path, model, sub_format='ass', **kwargs):
default_args = {
Expand Down Expand Up @@ -315,6 +315,16 @@ def align_transcript(working_folder, content_name):
for working_folder in working_folders:
try:
print(f"Working on {working_folder}")

# Ensure audio file(s) have the same name as the working dir
check_verdict = check_workdir_content(working_folder, SUPPORTED_FORMATS)

if not check_verdict:
expected_file_name_base = path.basename(path.normpath(working_folder))
print("> ERROR: Current working directory does not contain an audio file of the same name.")
print(f" Your audio file must have the same name as this directory, e.g. '{expected_file_name_base}.m4b'")
continue

split_txt(working_folder)
if args.use_stable_ts_align:
align_stable_transcript(working_folder, get_content_name(working_folder))
Expand Down
16 changes: 11 additions & 5 deletions utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,37 @@
import re
from natsort import os_sorted
from glob import glob, escape
from os import path
import json

# File extensions (no leading dot), grouped by the kind of media they denote.
audio_formats = ['aac', 'ac3', 'alac', 'ape', 'flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'm4b']
video_formats = ['3g2', '3gp', 'avi', 'flv', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'webm']
subtitle_formats = ['ass', 'srt', 'vtt']


class Subtitle:
    """A single subtitle cue: its start marker, end marker and text line."""

    def __init__(self, start, end, line):
        # Values are stored exactly as given; nothing is parsed or validated.
        self.start, self.end, self.line = start, end, line

def check_workdir_content(workdir, formats):
    """Report whether *workdir* holds a media file named after the directory.

    Args:
        workdir: Path of the working directory; a trailing ``/`` is allowed.
        formats: Iterable of file extensions (without the leading dot) to
            accept.

    Returns:
        True if at least one file directly inside *workdir* has a name ending
        in ``<directory name>.<extension>`` for any extension in *formats*,
        otherwise False.
    """
    dir_name = path.basename(path.normpath(workdir))

    # Escape the literal parts of the pattern: directory names such as
    # "Book [1]" contain glob metacharacters that would otherwise be read as
    # wildcards, so a correctly named file would never be found.
    base_dir = escape(workdir.rstrip('/'))
    name_part = escape(dir_name)

    # The leading "*" is kept from the original check, so a file whose name
    # merely ends with the directory name is accepted as well.
    return any(
        glob(f"{base_dir}/*{name_part}.{ext}")
        for ext in formats
    )

def remove_tags(line):
    """Return *line* with every ``<...>`` tag stripped out."""
    tag_pattern = re.compile('<[^>]*>')
    return tag_pattern.sub('', line)


def get_lines(file):
    """Lazily yield each item of *file* with its trailing newlines removed."""
    yield from (raw.rstrip('\n') for raw in file)


def read_vtt(file):
lines = get_lines(file)

Expand Down Expand Up @@ -68,7 +76,6 @@ def read_vtt(file):

return subs


def write_sub(output_file_path, subs):
with open(output_file_path, "w", encoding='utf-8') as outfile:
outfile.write('WEBVTT\n\n')
Expand All @@ -77,7 +84,6 @@ def write_sub(output_file_path, subs):
outfile.write('%s --> %s\n' % (sub.start, sub.end))
outfile.write('%s\n\n' % (sub.line))


def grab_files(folder, types, sort=True):
files = []
for type in types:
Expand Down

0 comments on commit cc329dc

Please sign in to comment.