+# Changelog
+## 0.1.0 (unreleased)
+- Initial release
+Convert srt files to tables in docx
+This assumes you have a bash environment.
+### Installation
+- Run `make setup` to initialise the environment and create the virtual python environment
+- Source the init script `source source-me`, to activate the python environment
+### Running
+- Change settings in `srt2docx_settings.yaml` to your preferences
+- Go to directory with input files
+- Execute the script `srt2docx` with its full path; if you have sourced the `source-me` file, the script directory is on your path and you can just run `srt2docx` without the path.
+### Watermarks
+If you want to use or change the watermark, put the file in the assets directory, and put the file name in the yaml.
+The script will look in the assets directory for all images.
+After a successful run you should have docx files that share the base name of the input files, but with a `.docx` extension. Errors should be reported to std out.
+>Any questions or comments [Scott Dillman](mailto:scott@bitwise.ninja)
+Licensed under MIT.
\ No newline at end of file
+dynamic = ["version"]
+name = "srt2docx"
+authors = [{ name = "Scott Dillman", email = "scott@bitwise.ninja" }]
+maintainers = [{ name = "Scott Dillman", email = "scott@bitwise.ninja" }]
+description = "Script to convert SRT files to DOCX files with tables"
+readme = { file = "README.md", content-type = "text/markdown" }
+license = { file = "LICENSE.md" }
+keywords = ["docx", "srt"]
+classifiers = [
+    # How mature is this project? Common values are
+    #   3 - Alpha
+    #   4 - Beta
+    #   5 - Production/Stable
+    "Development Status :: 4 - Beta",
+    # Indicate who your project is intended for.
+    # NOTE: every entry must come from the official list at
+    # https://pypi.org/classifiers/ -- uploads with unknown classifiers are rejected.
+    "Intended Audience :: Education",
+    "Topic :: Multimedia :: Video",
+    "Topic :: Text Processing",
+    # Pick your license as you wish (see also "license" above)
+    "License :: OSI Approved :: MIT License",
+    # Specify the Python versions you support here.
+    "Programming Language :: Python :: 3",
+]
+dependencies = [
+    "certifi==2024.2.2",
+    "charset-normalizer==3.3.2",
+    # NOTE: the legacy "docx" distribution is intentionally NOT listed. It installs
+    # the same top-level `docx` package as python-docx, so having both makes the
+    # two overwrite each other; the code imports the python-docx API.
+    "idna==3.6",
+    "loguru==0.7.2",
+    "lxml==5.2.2",
+    "munch==4.0.0",
+    "pillow==10.3.0",
+    "python-docx==1.1.2",
+    "pytz==2024.1",
+    "PyYAML==6.0.1",
+    "requests==2.31.0",
+    # NOTE(review): ruff is a lint/format tool -- consider moving it to a dev extra
+    "ruff==0.3.2",
+    "srt==3.5.3",
+    "typing_extensions==4.12.2",
+    "urllib3==2.2.1",
+]
+requires-python = ">=3.8"
+Homepage = "https://dreamcyclestudios.com"
+Documentation = "https://readthedocs.org"
+Repository = "https://github.com/me/spam.git"
+Issues = "https://github.com/me/spam/issues"
+Changelog = "https://github.com/me/spam/blob/master/CHANGELOG.md"
+#!/usr/bin/env bash
+idempotent_path_prepend ()
+ PATH=${PATH//":$1"/} #delete any instances in the middle or at the end
+ PATH=${PATH//"$1:"/} #delete any instances at the beginning
+ export PATH="$1:$PATH" #prepend to beginning
+currentDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+idempotent_path_prepend "${PWD}"
+source "${currentDir}/pyenv/bin/activate"
+idempotent_path_prepend "${PWD}"
\ No newline at end of file
+#!/usr/bin/env bash
+# Launcher: activates the python environment next to this script and runs
+# srt2docx.py with the caller's arguments, from the caller's working directory.
+currentDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+source "${currentDir}/source-me"
+# "$@" must be quoted so arguments containing spaces are passed through intact
+python "${currentDir}/srt2docx.py" "$@"
\ No newline at end of file
+# -*- coding: utf-8 -*-
+"""Script to convert SRT files to DOCX files with tables
+Example of end state:
+- https://docs.google.com/document/d/1MqnG5MWRbjZBke9ja5CN-rJGijJnWBrRJa6lLOxMvRQ/edit
+SRT format docs
+- https://en.wikipedia.org/wiki/SubRip
+DOCX docs
+- https://python-docx.readthedocs.io/en/latest/
+ $ python srt2docx_funcs.py
+ * Option to go directly to google docs
+ * Option to combine all files into one docx
+ * add recursion option glob.glob('**/*.txt', recursive=True)
+This script reads settings from a srt2docx_settings.yaml file in the same directory as the
+script file. Edit this file to personalize the settings. This only has to be done once.
+Generally there is a make file distributed with this script and can be executed as
+ % make setup
+otherwise run the following to initialize the environment:
+ % python -m venv pyenv
+ % source ./pyenv/bin/activate
+ % pip install -r requirements.txt
+Then activate the environment:
+ % activate env with: source ./pyenv/bin/activate
+Then in a directory containing the files to be processed run:
+$ python srt2docx.py
+Output files will have the same name as input files but with the .docx extension.
+@Author: Scott Dillman
+@Date: 2024-06-25 22:47
+"""
+import srt2docx_funcs
+import argparse
+from loguru import logger
+import inspect
+import os
+## argument parser
+parser = argparse.ArgumentParser(
+ prog="srt2docx",
+ epilog="Please contact scott@bitwise.ninja with problems/issues",
+ description="Convert srt files to tables in docx",
+## update me on major changes
+__version__ = "0.1.0"
+__contact__ = "scott@bitwise.ninja"
+__web__ = "https://dreamcyclesetudios.com"
+## main entry point
+def main():
+ parser.add_argument(
+ "-v",
+ "--version",
+ action="version",
+ version="%(prog)s " + __version__ + " | {} | {}".format(__contact__, __web__),
+ )
+ args = parser.parse_args()
+ ## do any init we need
+ logger.info("Script started: [{}]".format(os.path.realpath(inspect.stack()[0][1])))
+ v = srt2docx_funcs.init(__version__)
+## run main() only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+ main()
+# -*- coding: utf-8 -*-
+"""Script to convert SRT files to DOCX files with tables
+Example of end state:
+- https://docs.google.com/document/d/1MqnG5MWRbjZBke9ja5CN-rJGijJnWBrRJa6lLOxMvRQ/edit
+SRT format docs
+- https://en.wikipedia.org/wiki/SubRip
+DOCX docs
+- https://python-docx.readthedocs.io/en/latest/
+ $ python srt2docx_funcs.py
+@Author: Scott Dillman
+@Date: 2024-06-25 22:47
+"""
+import uuid
+import sys
+from munch import munchify
+import yaml
+from loguru import logger
+from pathlib import Path
+from datetime import datetime
+import srt
+import pytz
+from docx import Document
+from docx.shared import Inches
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+import inspect
+import os
+def readFiles(values) -> list:
+ """read files from cwd"""
+ logger.info("Glob in effect is: [{}]".format(values.settings.filetypes.glob))
+ files = list(Path().glob(values.settings.filetypes.glob))
+ return files
+def buildDocument(values, subs, title, version):
+ """build the docx document"""
+ document = Document()
+ ## add base file name as heading
+ document.add_heading(title, 0)
+ ## add SRT data in table
+ table = document.add_table(rows=len(subs), cols=values.settings.table.cols)
+ ## create table header
+ hdr_cells = table.rows[0].cells
+ count = 0
+ for h in values.settings.table.headers:
+ hdr_cells[count].text = h
+ count = count + 1
+ ## iterate over the subs and add data to table
+ ## TODO: format time
+ for item in subs:
+ row_cells = table.add_row().cells
+ row_cells[0].text = "{}".format(item.start)
+ row_cells[1].text = "{}".format(item.end)
+ row_cells[2].text = "{}".format(item.end - item.start)
+ row_cells[3].text = "{}".format(item.content)
+ ## set margins
+ document.sections[0].left_margin = Inches(values.settings.layout.margin_left)
+ document.sections[0].right_margin = Inches(values.settings.layout.margin_right)
+ ## add core properties
+ cprops = document.core_properties
+ ## add basic document properties from the config yaml
+ cprops.author = values.settings.meta.author
+ cprops.category = values.settings.meta.category
+ cprops.comments = values.settings.meta.comments
+ cprops.content_status = values.settings.meta.content_status
+ cprops.keywords = values.settings.meta.keywords
+ cprops.language = values.settings.meta.language
+ cprops.subject = values.settings.meta.subject
+ cprops.version = values.settings.meta.version
+ cprops.last_modified_by = values.settings.meta.author
+ ## UTC is expected, localization happens on the client
+ tz = pytz.timezone("UTC")
+ cprops.created = datetime.now(tz)
+ cprops.modified = datetime.now(tz)
+ ## assign a UUID to make this document unique and to tag it with eh str2docx version
+ ## we put the versioon here in case we need to debug a broken file
+ cprops.identifier = "v{}-{}".format(version, str(uuid.uuid4()))
+ logger.info("Document unique id: [{}]", cprops.identifier)
+ ## let's add a fun footer
+ if values.settings.footer.show:
+ section = document.sections[0]
+ section.footer_distance = Inches(0.2)
+ footer = section.footer
+ footer_para = footer.paragraphs[0]
+ logo_run = footer_para.add_run()
+ ## if you want other watermarks put them in the assets directory
+ ## because that is where we look for this file
+ p = os.path.join(os.path.dirname(os.path.realpath(inspect.stack()[0][1])),"assets")
+ logo_run.add_picture(
+ os.path.join(p,values.settings.footer.watermark),
+ width=Inches(values.settings.footer.width_in),
+ )
+ footer_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+ ## save the doc out using the base name from read file
+ logger.info("Saving: [{}.docx]".format(title))
+ document.save("{}.docx".format(title))
+def processFiles(values, files: list, version: str):
+ """transform files"""
+ ## iterate over file list
+ for i in files:
+ logger.info("Processing: [{}]".format(i))
+ subs = []
+ ## open the file for reading
+ with open(i) as f:
+ ## parse the srt file
+ subs = list(srt.parse(f))
+ ## build doc
+ buildDocument(values, subs, Path(i).stem, version)
+ return subs
+def init(version: str) -> dict:
+ """Put any setup that needs to be done here"""
+ ## so this beast gets the path to the settings file as an absolute path
+ ## to the running script
+ p=os.path.join(os.path.dirname(os.path.realpath(inspect.stack()[0][1])),"srt2docx_settings.yaml")
+ ## load settings
+ values = munchify(yaml.safe_load(open(p)))
+ ## do the work
+ files = readFiles(values)
+ srt_data = processFiles(values, files, version)
+ return values
+## this is the settings file
+## widths are in inches
+## everything lives under the top-level "settings" key, which is what the script reads
+settings:
+  ## what files to search for
+  ## see globbing rules here : https://docs.python.org/3/library/glob.html
+  filetypes:
+    glob: "*.srt"
+  ## table settings
+  table:
+    cols: 4
+    headers: ["start", "end", "delta", "content"]
+  ## margins
+  layout:
+    margin_left: 0.25
+    margin_right: 0.25
+  ## document meta data
+  meta:
+    author: "someone@example.com"
+    category: "video"
+    comments: "This is just an example xyzzy"
+    content_status: "draft"
+    keywords: "example,test,demo,srt"
+    language: "en"
+    subject: "testing"
+    version: "1.0.0"
+  ## footer settings
+  footer:
+    show: true
+    ## file name of an image in the assets directory
+    watermark: "flour006.png"
+    width_in: 0.50
\ No newline at end of file