Skip to content

Commit 8d38bf6

Browse files
authored
[scripts] Add a script for adding timestamps to a video (#63)
1 parent 3723d31 commit 8d38bf6

File tree

2 files changed

+1428
-0
lines changed

2 files changed

+1428
-0
lines changed

scripts/add_timestamps.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#!/usr/bin/env -S uv run --script
2+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# https://docs.astral.sh/uv/guides/scripts/#using-a-shebang-to-create-an-executable-file
18+
# /// script
19+
# requires-python = ">=3.10"
20+
# dependencies = [
21+
# "cosmos-reason1-utils",
22+
# "imageio_ffmpeg>=0.6.0",
23+
# "torch>=2.7.1",
24+
# "torchcodec>=0.6.0",
25+
# ]
26+
# [tool.uv.sources]
27+
# cosmos-reason1-utils = {path = "../cosmos_reason1_utils", editable = true}
28+
# torch = [
29+
# { index = "pytorch-cu128"},
30+
# ]
31+
# torchvision = [
32+
# { index = "pytorch-cu128"},
33+
# ]
34+
# [[tool.uv.index]]
35+
# name = "pytorch-cu128"
36+
# url = "https://download.pytorch.org/whl/cu128"
37+
# explicit = true
38+
# ///
39+
40+
"""Overlay timestamps at the bottom of a video.
41+
42+
Example:
43+
44+
```shell
45+
./scripts/add_timestamps.py --video assets/sample.mp4 -o outputs/sample_timestamped.mp4
46+
```
47+
"""
48+
# ruff: noqa: E402
49+
50+
from cosmos_reason1_utils.script import init_script
51+
52+
init_script()
53+
54+
import argparse
55+
import os
56+
import pathlib
57+
58+
import imageio_ffmpeg
59+
import torch
60+
import torchcodec
61+
62+
from cosmos_reason1_utils.vision import (
63+
overlay_text_on_tensor,
64+
)
65+
66+
# Directory one level above the folder that contains this script.
ROOT = pathlib.Path(__file__).parents[1].resolve()
# Horizontal rule printed between the processing stages.
SEPARATOR = 20 * "-"
68+
69+
70+
def main():
    """Overlay timestamps on the input video and write the result to disk.

    Parses ``--video`` and ``-o/--output`` from the command line, decodes the
    input into a frame tensor, runs it through ``overlay_text_on_tensor`` and
    encodes the result using the frame rate, pixel format and bitrate
    reported for the source video.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--video", type=str, help="Video path.", required=True)
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Output path for the timestamped video.",
        required=True,
    )
    args = parser.parse_args()

    video_path = args.video

    # `os.path.dirname` returns "" for a bare file name such as "out.mp4" and
    # `os.makedirs("")` raises FileNotFoundError, so only create the parent
    # directory when the output path actually names one.
    dir_name = os.path.dirname(args.output)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)

    print(SEPARATOR)
    print("Reading video to tensor.")
    video_tensor = _read_video(video_path)

    print(SEPARATOR)
    metadata = _get_metadata(video_path)
    print("Adding watermark to each frame.")
    video_tensor = overlay_text_on_tensor(video_tensor, fps=metadata["fps"])

    print(SEPARATOR)
    print(f"Saving frames to {args.output}.")

    # The reported pixel format can carry a parenthesised qualifier, e.g.
    # "yuv420p(progressive)". Pixel format names themselves contain no
    # parentheses, so keep only the part before the first "(" instead of
    # special-casing a single qualifier.
    pix_fmt = metadata["pix_fmt"].partition("(")[0].strip()
    _write_video(
        video_tensor,
        args.output,
        fps=metadata["fps"],
        pix_fmt_out=pix_fmt,
        bitrate=str(metadata["bitrate"]),
    )
110+
111+
112+
def _read_video(video_path):
    """Decode every frame of ``video_path`` into one uint8 tensor.

    Args:
        video_path: Video location handed to
            ``torchcodec.decoders.VideoDecoder``.

    Returns:
        A ``torch.uint8`` tensor laid out as ``[T, C, H, W]`` with ``C == 3``,
        holding the frames that were actually decoded.

    Raises:
        ValueError: If a decoded frame does not have the ``(3, H, W)`` shape
            announced by the decoder metadata.
    """
    decoder = torchcodec.decoders.VideoDecoder(video_path)

    # Pre-allocate the output tensor using the decoder metadata.
    metadata = decoder.metadata
    num_frames = metadata.num_frames
    height, width = metadata.height, metadata.width
    channels = 3
    frame_shape = (channels, height, width)

    # Preallocate tensor: [T, C, H, W].
    video_tensor = torch.empty((num_frames, *frame_shape), dtype=torch.uint8)

    decoded = 0
    for idx, frame in enumerate(decoder):
        # Explicit check rather than `assert`, which is stripped under `-O`.
        if tuple(frame.shape) != frame_shape:
            raise ValueError(
                f"Frame {idx} has shape {tuple(frame.shape)}, expected {frame_shape}."
            )
        video_tensor[idx, ...] = frame
        decoded = idx + 1

    # `torch.empty` leaves memory uninitialised; if fewer frames were decoded
    # than the metadata announced, drop the unwritten tail instead of
    # returning garbage frames. This is a no-op when the counts agree.
    return video_tensor[:decoded]
129+
130+
131+
def _get_metadata(video_path):
    """Collect metadata for ``video_path`` from two sources.

    Neither ``VideoDecoder.metadata`` nor ``imageio_ffmpeg`` alone returns
    everything the caller needs, so the ``imageio_ffmpeg`` metadata dict is
    extended with the bit rate reported by ``torchcodec``.

    Args:
        video_path: Video location handed to both libraries.

    Returns:
        The metadata dict yielded by ``imageio_ffmpeg.read_frames`` with an
        additional integer ``"bitrate"`` entry.
    """
    frame_stream = imageio_ffmpeg.read_frames(video_path)
    try:
        # The generator's first item is the metadata dict, not a frame.
        info = next(frame_stream)
    finally:
        # Only the metadata is needed, so stop the reader right away.
        frame_stream.close()

    decoder_info = torchcodec.decoders.VideoDecoder(video_path).metadata
    info.update(bitrate=int(decoder_info.bit_rate))
    return info
144+
145+
146+
def _write_video(video_tensor, output_path, **ffmpeg_kwargs):
    """Encode a ``[T, C, H, W]`` frame tensor to ``output_path``.

    Args:
        video_tensor: Frames in TCHW order (presumably uint8 RGB, as produced
            by ``_read_video`` -- confirm against ``overlay_text_on_tensor``).
        output_path: Destination file for the encoded video.
        **ffmpeg_kwargs: Forwarded unchanged to
            ``imageio_ffmpeg.write_frames``.
    """
    # Expected channel order is HWC (input is TCHW).
    frames = video_tensor.permute(0, 2, 3, 1).contiguous().cpu().numpy()
    # This takes the height added for the timestamps into account.
    height, width = frames.shape[1], frames.shape[2]
    writer = imageio_ffmpeg.write_frames(
        output_path,
        size=(width, height),
        **ffmpeg_kwargs,
    )
    try:
        # Seed the generator.
        writer.send(None)
        # Feed one frame per `send()`, as the writer's documented usage
        # expects, instead of pushing the whole 4-D buffer in a single call.
        for frame in frames:
            writer.send(frame)
    finally:
        writer.close()
162+
163+
164+
# Script entry point: call `main()` only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)