Add Replicate demo and API #42

Open · wants to merge 8 commits into base: main
3 changes: 3 additions & 0 deletions README.md
@@ -16,6 +16,9 @@ Not convinced? Here are some benchmarks we ran on a free [Google Colab T4 GPU](/
| Faster Whisper (`fp16` + `beam_size [1]`) | ~9.23 (*9 min 23 sec*) |
| Faster Whisper (`8-bit` + `beam_size [1]`) | ~8 (*8 min 15 sec*) |

Try the Replicate demo here: [![Replicate](https://replicate.com/cjwbw/insanely-fast-whisper/badge)](https://replicate.com/cjwbw/insanely-fast-whisper)

That redirects to https://replicate.com/vaibhavs10/incredibly-fast-whisper. Does it help you track incoming users?


No, that's just a leftover old URL that redirects to the renamed model.


Suggested change
Try the Replicate demo here: [![Replicate](https://replicate.com/cjwbw/insanely-fast-whisper/badge)](https://replicate.com/cjwbw/insanely-fast-whisper)
Try the Replicate demo here: [![Replicate](https://replicate.com/vaibhavs10/incredibly-fast-whisper/badge)](https://replicate.com/vaibhavs10/incredibly-fast-whisper)
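Since the badge points at a hosted Replicate model, here is a minimal sketch of calling it with the official `replicate` Python client. It assumes `pip install replicate`, a `REPLICATE_API_TOKEN` in the environment, and that the published model keeps the input names from `predict.py` in this PR; the exact model slug and whether a `:<version>` suffix is required may differ.

```python
import replicate  # assumes: pip install replicate, REPLICATE_API_TOKEN set

# Input names mirror the Input(...) fields in predict.py; the slug below is the
# renamed model mentioned in this thread and may need a ":<version>" suffix.
output = replicate.run(
    "vaibhavs10/incredibly-fast-whisper",
    input={
        "audio": open("sample.mp3", "rb"),  # local file handle; a public URL string also works
        "task": "transcribe",
        "batch_size": 24,
        "return_timestamps": True,
    },
)
print(output["text"])    # full transcript
print(output["chunks"])  # timestamped segments (when return_timestamps=True)
```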



## 🆕 Blazingly fast transcriptions via your terminal! ⚡️

We've added a CLI to enable fast transcriptions. Here's how you can use it:
…
22 changes: 22 additions & 0 deletions cog.yaml
@@ -0,0 +1,22 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  system_packages:
    - "ffmpeg"
    - "ninja-build"
  python_version: "3.11"
  python_packages:
    - "torch==2.0.1"
    - "tqdm==4.66.1"
    - "more-itertools==10.1.0"
    - "transformers==4.35.0"
    - "ffmpeg-python==0.2.0"
    - "openai-whisper==20231106"
    - "optimum==1.14.0"
  run:
    - pip install packaging
    - pip install flash-attn --no-build-isolation

predict: "predict.py:Predictor"
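The `run:` step above installs flash-attn so that `predict.py` can pass `model_kwargs={"use_flash_attention_2": True}`. A hedged sketch, not part of this PR: a quick availability check at startup gives a clearer error than a failure deep inside transformers if the wheel did not build.

```python
# Hypothetical guard; flash_attn is the module provided by the flash-attn wheel.
import importlib.util

if importlib.util.find_spec("flash_attn") is None:
    raise RuntimeError(
        "flash-attn is not installed; add it via cog.yaml's run step "
        "or drop use_flash_attention_2 from model_kwargs in predict.py"
    )
```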
74 changes: 74 additions & 0 deletions predict.py
@@ -0,0 +1,74 @@
from typing import Any
import torch
from transformers import (
    WhisperFeatureExtractor,
    WhisperTokenizerFast,
    WhisperForConditionalGeneration,
    pipeline,
)
from cog import BasePredictor, Input, Path


class Predictor(BasePredictor):
    def setup(self):
        """Loads whisper models into memory to make running multiple predictions efficient"""
        model_cache = "model_cache"
        local_files_only = True  # set to true after the model is cached to model_cache
        model_id = "openai/whisper-large-v3"
        torch_dtype = torch.float16
        device = "cuda:0"
        model = WhisperForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            cache_dir=model_cache,
            local_files_only=local_files_only,
        ).to(device)

        tokenizer = WhisperTokenizerFast.from_pretrained(
            model_id, cache_dir=model_cache, local_files_only=local_files_only
        )
        feature_extractor = WhisperFeatureExtractor.from_pretrained(
            model_id, cache_dir=model_cache, local_files_only=local_files_only
        )

        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            model_kwargs={"use_flash_attention_2": True},
            torch_dtype=torch_dtype,
            device=device,
        )

    def predict(
        self,
        audio: Path = Input(description="Audio file"),
        task: str = Input(
            choices=["transcribe", "translate"],
            default="transcribe",
            description="Task to perform: transcribe or translate to another language. (default: transcribe).",
        ),
        language: str = Input(
            default=None,
            description="Optional. Language spoken in the audio, specify None to perform language detection.",
        ),
        batch_size: int = Input(
            default=24,
            description="Number of parallel batches you want to compute. Reduce if you face OOMs. (default: 24).",
        ),
        return_timestamps: bool = Input(
            default=True,
            description="Return timestamps information when set to True.",
        ),
    ) -> Any:
        """Transcribes and optionally translates a single audio file"""

        outputs = self.pipe(
            str(audio),
            chunk_length_s=30,
            batch_size=batch_size,
            generate_kwargs={"task": task, "language": language},
            return_timestamps=return_timestamps,
        )
        return outputs
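For testing outside Replicate, a rough sketch of exercising the same Predictor directly. It assumes a CUDA GPU, flash-attn installed, and the whisper-large-v3 weights already cached under ./model_cache (since `local_files_only=True` in setup); `sample.mp3` is a placeholder file name.

```python
# Hypothetical local smoke test for predict.py; run on a CUDA machine after the
# whisper-large-v3 weights have been downloaded into ./model_cache.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads model, tokenizer, feature extractor onto cuda:0

result = predictor.predict(
    audio="sample.mp3",        # placeholder path to any short audio clip
    task="transcribe",
    language=None,             # None lets Whisper auto-detect the language
    batch_size=24,
    return_timestamps=True,
)
print(result["text"])          # full transcript
print(result["chunks"][:3])    # first few timestamped segments
```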