Skip to content

Commit

Permalink
Merge pull request #96 from Technoculture/vad
Browse files Browse the repository at this point in the history
VAD for longer audio transcription with pause detections
  • Loading branch information
sutyum authored Sep 23, 2024
2 parents 9fab122 + 7975477 commit 1a4386c
Show file tree
Hide file tree
Showing 13 changed files with 669 additions and 238 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"@radix-ui/react-toggle": "1.0.3",
"@radix-ui/react-tooltip": "1.0.6",
"@react-hook/media-query": "1.1.1",
"@ricky0123/vad-react": "^0.0.24",
"@sentry/nextjs": "7.61.0",
"@t3-oss/env-core": "0.3.1",
"@t3-oss/env-nextjs": "0.3.1",
Expand Down
82 changes: 82 additions & 0 deletions public/vad/ort-training-wasm-simd-threaded.mjs

Large diffs are not rendered by default.

Binary file added public/vad/ort-training-wasm-simd-threaded.wasm
Binary file not shown.
127 changes: 127 additions & 0 deletions public/vad/ort-wasm-simd-threaded.jsep.mjs

Large diffs are not rendered by default.

Binary file added public/vad/ort-wasm-simd-threaded.jsep.wasm
Binary file not shown.
80 changes: 80 additions & 0 deletions public/vad/ort-wasm-simd-threaded.mjs

Large diffs are not rendered by default.

Binary file added public/vad/ort-wasm-simd-threaded.wasm
Binary file not shown.
1 change: 1 addition & 0 deletions public/vad/polyfills.js

Large diffs are not rendered by default.

Binary file added public/vad/silero_vad.onnx
Binary file not shown.
1 change: 1 addition & 0 deletions public/vad/vad.worklet.bundle.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

135 changes: 135 additions & 0 deletions src/components/VadAudio.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"use client";

import { useState, useRef, useCallback, useEffect } from "react";
import { useMicVAD, utils } from "@ricky0123/vad-react";
import { Microphone, StopCircle } from "@phosphor-icons/react";
import { Button } from "@/components/button";
import { cn } from "@/lib/utils";

/** Props accepted by the `VadAudio` microphone control. */
interface VadAudioProps {
/**
 * Invoked with a WAV-encoded `File` (named "audio.wav") each time the
 * voice-activity detector reports the end of a speech segment.
 */
onAudioCapture: (audioFile: File) => void;
/** Invoked when the user presses the button to begin listening. */
onStartListening: () => void;
/** Invoked when the user presses the button to stop listening. */
onStopListening: () => void;
/**
 * Layout switch: when falsy, the wrapper additionally gets the
 * "flex-col-reverse sm:flex-row" classes; when true, no extra layout
 * classes are applied. Treated as `false` when omitted.
 */
isHome?: boolean;
}

/** Formats an elapsed time in milliseconds as a zero-padded `MM:SS` string. */
function formatDuration(elapsedMs: number): string {
  const minutes = Math.floor(elapsedMs / 60000);
  const seconds = Math.floor((elapsedMs % 60000) / 1000);
  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
}

/**
 * Microphone toggle backed by voice-activity detection (VAD).
 *
 * While listening, every detected speech segment is encoded as a WAV file and
 * handed to `onAudioCapture`; an `MM:SS` timer is shown next to the button.
 *
 * @param onAudioCapture   Receives one "audio.wav" `File` per detected speech segment.
 * @param onStartListening Called after the VAD has been started by the user.
 * @param onStopListening  Called when the user stops listening.
 * @param isHome           When false (default), the stacked/row responsive layout classes are added.
 */
export default function VadAudio({
  onAudioCapture,
  onStartListening,
  onStopListening,
  isHome = false,
}: VadAudioProps) {
  const [isListening, setIsListening] = useState(false);
  const [duration, setDuration] = useState("00:00");
  // `ReturnType<typeof setInterval>` is correct under both DOM and Node typings.
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef<number | null>(null);

  const vad = useMicVAD({
    onSpeechEnd: (audio: Float32Array) => {
      const wavBuffer = utils.encodeWAV(audio);
      const audioBlob = new Blob([wavBuffer], { type: "audio/wav" });
      const audioFile = new File([audioBlob], "audio.wav", {
        type: "audio/wav",
      });

      onAudioCapture(audioFile);
    },
    // VAD assets are served from /public/vad rather than fetched from a CDN.
    workletURL: "/vad/vad.worklet.bundle.min.js",
    modelURL: "/vad/silero_vad.onnx",
    ortConfig: (ort) => {
      ort.env.wasm.wasmPaths = "/vad/";
    },
    startOnLoad: false,
    submitUserSpeechOnPause: true,
  });

  /** Stops the ticking interval (if any) and forgets the start time. */
  const clearTimer = useCallback(() => {
    if (timerRef.current !== null) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
    startTimeRef.current = null;
  }, []);

  /** Starts the once-per-second duration display, replacing any running timer. */
  const startTimer = useCallback(() => {
    // Clear first so a repeated start can never orphan a previous interval.
    clearTimer();
    startTimeRef.current = Date.now();
    timerRef.current = setInterval(() => {
      const startedAt = startTimeRef.current;
      if (startedAt === null) {
        return;
      }
      setDuration(formatDuration(Date.now() - startedAt));
    }, 1000);
  }, [clearTimer]);

  // The VAD cannot start until its model has loaded, and never if loading failed.
  const vadUnavailable = vad.loading || Boolean(vad.errored);

  const handleStartListening = useCallback(() => {
    // Starting while the VAD is unavailable would be a silent no-op inside the
    // hook, leaving the timer and the parent out of sync with `vad.listening`.
    if (vad.loading || vad.errored) {
      return;
    }
    vad.start();
    startTimer();
    onStartListening();
    setIsListening(true);
  }, [vad, startTimer, onStartListening]);

  const handleStopListening = useCallback(() => {
    setIsListening(false);
    onStopListening();
    vad.pause();
    clearTimer();
    setDuration("00:00");
  }, [vad, clearTimer, onStopListening]);

  // Make sure the interval does not outlive the component.
  useEffect(() => clearTimer, [clearTimer]);

  return (
    <div
      className={cn(
        "flex items-center gap-2",
        !isHome ? "flex-col-reverse sm:flex-row" : "",
      )}
    >
      <Button
        onClick={vad.listening ? handleStopListening : handleStartListening}
        size="icon"
        variant={vad.listening ? "destructive" : "secondary"}
        type="button"
        className="disabled:text-muted"
        // Stopping must always be possible; only starting is gated on readiness.
        disabled={!vad.listening && vadUnavailable}
      >
        {vad.listening ? (
          <StopCircle
            className="h-4 w-4 fill-current"
            color="#618a9e"
            weight="bold"
          />
        ) : (
          <Microphone
            className="h-4 w-4 fill-current"
            color="#618a9e"
            weight="bold"
          />
        )}
      </Button>
      {isListening ? <span>{duration}</span> : null}
    </div>
  );
}
Loading

0 comments on commit 1a4386c

Please sign in to comment.