-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWhisperExample.cs
96 lines (81 loc) · 3.34 KB
/
WhisperExample.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
namespace Abuksigun.WhisperCpp
{
    /// <summary>
    /// Example component that records from a microphone in fixed-length chunks and
    /// feeds every chunk that contains loud samples to a <see cref="WhisperModel"/>.
    /// Text produced by the model is written to the Unity console.
    /// Use the "List Microphones", "Run" and "Stop" context-menu entries to drive it.
    /// </summary>
    [ExecuteInEditMode]
    public class WhisperExample : MonoBehaviour
    {
        // A chunk is forwarded to the model only if at least one sample's absolute
        // amplitude exceeds this value.
        const float VoiceAmplitudeThreshold = 0.1f;

        CancellationTokenSource cts;
        WhisperModel model;

        // Runtime-only state. [NonSerialized] keeps Unity from restoring stale values
        // after a script reload, which would otherwise leave Run permanently blocked.
        [NonSerialized] bool isRunning;
        [NonSerialized] string activeMicDevice;

        // Download a whisper.cpp ggml model, e.g. https://huggingface.co/ggerganov/whisper.cpp/blob/main/ggml-medium.bin
        [SerializeField] string modelPath = "Models/ggml-medium.bin"; // Path relative to StreamingAssets
        [SerializeField] int sampleRate = 16000;
        [SerializeField] int lengthSeconds = 1;
        [SerializeField] AudioSource audioSource;
        [SerializeField] int micDeviceIndex = -1; // Run ListMicrophones to see available devices with indices

        /// <summary>
        /// Logs every available microphone together with its index, one per line.
        /// The index is the value expected in <c>micDeviceIndex</c>.
        /// </summary>
        [ContextMenu("List Microphones")]
        public void ListMicrophones()
        {
            string[] devices = Microphone.devices;
            var listing = new StringBuilder();
            for (int index = 0; index < devices.Length; index++)
            {
                listing.AppendLine($"{index}: {devices[index]}");
            }
            Debug.Log(listing.ToString());
        }

        /// <summary>
        /// Loads the model if it is not loaded yet, starts recording from the selected
        /// microphone and, until <see cref="Stop"/> is called, hands each recorded chunk
        /// that exceeds the amplitude threshold to the model.
        /// </summary>
        /// <remarks>
        /// This stays <c>async void</c> because it is invoked from a context menu and its
        /// signature must not change. Only one run can be active at a time.
        /// </remarks>
        [ContextMenu("Run")]
        public async void RunAsync()
        {
            if (isRunning)
            {
                // A second concurrent run would share the microphone and the AudioSource
                // with the first one, and whichever finished first would stop both.
                Debug.LogWarning("WhisperExample is already running. Call Stop before starting again.");
                return;
            }

            string[] devices = Microphone.devices;
            if (micDeviceIndex < 0 || micDeviceIndex >= devices.Length)
            {
                Debug.LogError($"Invalid mic device index: {micDeviceIndex}");
                return;
            }

            if (audioSource == null)
            {
                Debug.LogError("AudioSource is not assigned.");
                return;
            }

            if (lengthSeconds <= 0 || sampleRate <= 0)
            {
                Debug.LogError($"Invalid recording settings: lengthSeconds={lengthSeconds}, sampleRate={sampleRate}");
                return;
            }

            string micDevice = devices[micDeviceIndex];

            // The token is captured locally so this run never observes a token source
            // that was assigned to the field later.
            var runCts = new CancellationTokenSource();
            CancellationToken token = runCts.Token;
            cts = runCts;
            isRunning = true;

            try
            {
                if (model == null)
                {
                    int threadsN = SystemInfo.processorCount;
                    var whisperParams = new WhisperModel.WhisperParams("en", threadsN);
                    string fullModelPath = Path.Join(Application.streamingAssetsPath, modelPath);

                    // Fully qualified: a using directive for System.Diagnostics would make
                    // 'Debug' ambiguous with UnityEngine.Debug.
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                    model = await WhisperModel.LoadModel(fullModelPath, whisperParams, true);
                    if (model == null)
                    {
                        Debug.LogError($"Failed to load model: {fullModelPath}");
                        return;
                    }
                    Debug.Log($"Model loaded in {stopwatch.Elapsed.TotalSeconds:F1} seconds.");
                }

                // Stop may have been requested while the model was loading.
                if (token.IsCancellationRequested)
                {
                    return;
                }

                AudioClip clip = Microphone.Start(micDevice, true, lengthSeconds, sampleRate);
                if (clip == null)
                {
                    Debug.LogError($"Failed to start recording from microphone: {micDevice}");
                    return;
                }

                activeMicDevice = micDevice;
                audioSource.clip = clip;
                audioSource.Play();

                // Not awaited, exactly as before; presumably this starts the model's own
                // processing, which observes the same token to stop.
                model.RunAsync(text => Debug.Log(text), token);

                while (!token.IsCancellationRequested)
                {
                    // Wait for the audio clip to fill. Passing the token makes Stop take
                    // effect immediately instead of after one more chunk.
                    await Task.Delay(lengthSeconds * 1000, token);
                    Debug.Log(Microphone.GetPosition(micDevice));

                    // A fresh buffer per chunk: it is not visible from here whether the
                    // model copies the samples or keeps a reference to the array.
                    float[] audioData = new float[clip.samples * clip.channels];
                    clip.GetData(audioData, 0);

                    int deviationCount = audioData.Count(x => Mathf.Abs(x) > VoiceAmplitudeThreshold);
                    if (deviationCount > 0)
                    {
                        Debug.Log($"Voice detected! {deviationCount}");
                        model.AddPcmf32(audioData);
                    }
                }
            }
            catch (OperationCanceledException) when (token.IsCancellationRequested)
            {
                // Normal shutdown: Stop cancelled the pending delay.
            }
            finally
            {
                EndCapture();
                isRunning = false;
                if (cts == runCts)
                {
                    cts = null;
                }

                // Also cancel when the loop ended because of an error, so whatever
                // model.RunAsync started is told to stop as well. The source is not
                // disposed because the model may still hold the token.
                runCts.Cancel();
            }
        }

        /// <summary>
        /// Requests cancellation of the active run, if any, and immediately stops the
        /// microphone that run started. Does nothing when no run is active.
        /// </summary>
        [ContextMenu("Stop")]
        public void Stop()
        {
            cts?.Cancel();
            EndCapture();
        }

        // Stops capture when the component is disabled or destroyed, so the loop never
        // touches a destroyed AudioSource and the microphone is released.
        void OnDisable()
        {
            Stop();
        }

        // Ends the recording started by the current run and stops playback. Safe to call
        // repeatedly; it does nothing once the device has been released.
        void EndCapture()
        {
            if (activeMicDevice == null)
            {
                return;
            }

            Microphone.End(activeMicDevice);
            activeMicDevice = null;

            if (audioSource != null)
            {
                audioSource.Stop();
            }
        }
    }
}