Skip to content

Commit a0f442e

Browse files
authored
[googlestt] Fix drop bytes (#14649)
* [googlestt] Fix drop bytes
* fix unhandled cancelation error when using single utterance mode

Signed-off-by: Miguel Álvarez <[email protected]>
1 parent 4f53371 commit a0f442e

File tree

1 file changed

+22
-12
lines changed
  • bundles/org.openhab.voice.googlestt/src/main/java/org/openhab/voice/googlestt/internal

1 file changed

+22
-12
lines changed

bundles/org.openhab.voice.googlestt/src/main/java/org/openhab/voice/googlestt/internal/GoogleSTTService.java

+22-12
Original file line number | Diff line number | Diff line change
@@ -255,14 +255,20 @@ private void streamAudio(ClientStream<StreamingRecognizeRequest> clientStream, A
255255
long startTime = System.currentTimeMillis();
256256
long maxTranscriptionMillis = (config.maxTranscriptionSeconds * 1000L);
257257
long maxSilenceMillis = (config.maxSilenceSeconds * 1000L);
258-
int readBytes = 6400;
259-
while (!aborted.get()) {
260-
byte[] data = new byte[readBytes];
261-
int dataN = audioStream.read(data);
258+
final int bufferSize = 6400;
259+
int numBytesRead;
260+
int remaining = bufferSize;
261+
byte[] audioBuffer = new byte[bufferSize];
262+
while (!aborted.get() && !responseObserver.isDone()) {
263+
numBytesRead = audioStream.read(audioBuffer, bufferSize - remaining, remaining);
262264
if (aborted.get()) {
263265
logger.debug("Stops listening, aborted");
264266
break;
265267
}
268+
if (numBytesRead == -1) {
269+
logger.debug("End of stream");
270+
break;
271+
}
266272
if (isExpiredInterval(maxTranscriptionMillis, startTime)) {
267273
logger.debug("Stops listening, max transcription time reached");
268274
break;
@@ -272,18 +278,17 @@ && isExpiredInterval(maxSilenceMillis, responseObserver.getLastInputTime())) {
272278
logger.debug("Stops listening, max silence time reached");
273279
break;
274280
}
275-
if (dataN != readBytes) {
276-
try {
277-
Thread.sleep(100);
278-
} catch (InterruptedException e) {
279-
}
281+
if (numBytesRead != remaining) {
282+
remaining = remaining - numBytesRead;
280283
continue;
281284
}
285+
remaining = bufferSize;
282286
StreamingRecognizeRequest dataRequest = StreamingRecognizeRequest.newBuilder()
283-
.setAudioContent(ByteString.copyFrom(data)).build();
284-
logger.debug("Sending audio data {}", dataN);
287+
.setAudioContent(ByteString.copyFrom(audioBuffer)).build();
288+
logger.debug("Sending audio data {}", bufferSize);
285289
clientStream.send(dataRequest);
286290
}
291+
audioStream.close();
287292
}
288293

289294
private void sendStreamConfig(ClientStream<StreamingRecognizeRequest> clientStream,
@@ -335,6 +340,7 @@ private static class TranscriptionListener implements ResponseObserver<Streaming
335340
private float confidenceSum = 0;
336341
private int responseCount = 0;
337342
private long lastInputTime = 0;
343+
private boolean done = false;
338344

339345
public TranscriptionListener(STTListener sttListener, GoogleSTTConfiguration config, AtomicBoolean aborted) {
340346
this.sttListener = sttListener;
@@ -374,7 +380,7 @@ public void onResponse(StreamingRecognizeResponse response) {
374380
responseCount++;
375381
// when in single utterance mode we can just get one final result so complete
376382
if (config.singleUtteranceMode) {
377-
onComplete();
383+
done = true;
378384
}
379385
}
380386
});
@@ -411,6 +417,10 @@ public void onError(@Nullable Throwable t) {
411417
}
412418
}
413419

420+
public boolean isDone() {
421+
return done;
422+
}
423+
414424
public long getLastInputTime() {
415425
return lastInputTime;
416426
}

0 commit comments

Comments
 (0)