diff --git a/.changeset/odd-eels-repair.md b/.changeset/odd-eels-repair.md
new file mode 100644
index 0000000..ccf2c37
--- /dev/null
+++ b/.changeset/odd-eels-repair.md
@@ -0,0 +1,5 @@
+---
+"expo-speech-recognition": patch
+---
+
+Fixed handling of interim and final results on web
diff --git a/src/ExpoSpeechRecognitionModule.web.ts b/src/ExpoSpeechRecognitionModule.web.ts
index f3186bc..c266214 100644
--- a/src/ExpoSpeechRecognitionModule.web.ts
+++ b/src/ExpoSpeechRecognitionModule.web.ts
@@ -4,6 +4,7 @@ import type {
   ExpoSpeechRecognitionNativeEventMap,
   ExpoSpeechRecognitionNativeEvents,
   ExpoSpeechRecognitionOptions,
+  ExpoSpeechRecognitionResultSegment,
 } from "./ExpoSpeechRecognitionModule.types";
 
 let _speechRecognitionRef: SpeechRecognition | null = null;
@@ -312,22 +313,69 @@ const webToNativeEventMap: {
   end: (ev) => null,
   error: (ev) => ({ error: ev.error, message: ev.message }),
   nomatch: (ev) => null,
-  result: (ev) => {
-    const nativeResults: ExpoSpeechRecognitionNativeEventMap["result"]["results"] =
-      [];
-
-    for (let i = 0; i < ev.results[ev.resultIndex].length; i++) {
-      const result = ev.results[ev.resultIndex][i];
-      nativeResults.push({
-        transcript: result.transcript,
-        confidence: result.confidence,
-        segments: [],
-      });
+  // Maps a web `result` event to the native `result` payload. A final result
+  // yields one entry per item of `ev.results[ev.resultIndex]`; interim results
+  // are merged into a single entry built from every result at or after
+  // `ev.resultIndex`.
+  result: (ev): ExpoSpeechRecognitionNativeEventMap["result"] => {
+    const isFinal = Boolean(ev.results[ev.resultIndex]?.isFinal);
+
+    if (isFinal) {
+      const results: ExpoSpeechRecognitionNativeEventMap["result"]["results"] =
+        [];
+
+      for (let i = 0; i < ev.results[ev.resultIndex].length; i++) {
+        const result = ev.results[ev.resultIndex][i];
+        results.push({
+          transcript: result.transcript,
+          confidence: result.confidence,
+          segments: [],
+        });
+      }
+      return {
+        isFinal: true,
+        results,
+      };
+    }
+
+    // Interim results: Append to the transcript
+    let transcript = "";
+    const segments: ExpoSpeechRecognitionResultSegment[] = [];
+
+    for (let i = ev.resultIndex; i < ev.results.length; i++) {
+      const resultList = ev.results[i];
+
+      for (let j = 0; j < resultList.length; j++) {
+        const result = resultList[j];
+        if (!result) {
+          continue;
+        }
+        segments.push({
+          confidence: result.confidence,
+          segment: result.transcript,
+          startTimeMillis: 0,
+          endTimeMillis: 0,
+        });
+
+        // The final case returned above, so every piece here is interim.
+        transcript += result.transcript;
+      }
     }
 
     return {
-      isFinal: Boolean(ev.results[ev.resultIndex]?.isFinal),
-      results: nativeResults,
+      isFinal: false,
+      results: [
+        {
+          transcript,
+          // Mean segment confidence; 0 when no segments were collected so
+          // the payload never carries NaN from a 0 / 0 division.
+          confidence:
+            segments.length > 0
+              ? segments.reduce((acc, curr) => acc + curr.confidence, 0) /
+                segments.length
+              : 0,
+          segments,
+        },
+      ],
     };
   },
   soundstart: (ev) => null,