feat: android language detection

jamsch · Oct 4, 2024 · 2b2a9ff · 2b2a9ff
1 parent 522892e
commit 2b2a9ff
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 23 deletions.
diff --git a/README.md b/README.md
@@ -304,17 +304,18 @@ ExpoSpeechRecognitionModule.abort();
 
 Events are largely based on the [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition). The following events are supported:
 
-| Event Name    | Description                                                                                | Notes                                                                                                                                                                                                                                                                                    |
-| ------------- | ------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `audiostart`  | Audio capturing has started                                                                | Includes the `uri` if `recordingOptions.persist` is enabled.                                                                                                                                                                                                                             |
-| `audioend`    | Audio capturing has ended                                                                  | Includes the `uri` if `recordingOptions.persist` is enabled.                                                                                                                                                                                                                             |
-| `end`         | Speech recognition service has disconnected.                                               | This should always be the last event dispatched, including after errors.                                                                                                                                                                                                                 |
-| `error`       | Fired when a speech recognition error occurs.                                              | You'll also receive an `error` event (with code "aborted") when calling `.abort()`                                                                                                                                                                                                       |
-| `nomatch`     | Speech recognition service returns a final result with no significant recognition.         | You may have non-final results recognized. This may get emitted after cancellation.                                                                                                                                                                                                      |
-| `result`      | Speech recognition service returns a word or phrase has been positively recognized.        | On Android, continous mode runs as a segmented session, meaning when a final result is reached, additional partial and final results will cover a new segment separate from the previous final result. On iOS, you should expect one final result before speech recognition has stopped. |
-| `speechstart` | Fired when any sound — recognizable speech or not — has been detected                      | On iOS, this will fire once in the session after a result has occurred                                                                                                                                                                                                                   |
-| `speechend`   | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS                                                                                                                                                                                                                                                                 |
-| `start`       | Speech recognition has started                                                             | Use this event to indicate to the user when to speak.                                                                                                                                                                                                                                    |
+| Event Name          | Description                                                                                | Notes                                                                                                                                                                                                                                                                                    |
+| ------------------- | ------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `audiostart`        | Audio capturing has started                                                                | Includes the `uri` if `recordingOptions.persist` is enabled.                                                                                                                                                                                                                             |
+| `audioend`          | Audio capturing has ended                                                                  | Includes the `uri` if `recordingOptions.persist` is enabled.                                                                                                                                                                                                                             |
+| `end`               | Speech recognition service has disconnected.                                               | This should always be the last event dispatched, including after errors.                                                                                                                                                                                                                 |
+| `error`             | Fired when a speech recognition error occurs.                                              | You'll also receive an `error` event (with code "aborted") when calling `.abort()`                                                                                                                                                                                                       |
+| `nomatch`           | Speech recognition service returns a final result with no significant recognition.         | You may have non-final results recognized. This may get emitted after cancellation.                                                                                                                                                                                                      |
+| `result`            | Speech recognition service returns a word or phrase that has been positively recognized.   | On Android, continuous mode runs as a segmented session, meaning when a final result is reached, additional partial and final results will cover a new segment separate from the previous final result. On iOS, you should expect one final result before speech recognition has stopped. |
+| `speechstart`       | Fired when any sound — recognizable speech or not — has been detected                      | On iOS, this will fire once in the session after a result has occurred                                                                                                                                                                                                                   |
+| `speechend`         | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS                                                                                                                                                                                                                                                                 |
+| `start`             | Speech recognition has started                                                             | Use this event to indicate to the user when to speak.                                                                                                                                                                                                                                    |
+| `languagedetection` | Fired when the language detection (and switching) results are available.                   | Android 14+ only. Enabled with `EXTRA_ENABLE_LANGUAGE_DETECTION` in the `androidIntent` option when starting. Can also be fired multiple times by enabling `EXTRA_ENABLE_LANGUAGE_SWITCH`.                                                                                               |
 
 ## Handling Errors
 

diff --git a/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt b/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt
@@ -325,26 +325,32 @@ class ExpoSpeechRecognitionModule : Module() {
         promise: Promise,
     ) {
         if (Build.VERSION.SDK_INT < Build.VERSION_CODES.TIRAMISU) {
-            promise.resolve(mapOf(
-                "locales" to mutableListOf<String>(),
-                "installedLocales" to mutableListOf<String>(),
-            ))
+            promise.resolve(
+                mapOf(
+                    "locales" to mutableListOf<String>(),
+                    "installedLocales" to mutableListOf<String>(),
+                ),
+            )
             return
         }
 
         if (options.androidRecognitionServicePackage == null && !SpeechRecognizer.isOnDeviceRecognitionAvailable(appContext)) {
-            promise.resolve(mapOf(
-                "locales" to mutableListOf<String>(),
-                "installedLocales" to mutableListOf<String>(),
-            ))
+            promise.resolve(
+                mapOf(
+                    "locales" to mutableListOf<String>(),
+                    "installedLocales" to mutableListOf<String>(),
+                ),
+            )
             return
         }
 
         if (options.androidRecognitionServicePackage != null && !SpeechRecognizer.isRecognitionAvailable(appContext)) {
-            promise.resolve(mapOf(
-                "locales" to mutableListOf<String>(),
-                "installedLocales" to mutableListOf<String>(),
-            ))
+            promise.resolve(
+                mapOf(
+                    "locales" to mutableListOf<String>(),
+                    "installedLocales" to mutableListOf<String>(),
+                ),
+            )
             return
         }
 

diff --git a/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechService.kt b/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechService.kt
@@ -561,6 +561,15 @@ class ExpoSpeechService(
             else -> 0.0f
         }
 
+    /**
+     * Converts a `SpeechRecognizer.LANGUAGE_DETECTION_CONFIDENCE_LEVEL_*` constant into a float score.
+     *
+     * Highly confident maps to 1.0, confident to 0.8 and not confident to 0.5.
+     * The unknown level, like any other unrecognized value, maps to 0.0.
+     */
+    private fun languageDetectionConfidenceLevelToFloat(confidenceLevel: Int): Float {
+        return when (confidenceLevel) {
+            SpeechRecognizer.LANGUAGE_DETECTION_CONFIDENCE_LEVEL_HIGHLY_CONFIDENT -> 1.0f
+            SpeechRecognizer.LANGUAGE_DETECTION_CONFIDENCE_LEVEL_CONFIDENT -> 0.8f
+            SpeechRecognizer.LANGUAGE_DETECTION_CONFIDENCE_LEVEL_NOT_CONFIDENT -> 0.5f
+            // LANGUAGE_DETECTION_CONFIDENCE_LEVEL_UNKNOWN falls through here as well.
+            else -> 0.0f
+        }
+    }
+
     override fun onResults(results: Bundle?) {
         val resultsList = getResults(results)
 
@@ -594,6 +603,14 @@ class ExpoSpeechService(
         }
     }
 
+    /**
+     * Forwards language detection results to JS as a `languagedetection` event.
+     *
+     * The payload carries the detected language, the confidence level converted to a float via
+     * [languageDetectionConfidenceLevelToFloat], and the alternative locales.
+     */
+    override fun onLanguageDetection(results: Bundle) {
+        // Bundle.getStringArrayList returns null when the key is absent; fall back to an empty
+        // list so the payload always matches the `string[]` type declared for the event.
+        val topLocaleAlternatives =
+            results.getStringArrayList(SpeechRecognizer.TOP_LOCALE_ALTERNATIVES) ?: arrayListOf<String>()
+
+        sendEvent(
+            "languagedetection",
+            mapOf(
+                // NOTE(review): getString may return null here while the TS type declares `string` — confirm.
+                "detectedLanguage" to results.getString(SpeechRecognizer.DETECTED_LANGUAGE),
+                "confidence" to
+                    languageDetectionConfidenceLevelToFloat(
+                        results.getInt(SpeechRecognizer.LANGUAGE_DETECTION_CONFIDENCE_LEVEL),
+                    ),
+                "topLocaleAlternatives" to topLocaleAlternatives,
+            ),
+        )
+    }
+
     /**
      * For API 33: Basically same as onResults but doesn't stop
      */

diff --git a/src/ExpoSpeechRecognitionModule.types.ts b/src/ExpoSpeechRecognitionModule.types.ts
@@ -88,6 +88,23 @@ export type ExpoSpeechRecognitionErrorEvent = {
   message: string;
 };
 
+/**
+ * Payload of the `languagedetection` event.
+ */
+export type LanguageDetectionEvent = {
+  /** The language that was detected, in BCP-47 format. e.g. "en-US", "de-DE" */
+  detectedLanguage: string;
+  /**
+   * The confidence of the detected language, reported as one of these values:
+   *
+   * - 1.0 (highly confident)
+   * - 0.8 (confident)
+   * - 0.5 (not confident)
+   * - 0.0 (unknown)
+   */
+  confidence: number;
+  /** The alternative locales for the same language, in BCP-47 format. e.g. ["en-US", "en-GB"] */
+  topLocaleAlternatives: string[];
+};
+
 /**
  * Events that are dispatched from the native side
  */
@@ -127,6 +144,7 @@ export type ExpoSpeechRecognitionNativeEventMap = {
   end: null;
   soundstart: null;
   soundend: null;
+  languagedetection: LanguageDetectionEvent;
 };
 
 export type ExpoSpeechRecognitionOptions = {