jamsch · jamsch · Nov 19, 2024 · Nov 16, 2024 · Nov 17, 2024 · Nov 17, 2024
diff --git a/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt b/android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt
@@ -6,6 +6,7 @@ import android.content.ComponentName
 import android.content.Context
 import android.content.Intent
 import android.os.Build
+import android.os.Bundle
 import android.os.Handler
 import android.provider.Settings
 import android.speech.ModelDownloadListener
@@ -16,6 +17,7 @@ import android.speech.RecognizerIntent
 import android.speech.SpeechRecognizer
 import android.util.Log
 import androidx.annotation.RequiresApi
+import expo.modules.interfaces.permissions.PermissionsResponse
 import expo.modules.interfaces.permissions.Permissions.askForPermissionsWithPermissionsManager
 import expo.modules.interfaces.permissions.Permissions.getPermissionsWithPermissionsManager
 import expo.modules.kotlin.Promise
@@ -143,6 +145,46 @@ class ExpoSpeechRecognitionModule : Module() {
                 )
             }
 
+            // Forwards `promise` to the Expo permissions helper, asking for RECORD_AUDIO.
+            AsyncFunction("requestMicrophonePermissionsAsync") { promise: Promise ->
+                val permissionsManager = appContext.permissions
+                askForPermissionsWithPermissionsManager(
+                    permissionsManager, promise, RECORD_AUDIO,
+                )
+            }
+
+            // Forwards `promise` to the Expo permissions helper, querying RECORD_AUDIO.
+            AsyncFunction("getMicrophonePermissionsAsync") { promise: Promise ->
+                val permissionsManager = appContext.permissions
+                getPermissionsWithPermissionsManager(
+                    permissionsManager, promise, RECORD_AUDIO,
+                )
+            }
+
+            // Not supported on Android: logs a warning, then resolves with a fixed "granted" response.
+            AsyncFunction("getSpeechRecognizerPermissionsAsync") { promise: Promise ->
+                Log.w("ExpoSpeechRecognitionModule", "getSpeechRecognizerPermissionsAsync is not supported on Android. Returning a granted permission response.")
+                val grantedResponse = Bundle()
+                grantedResponse.putString(PermissionsResponse.EXPIRES_KEY, "never")
+                grantedResponse.putString(PermissionsResponse.STATUS_KEY, "granted")
+                grantedResponse.putBoolean(PermissionsResponse.CAN_ASK_AGAIN_KEY, false)
+                grantedResponse.putBoolean(PermissionsResponse.GRANTED_KEY, true)
+                // Keys are the PermissionsResponse constants; values are hard-coded.
+                promise.resolve(grantedResponse)
+            }
+
+            // Not supported on Android: logs a warning, then resolves with a fixed "granted" response.
+            AsyncFunction("requestSpeechRecognizerPermissionsAsync") { promise: Promise ->
+                Log.w("ExpoSpeechRecognitionModule", "requestSpeechRecognizerPermissionsAsync is not supported on Android. Returning a granted permission response.")
+                val grantedResponse = Bundle()
+                grantedResponse.putString(PermissionsResponse.EXPIRES_KEY, "never")
+                grantedResponse.putString(PermissionsResponse.STATUS_KEY, "granted")
+                grantedResponse.putBoolean(PermissionsResponse.CAN_ASK_AGAIN_KEY, false)
+                grantedResponse.putBoolean(PermissionsResponse.GRANTED_KEY, true)
+                // Keys are the PermissionsResponse constants; values are hard-coded.
+                promise.resolve(grantedResponse)
+            }
+
             AsyncFunction("getStateAsync") { promise: Promise ->
                 val state =
                     when (expoSpeechService.recognitionState) {

diff --git a/example/App.tsx b/example/App.tsx
@@ -128,24 +128,35 @@ export default function App() {
     console.log("[event]: languagedetection", ev);
   });
 
-  const startListening = () => {
+  const startListening = async () => {
     if (status !== "idle") {
       return;
     }
     setTranscription(null);
     setError(null);
     setStatus("starting");
 
-    ExpoSpeechRecognitionModule.requestPermissionsAsync().then((result) => {
-      console.log("Permissions", result);
-      if (!result.granted) {
-        console.log("Permissions not granted", result);
+    const microphonePermissions =
+      await ExpoSpeechRecognitionModule.requestMicrophonePermissionsAsync(); // NOTE(review): a rejection here is not caught, so status would stay "starting" — consider try/catch.
+    console.log("Microphone permissions", microphonePermissions);
+    if (!microphonePermissions.granted) { // No microphone access: report "not-allowed" and return to idle.
+      setError({ error: "not-allowed", message: "Permissions not granted" });
+      setStatus("idle");
+      return;
+    }
+
+    if (!settings.requiresOnDeviceRecognition && Platform.OS === "ios") { // Speech recognizer permission is only requested on iOS when on-device recognition is not required.
+      const speechRecognizerPermissions =
+        await ExpoSpeechRecognitionModule.requestSpeechRecognizerPermissionsAsync();
+      console.log("Speech recognizer permissions", speechRecognizerPermissions);
+      if (!speechRecognizerPermissions.granted) {
         setError({ error: "not-allowed", message: "Permissions not granted" });
         setStatus("idle");
         return;
       }
-      ExpoSpeechRecognitionModule.start(settings);
-    });
+    }
+
+    ExpoSpeechRecognitionModule.start(settings); // Reached only after every requested permission was granted.
   };
 
   return (
@@ -811,6 +822,50 @@ function OtherSettings(props: {
             );
           }}
         />
+        <BigButton
+          title="Get microphone permissions"
+          color="#7C90DB"
+          onPress={() => {
+            ExpoSpeechRecognitionModule.getMicrophonePermissionsAsync().then(
+              (result) => {
+                Alert.alert("Result", JSON.stringify(result));
+              },
+            );
+          }}
+        />
+        <BigButton
+          title="Request microphone permissions"
+          color="#7C90DB"
+          onPress={() => {
+            ExpoSpeechRecognitionModule.requestMicrophonePermissionsAsync().then(
+              (result) => {
+                Alert.alert("Result", JSON.stringify(result));
+              },
+            );
+          }}
+        />
+        <BigButton
+          title="Get speech recognizer permissions"
+          color="#7C90DB"
+          onPress={() => {
+            ExpoSpeechRecognitionModule.getSpeechRecognizerPermissionsAsync().then(
+              (result) => {
+                Alert.alert("Result", JSON.stringify(result));
+              },
+            );
+          }}
+        />
+        <BigButton
+          title="Request speech recognizer permissions"
+          color="#7C90DB"
+          onPress={() => {
+            ExpoSpeechRecognitionModule.requestSpeechRecognizerPermissionsAsync().then(
+              (result) => {
+                Alert.alert("Result", JSON.stringify(result));
+              },
+            );
+          }}
+        />
         <BigButton
           title="Get speech recognizer state"
           color="#7C90DB"

diff --git a/ios/ExpoSpeechRecognitionModule.swift b/ios/ExpoSpeechRecognitionModule.swift
@@ -104,7 +104,11 @@ public class ExpoSpeechRecognitionModule: Module {
       guard let permissionsManager = appContext?.permissions else {
         return
       }
-      permissionsManager.register([EXSpeechRecognitionPermissionRequester()])
+      permissionsManager.register([
+        EXSpeechRecognitionPermissionRequester(),
+        MicrophoneRequester(),
+        SpeechRecognizerRequester()
+      ])
     }
 
     AsyncFunction("requestPermissionsAsync") { (promise: Promise) in
@@ -129,6 +133,38 @@ public class ExpoSpeechRecognitionModule: Module {
       )
     }
 
+    // Queries the microphone permission through MicrophoneRequester; no-op if the manager is nil.
+    AsyncFunction("getMicrophonePermissionsAsync") { (promise: Promise) in
+      let permissionsManager = appContext?.permissions
+      permissionsManager?.getPermissionUsingRequesterClass(
+        MicrophoneRequester.self, resolve: promise.resolver, reject: promise.legacyRejecter
+      )
+    }
+
+    // Asks for the microphone permission through MicrophoneRequester; no-op if the manager is nil.
+    AsyncFunction("requestMicrophonePermissionsAsync") { (promise: Promise) in
+      let permissionsManager = appContext?.permissions
+      permissionsManager?.askForPermission(
+        usingRequesterClass: MicrophoneRequester.self, resolve: promise.resolver, reject: promise.legacyRejecter
+      )
+    }
+
+    // Queries the speech recognizer permission through SpeechRecognizerRequester; no-op if the manager is nil.
+    AsyncFunction("getSpeechRecognizerPermissionsAsync") { (promise: Promise) in
+      let permissionsManager = appContext?.permissions
+      permissionsManager?.getPermissionUsingRequesterClass(
+        SpeechRecognizerRequester.self, resolve: promise.resolver, reject: promise.legacyRejecter
+      )
+    }
+
+    // Asks for the speech recognizer permission through SpeechRecognizerRequester; no-op if the manager is nil.
+    AsyncFunction("requestSpeechRecognizerPermissionsAsync") { (promise: Promise) in
+      let permissionsManager = appContext?.permissions
+      permissionsManager?.askForPermission(
+        usingRequesterClass: SpeechRecognizerRequester.self, resolve: promise.resolver, reject: promise.legacyRejecter
+      )
+    }
+
     AsyncFunction("getStateAsync") { (promise: Promise) in
       Task {
         let state = await speechRecognizer?.getState()
@@ -163,6 +199,24 @@ public class ExpoSpeechRecognitionModule: Module {
               locale: locale
             )
           }
+
+          if !options.requiresOnDeviceRecognition {
+            guard await SFSpeechRecognizer.hasAuthorizationToRecognize() else {
+              sendErrorAndStop(
+                error: "not-allowed",
+                message: RecognizerError.notAuthorizedToRecognize.message
+              )
+              return
+            }
+          }
+
+          guard await AVAudioSession.sharedInstance().hasPermissionToRecord() else {
+            sendErrorAndStop(
+              error: "not-allowed",
+              message: RecognizerError.notPermittedToRecord.message
+            )
+            return
+          }
 
           // Start recognition!
           await speechRecognizer?.start(

diff --git a/ios/ExpoSpeechRecognizer.swift b/ios/ExpoSpeechRecognizer.swift
@@ -56,14 +56,6 @@ actor ExpoSpeechRecognizer: ObservableObject {
     guard recognizer != nil else {
       throw RecognizerError.nilRecognizer
     }
-
-    guard await SFSpeechRecognizer.hasAuthorizationToRecognize() else {
-      throw RecognizerError.notAuthorizedToRecognize
-    }
-
-    guard await AVAudioSession.sharedInstance().hasPermissionToRecord() else {
-      throw RecognizerError.notPermittedToRecord
-    }
   }
 
   /// Returns a suitable audio format to use for the speech recognition task and audio file recording.

diff --git a/ios/MicrophoneRequester.swift b/ios/MicrophoneRequester.swift
@@ -0,0 +1,33 @@
+import ExpoModulesCore
+
+/// Permission requester for the microphone ("microphone" permission type).
+public class MicrophoneRequester: NSObject, EXPermissionsRequester {
+  static public func permissionType() -> String {
+    "microphone"
+  }
+
+  /// Asks AVAudioSession for record permission, then resolves with `getPermissions()`; never rejects.
+  public func requestPermissions(
+    resolver resolve: @escaping EXPromiseResolveBlock, rejecter reject: EXPromiseRejectBlock
+  ) {
+    // The callback's Bool is ignored; the status is re-read from the session instead.
+    AVAudioSession.sharedInstance().requestRecordPermission { _ in
+      resolve(self.getPermissions())
+    }
+  }
+
+  /// Maps the session's current `recordPermission` to an `EXPermissionStatus`
+  /// and returns its raw value under the "status" key.
+  public func getPermissions() -> [AnyHashable: Any] {
+    let status: EXPermissionStatus
+    switch AVAudioSession.sharedInstance().recordPermission {
+    case .granted:
+      status = EXPermissionStatusGranted
+    case .denied:
+      status = EXPermissionStatusDenied
+    default:
+      status = EXPermissionStatusUndetermined
+    }
+    return ["status": status.rawValue]
+  }
+}
diff --git a/ios/SpeechRecognizerRequester.swift b/ios/SpeechRecognizerRequester.swift
@@ -0,0 +1,34 @@
+import ExpoModulesCore
+import Speech
+
+/// Permission requester for speech recognition ("speechRecognizer" permission type).
+public class SpeechRecognizerRequester: NSObject, EXPermissionsRequester {
+  static public func permissionType() -> String {
+    return "speechRecognizer"
+  }
+
+  /// Requests speech recognition authorization, then resolves with `getPermissions()`; never rejects.
+  public func requestPermissions(
+    resolver resolve: @escaping EXPromiseResolveBlock, rejecter reject: EXPromiseRejectBlock
+  ) {
+    SFSpeechRecognizer.requestAuthorization { _ in
+      resolve(self.getPermissions())
+    }
+  }
+
+  /// Maps the current speech recognition authorization status to an `EXPermissionStatus`
+  /// and returns its raw value under the "status" key.
+  public func getPermissions() -> [AnyHashable: Any] {
+    let status: EXPermissionStatus
+    switch SFSpeechRecognizer.authorizationStatus() {
+    case .authorized:
+      status = EXPermissionStatusGranted
+    case .denied, .restricted:
+      // Restricted cannot be lifted by prompting, so it is reported as denied.
+      status = EXPermissionStatusDenied
+    default:
+      status = EXPermissionStatusUndetermined
+    }
+    return ["status": status.rawValue]
+  }
+}
diff --git a/src/ExpoSpeechRecognitionModule.types.ts b/src/ExpoSpeechRecognitionModule.types.ts
@@ -563,14 +563,42 @@ export declare class ExpoSpeechRecognitionModuleType extends NativeModule<ExpoSp
   /**
    * Presents a dialog to the user to request permissions for using speech recognition and the microphone.
    *
-   * For iOS, once a user has granted (or denied) location permissions by responding to the original permission request dialog,
+   * For Android, this will request RECORD_AUDIO permission.
+   *
+   * For iOS, this will request microphone and speech recognition permissions.
+   * Once a user has granted (or denied) permissions by responding to the original permission request dialog,
    * the only way that the permissions can be changed is by the user themselves using the device settings app.
    */
   requestPermissionsAsync(): Promise<PermissionResponse>;
   /**
-   * Returns the current permission status for the microphone and speech recognition.
+   * Returns the current permission status for speech recognition and the microphone.
+   *
+   * You may also use `getMicrophonePermissionsAsync` and `getSpeechRecognizerPermissionsAsync` to get the permissions separately.
    */
   getPermissionsAsync(): Promise<PermissionResponse>;
+  /**
+   * Returns the current permission status for the microphone.
+   */
+  getMicrophonePermissionsAsync(): Promise<PermissionResponse>;
+  /**
+   * Presents a dialog to the user to request permissions for using the microphone.
+   *
+   * For iOS, once a user has granted (or denied) permissions by responding to the original permission request dialog,
+   * the only way that the permissions can be changed is by the user themselves using the device settings app.
+   */
+  requestMicrophonePermissionsAsync(): Promise<PermissionResponse>;
+  /**
+   * Returns the current permission status for speech recognition (on Android this always resolves as granted).
+   */
+  getSpeechRecognizerPermissionsAsync(): Promise<PermissionResponse>;
+  /**
+   * [iOS only] Presents a dialog to the user to request permissions for using the speech recognizer.
+   * This permission is required when `requiresOnDeviceRecognition` is disabled (i.e. network-based recognition). On Android no dialog is shown and a granted response is returned.
+   *
+   * For iOS, once a user has granted (or denied) permissions by responding to the original permission request dialog,
+   * the only way that the permissions can be changed is by the user themselves using the device settings app.
+   */
+  requestSpeechRecognizerPermissionsAsync(): Promise<PermissionResponse>;
   /**
    * Returns an array of locales supported by the speech recognizer.
    *