Adding various execution providers
deven96 committed Dec 16, 2024
1 parent 238c3d2 commit f24e419
Showing 4 changed files with 30 additions and 1 deletion.
5 changes: 5 additions & 0 deletions ahnlich/.cargo/config.toml
@@ -0,0 +1,5 @@
[target.aarch64-apple-darwin]
rustflags = ["-Clink-arg=-fapple-link-rtlib"]

[target.x86_64-apple-darwin]
rustflags = ["-Clink-arg=-fapple-link-rtlib"]
1 change: 1 addition & 0 deletions ahnlich/Cargo.lock

Some generated files are not rendered by default.

9 changes: 8 additions & 1 deletion ahnlich/ai/Cargo.toml
@@ -40,7 +40,14 @@ fallible_collections.workspace = true
rayon.workspace = true
hf-hub = { version = "0.3", default-features = false }
dirs = "5.0.1"
ort = { version = "=2.0.0-rc.5", features = ["ndarray"] }
ort = { version = "=2.0.0-rc.5", features = [
"ndarray",
"directml",
"tensorrt",
"cuda",
"coreml",
] }
ort-sys = "=2.0.0-rc.8"
moka = { version = "0.12.8", features = ["future"] }
tracing-opentelemetry.workspace = true
futures.workspace = true
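Each of these Cargo features only compiles in the binding for the corresponding ONNX Runtime execution provider; whether a provider can actually be used is still decided at runtime on the host machine. Below is a minimal sketch of probing availability up front, assuming ort 2.0.0-rc.5 exposes the ExecutionProvider trait and its is_available helper at the crate root:

use ort::{CUDAExecutionProvider, CoreMLExecutionProvider, ExecutionProvider};

fn report_available_providers() -> ort::Result<()> {
    // Each probe returns Ok(true) only when the provider's native libraries
    // can be loaded on this machine; otherwise ort falls back to the CPU provider.
    println!("CUDA: {}", CUDAExecutionProvider::default().is_available()?);
    println!("CoreML: {}", CoreMLExecutionProvider::default().is_available()?);
    Ok(())
}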
16 changes: 16 additions & 0 deletions ahnlich/ai/src/engine/ai/providers/ort.rs
@@ -5,6 +5,10 @@ use crate::error::AIProxyError;
use fallible_collections::FallibleVec;
use hf_hub::{api::sync::ApiBuilder, Cache};
use itertools::Itertools;
use ort::{
CUDAExecutionProvider, CoreMLExecutionProvider, DirectMLExecutionProvider,
TensorRTExecutionProvider,
};
use ort::{Session, SessionOutputs, Value};
use rayon::prelude::*;

@@ -350,6 +354,18 @@ impl ProviderTrait for ORTProvider {
}

fn load_model(&mut self) -> Result<(), AIProxyError> {
ort::init()
.with_execution_providers([
// Prefer TensorRT over CUDA.
TensorRTExecutionProvider::default().build(),
CUDAExecutionProvider::default().build(),
// Use DirectML on Windows if NVIDIA EPs are not available
DirectMLExecutionProvider::default().build(),
// Or use ANE on Apple platforms
CoreMLExecutionProvider::default().build(),
])
.commit()?;

let Some(cache_location) = self.cache_location.clone() else {
return Err(AIProxyError::CacheLocationNotInitiailized);
};
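Because the providers are registered globally through ort::init(...).commit(), any Session built afterwards tries them in registration order (TensorRT, then CUDA, then DirectML, then CoreML) and falls back to the default CPU provider when none can be loaded. A minimal sketch of a session picking this up, assuming ort 2.0.0-rc.5's SessionBuilder API and a hypothetical model.onnx path used purely for illustration:

use ort::{GraphOptimizationLevel, Session};

fn build_session() -> ort::Result<Session> {
    // No per-session provider configuration is needed here: the session
    // inherits the execution providers registered by ort::init() above.
    Session::builder()?
        .with_optimization_level(GraphOptimizationLevel::Level3)?
        .commit_from_file("model.onnx") // hypothetical path, for illustration only
}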
