From 7e55c61c2a39612ade5db9b929ffc883913ae0f3 Mon Sep 17 00:00:00 2001 From: Hyeongchan Kim Date: Tue, 18 Jun 2024 00:30:18 +0900 Subject: [PATCH] Add env for OTLP service name (#285) --- README.md | 6 ++++++ .../server/text_embeddings_server/cli.py | 3 ++- .../text_embeddings_server/utils/tracing.py | 6 ++---- backends/python/src/lib.rs | 10 ++++++++-- backends/python/src/management.rs | 20 +++++++++++-------- backends/src/lib.rs | 4 ++++ docs/source/en/cli_arguments.md | 6 ++++++ router/src/lib.rs | 2 ++ router/src/logging.rs | 8 ++++++-- router/src/main.rs | 13 ++++++++++-- router/tests/common.rs | 1 + 11 files changed, 60 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index aacc9459..30960206 100644 --- a/README.md +++ b/README.md @@ -252,6 +252,12 @@ Options: [env: OTLP_ENDPOINT=] + --otlp-service-name + The service name for opentelemetry. + + [env: OTLP_SERVICE_NAME=] + [default: text-embeddings-inference.server] + --cors-allow-origin [env: CORS_ALLOW_ORIGIN=] ``` diff --git a/backends/python/server/text_embeddings_server/cli.py b/backends/python/server/text_embeddings_server/cli.py index 4c627515..9497dc20 100644 --- a/backends/python/server/text_embeddings_server/cli.py +++ b/backends/python/server/text_embeddings_server/cli.py @@ -23,6 +23,7 @@ def serve( logger_level: str = "INFO", json_output: bool = False, otlp_endpoint: Optional[str] = None, + otlp_service_name: str = "text-embeddings-inference.server", ): # Remove default handler logger.remove() @@ -42,7 +43,7 @@ def serve( # Setup OpenTelemetry distributed tracing if otlp_endpoint is not None: - setup_tracing(otlp_endpoint=otlp_endpoint) + setup_tracing(otlp_endpoint=otlp_endpoint, otlp_service_name=otlp_service_name) # Downgrade enum into str for easier management later on dtype = None if dtype is None else dtype.value diff --git a/backends/python/server/text_embeddings_server/utils/tracing.py b/backends/python/server/text_embeddings_server/utils/tracing.py index 5a2bb3f7..9299e719 100644 --- a/backends/python/server/text_embeddings_server/utils/tracing.py +++ b/backends/python/server/text_embeddings_server/utils/tracing.py @@ -54,10 +54,8 @@ def _start_span(self, handler_call_details, context, set_status_on_exception=Fal ) -def setup_tracing(otlp_endpoint: str): - resource = Resource.create( - attributes={"service.name": f"text-embeddings-inference.server"} - ) +def setup_tracing(otlp_endpoint: str, otlp_service_name: str): + resource = Resource.create(attributes={"service.name": otlp_service_name}) span_exporter = OTLPSpanExporter(endpoint=otlp_endpoint, insecure=True) span_processor = BatchSpanProcessor(span_exporter) diff --git a/backends/python/src/lib.rs b/backends/python/src/lib.rs index f3519ee5..195f1d37 100644 --- a/backends/python/src/lib.rs +++ b/backends/python/src/lib.rs @@ -22,6 +22,7 @@ impl PythonBackend { model_type: ModelType, uds_path: String, otlp_endpoint: Option, + otlp_service_name: String, ) -> Result { match model_type { ModelType::Classifier => { @@ -37,8 +38,13 @@ impl PythonBackend { } }; - let backend_process = - management::BackendProcess::new(model_path, dtype, &uds_path, otlp_endpoint)?; + let backend_process = management::BackendProcess::new( + model_path, + dtype, + &uds_path, + otlp_endpoint, + otlp_service_name, + )?; let tokio_runtime = tokio::runtime::Builder::new_current_thread() .enable_all() .build() diff --git a/backends/python/src/management.rs b/backends/python/src/management.rs index ed0c851e..911c6984 100644 --- a/backends/python/src/management.rs +++ b/backends/python/src/management.rs @@ -21,6 +21,7 @@ impl BackendProcess { dtype: String, uds_path: &str, otlp_endpoint: Option, + otlp_service_name: String, ) -> Result { // Get UDS path let uds = Path::new(uds_path); @@ -33,21 +34,24 @@ impl BackendProcess { // Process args let mut python_server_args = vec![ model_path, - "--dtype".to_string(), + "--dtype".to_owned(), dtype, - "--uds-path".to_string(), - uds_path.to_string(), - "--logger-level".to_string(), - "INFO".to_string(), - "--json-output".to_string(), + "--uds-path".to_owned(), + uds_path.to_owned(), + "--logger-level".to_owned(), + "INFO".to_owned(), + "--json-output".to_owned(), ]; // OpenTelemetry if let Some(otlp_endpoint) = otlp_endpoint { - python_server_args.push("--otlp-endpoint".to_string()); + python_server_args.push("--otlp-endpoint".to_owned()); python_server_args.push(otlp_endpoint); } + python_server_args.push("--otlp-service-name".to_owned()); + python_server_args.push(otlp_service_name); + // Copy current process env let envs: Vec<(OsString, OsString)> = env::vars_os().collect(); @@ -64,7 +68,7 @@ impl BackendProcess { Err(err) => { if err.kind() == io::ErrorKind::NotFound { return Err(BackendError::Start( - "python-text-embeddings-server not found in PATH".to_string(), + "python-text-embeddings-server not found in PATH".to_owned(), )); } return Err(BackendError::Start(err.to_string())); diff --git a/backends/src/lib.rs b/backends/src/lib.rs index d7d271ee..d332b4a7 100644 --- a/backends/src/lib.rs +++ b/backends/src/lib.rs @@ -38,6 +38,7 @@ impl Backend { model_type: ModelType, uds_path: String, otlp_endpoint: Option, + otlp_service_name: String, ) -> Result { let (backend_sender, backend_receiver) = mpsc::unbounded_channel(); @@ -47,6 +48,7 @@ impl Backend { model_type.clone(), uds_path, otlp_endpoint, + otlp_service_name, )?; let padded_model = backend.is_padded(); let max_batch_size = backend.max_batch_size(); @@ -135,6 +137,7 @@ fn init_backend( model_type: ModelType, uds_path: String, otlp_endpoint: Option, + otlp_service_name: String, ) -> Result, BackendError> { if cfg!(feature = "candle") { #[cfg(feature = "candle")] @@ -154,6 +157,7 @@ fn init_backend( model_type, uds_path, otlp_endpoint, + otlp_service_name, ) }) .join() diff --git a/docs/source/en/cli_arguments.md b/docs/source/en/cli_arguments.md index c79b7f9c..5efa63cf 100644 --- a/docs/source/en/cli_arguments.md +++ b/docs/source/en/cli_arguments.md @@ -153,6 +153,12 @@ Options: [env: OTLP_ENDPOINT=] + --otlp-service-name + The service name for opentelemetry. + + [env: OTLP_SERVICE_NAME=] + [default: text-embeddings-inference.server] + --cors-allow-origin [env: CORS_ALLOW_ORIGIN=] ``` diff --git a/router/src/lib.rs b/router/src/lib.rs index 3801af8a..d2023515 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -60,6 +60,7 @@ pub async fn run( payload_limit: usize, api_key: Option, otlp_endpoint: Option, + otlp_service_name: String, cors_allow_origin: Option>, ) -> Result<()> { let model_id_path = Path::new(&model_id); @@ -198,6 +199,7 @@ pub async fn run( backend_model_type, uds_path.unwrap_or("/tmp/text-embeddings-inference-server".to_string()), otlp_endpoint.clone(), + otlp_service_name.clone(), ) .context("Could not create backend")?; backend diff --git a/router/src/logging.rs b/router/src/logging.rs index f8a6f0aa..7d5eb11e 100644 --- a/router/src/logging.rs +++ b/router/src/logging.rs @@ -10,7 +10,11 @@ use tracing_subscriber::{EnvFilter, Layer}; /// Init logging using env variables LOG_LEVEL and LOG_FORMAT: /// - otlp_endpoint is an optional URL to an Open Telemetry collector /// - LOG_LEVEL may be TRACE, DEBUG, INFO, WARN or ERROR (default to INFO) -pub fn init_logging(otlp_endpoint: Option<&String>, json_output: bool) -> bool { +pub fn init_logging( + otlp_endpoint: Option<&String>, + otlp_service_name: String, + json_output: bool, +) -> bool { let mut layers = Vec::new(); // STDOUT/STDERR layer @@ -40,7 +44,7 @@ pub fn init_logging(otlp_endpoint: Option<&String>, json_output: bool) -> bool { trace::config() .with_resource(Resource::new(vec![KeyValue::new( "service.name", - "text-embeddings-inference.router", + otlp_service_name, )])) .with_sampler(Sampler::AlwaysOn), ) diff --git a/router/src/main.rs b/router/src/main.rs index 06cd576a..2cdc7095 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -123,6 +123,11 @@ struct Args { #[clap(long, env)] otlp_endpoint: Option, + /// The service name for opentelemetry. + /// e.g. `text-embeddings-inference.server` + #[clap(default_value = "text-embeddings-inference.server", long, env)] + otlp_service_name: String, + /// Unused for gRPC servers #[clap(long, env)] cors_allow_origin: Option>, @@ -134,8 +139,11 @@ async fn main() -> Result<()> { let args: Args = Args::parse(); // Initialize logging and telemetry - let global_tracer = - text_embeddings_router::init_logging(args.otlp_endpoint.as_ref(), args.json_output); + let global_tracer = text_embeddings_router::init_logging( + args.otlp_endpoint.as_ref(), + args.otlp_service_name.clone(), + args.json_output, + ); tracing::info!("{args:?}"); @@ -158,6 +166,7 @@ async fn main() -> Result<()> { args.payload_limit, args.api_key, args.otlp_endpoint, + args.otlp_service_name, args.cors_allow_origin, ) .await?; diff --git a/router/tests/common.rs b/router/tests/common.rs index f9c47c94..c8669c12 100644 --- a/router/tests/common.rs +++ b/router/tests/common.rs @@ -64,6 +64,7 @@ pub async fn start_server(model_id: String, revision: Option, dtype: DTy 2_000_000, None, None, + "text-embeddings-inference.server".to_owned(), None, ) });