From 3897fb8be9b866ee3bbf445995f4df85ba602d98 Mon Sep 17 00:00:00 2001 From: fdupont Date: Tue, 6 Feb 2024 15:42:41 +0100 Subject: [PATCH] feat: enable opentelemetry grpc instrumentation --- README.md | 1 + pythie-serving-requirements.txt | 84 ++++++++++++++++++++++++++++++++- setup.py | 4 ++ src/pythie_serving/run.py | 16 +++++++ 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 191234c..db5ab8b 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ The following models can be served by pythie-serving: * `--port`: Port number to listen to. #### Environment variables +* `OPENTELEMETRY_COLLECTOR_HOST`: OpenTelemetry Collector receiver endpoint. If not defined OpenTelemetry will not be activated. See https://opentelemetry.io/docs/what-is-opentelemetry for more details. For a treelite served model: * `TREELITE_NTHREAD`: Number of threads to use to compute predictions * `TREELINTE_BIND_THREADS`: Set to `0` to deactivate thread pinning. See https://treelite.readthedocs.io/en/latest/treelite-runtime-api.html diff --git a/pythie-serving-requirements.txt b/pythie-serving-requirements.txt index 0b3ecd6..908c487 100644 --- a/pythie-serving-requirements.txt +++ b/pythie-serving-requirements.txt @@ -4,10 +4,34 @@ # # pip-compile --extra=serving --no-emit-index-url --output-file=pythie-serving-requirements.txt setup.py # +backoff==2.2.1 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +certifi==2024.2.2 + # via requests +charset-normalizer==3.3.2 + # via requests cloudpickle==2.1.0 # via pythie-serving (setup.py) +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +googleapis-common-protos==1.62.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http grpcio==1.51.1 - # via pythie-serving (setup.py) + # via + # 
opentelemetry-exporter-otlp-proto-grpc + # pythie-serving (setup.py) +idna==3.6 + # via requests +importlib-metadata==6.11.0 + # via opentelemetry-api joblib==1.2.0 # via scikit-learn lightgbm==3.3.4 @@ -20,8 +44,50 @@ numpy==1.23.5 # scipy # treelite-runtime # xgboost -protobuf==4.22.0 +opentelemetry-api==1.22.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-grpc + # opentelemetry-sdk + # pythie-serving (setup.py) +opentelemetry-exporter-otlp==1.22.0 # via pythie-serving (setup.py) +opentelemetry-exporter-otlp-proto-common==1.22.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.22.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.22.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.43b0 + # via opentelemetry-instrumentation-grpc +opentelemetry-instrumentation-grpc==0.43b0 + # via pythie-serving (setup.py) +opentelemetry-proto==1.22.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.22.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation-grpc + # pythie-serving (setup.py) +opentelemetry-semantic-conventions==0.43b0 + # via + # opentelemetry-instrumentation-grpc + # opentelemetry-sdk +protobuf==4.22.0 + # via + # googleapis-common-protos + # opentelemetry-proto + # pythie-serving (setup.py) +requests==2.31.0 + # via opentelemetry-exporter-otlp-proto-http scikit-learn==1.2.0 # via # lightgbm @@ -36,7 +102,21 @@ threadpoolctl==3.1.0 # via scikit-learn treelite-runtime==2.2.2 # via pythie-serving (setup.py) +typing-extensions==4.9.0 + # via opentelemetry-sdk +urllib3==2.2.0 + # via requests wheel==0.38.4 # via lightgbm 
+wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation + # opentelemetry-instrumentation-grpc xgboost==0.90 # via pythie-serving (setup.py) +zipp==3.17.0 + # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/setup.py b/setup.py index 8e0021d..c37b119 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,10 @@ "treelite_runtime~=2.2.2", "scikit-learn~=1.2.0", "cloudpickle~=2.1.0", + "opentelemetry-instrumentation-grpc~=0.38b0", + "opentelemetry-api>=1.17.0, <2.0", + "opentelemetry-sdk>=1.17.0, <2.0", + "opentelemetry-exporter-otlp>=1.17.0, <2.0", ] extras_require_test = [ *extras_require_serving, diff --git a/src/pythie_serving/run.py b/src/pythie_serving/run.py index 26d5e11..30a2042 100644 --- a/src/pythie_serving/run.py +++ b/src/pythie_serving/run.py @@ -5,6 +5,11 @@ from logging.config import dictConfig from google.protobuf import text_format +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.grpc import GrpcInstrumentorServer +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor from pythie_serving import create_grpc_server from pythie_serving.tensorflow_proto.tensorflow_serving.config import ( @@ -12,6 +17,15 @@ ) +def initialize_opentelemetry(): + otel_collector_host = os.environ.get("OPENTELEMETRY_COLLECTOR_HOST") + if otel_collector_host: # unset or empty variable: tracing stays disabled + trace.set_tracer_provider(TracerProvider()) + otlp_exporter = OTLPSpanExporter(endpoint=otel_collector_host, insecure=True) + trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(otlp_exporter)) + GrpcInstrumentorServer().instrument() + + def run(): model_choice_set = {"xgboost", "lightgbm", "treelite", "sklearn", "table"} model_choice_str = ",".join(model_choice_set) @@ -76,6 +90,8 @@ def run(): with open(ns.model_config_file_path) as 
opened_config_file: text_format.Parse(opened_config_file.read(), model_server_config) + initialize_opentelemetry() + maximum_concurrent_rpcs = ns.maximum_concurrent_rpcs if maximum_concurrent_rpcs < 0: maximum_concurrent_rpcs = None # grpc.server takes None to accept unlimited amount of connections