diff --git a/charts/infracloud-dashboards/Chart.yaml b/charts/infracloud-dashboards/Chart.yaml index b34275c..0d31366 100644 --- a/charts/infracloud-dashboards/Chart.yaml +++ b/charts/infracloud-dashboards/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/infracloud-dashboards/README.md b/charts/infracloud-dashboards/README.md new file mode 100644 index 0000000..d2ede02 --- /dev/null +++ b/charts/infracloud-dashboards/README.md @@ -0,0 +1,30 @@ +# infracloud-dashboards + +A helm chart to deploy the [InfraCloud Dashboards](https://github.com/infracloudio/charts/tree/main/charts/infracloud-dashboards/grafana-dashboards) from InfraCloud. We have forked few original dashboards from community and added few customizations to make it work with our setup. + +## Setup Helm Repository + +```bash +helm repo add infracloud-charts https://infracloudio.github.io/charts +helm repo update +``` + +See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation. + +## Installing the Chart + +To install the chart with the release name `infracloud-dashboards`: + +```bash +helm install infracloud-dashboards infracloud-charts/infracloud-dashboards +``` + +## Uninstalling the Chart + +To uninstall the `infracloud-dashboards` deployment: + +```bash +helm uninstall infracloud-dashboards +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. diff --git a/charts/infracloud-dashboards/grafana-dashboards/tgi.json b/charts/infracloud-dashboards/grafana-dashboards/tgi.json index a79b759..0ffb96a 100644 --- a/charts/infracloud-dashboards/grafana-dashboards/tgi.json +++ b/charts/infracloud-dashboards/grafana-dashboards/tgi.json @@ -1,3999 +1,3940 @@ { - "__inputs": [ + "annotations": { + "list": [ { - "name": "DS_PROMETHEUS_EKS API INFERENCE PROD", - "label": "Prometheus EKS API Inference Prod", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" } - ], - "__elements": {}, - "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.0.2" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] }, - { - "type": "panel", - "id": "heatmap", - "name": "Heatmap", - "version": "" + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" + "id": 49, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ + "pluginVersion": "11.1.4", + "targets": [ { - "builtIn": 1, "datasource": { - "type": "grafana", - "uid": "-- Grafana --" + "type": "prometheus", + "uid": "prometheus" }, - "enable": true, + "editorMode": "code", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0", "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "type": "dashboard" + "editorMode": "code", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + }, + { + "datasource": { + "name": "Expression", + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "$B + $C", + "hide": false, + "refId": "D", + "type": "math" } - ] + ], + "title": "Time to first token", + "type": "stat" }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 2, - "id": 551, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "fieldMinMax": false, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1000 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 0 - }, - "id": 49, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0", - "hide": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0", - "hide": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "C" + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - { - "datasource": { - "name": "Expression", - "type": "__expr__", - "uid": "__expr__" - }, - "expression": "$B + $C", - "hide": false, - "refId": "D", - "type": "math" - } - ], - "title": "Time to first token", - "type": "stat" + "unit": "ms" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 9, - "y": 0 - }, - "id": 44, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m]))) * 1000)>0", - "instant": false, - "range": true, - "refId": "A" - } - ], - "title": "Decode per-token latency", - "type": "stat" + "gridPos": { + "h": 7, + "w": 8, + "x": 9, + "y": 0 }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 7, - "x": 17, - "y": 0 - }, - "id": 45, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "sum((rate(tgi_request_generated_tokens_sum{container=\"$service\"}[10m]) / rate(tgi_request_generated_tokens_count{container=\"$service\"}[10m]))>0)", - "instant": false, - "range": true, - "refId": "A" - } - ], - "title": "Throughput (generated tok/s)", - "type": "stat" + "id": 44, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "pluginVersion": "11.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m]))) * 1000)>0", + "instant": false, + "range": true, + "refId": "A" + } + ], + "title": "Decode per-token latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 7 - }, - "id": 48, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "short" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 17, + "y": 0 + }, + "id": 45, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" - } - ], - "title": "Number of tokens per prompt", - "type": "timeseries" + "editorMode": "code", + "expr": "sum((rate(tgi_request_generated_tokens_sum{container=\"$service\"}[10m]) / rate(tgi_request_generated_tokens_count{container=\"$service\"}[10m]))>0)", + "instant": false, + "range": true, + "refId": "A" + } + ], + "title": "Throughput (generated tok/s)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 7 - }, - "id": 30, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "none" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Number of generated tokens per request", - "type": "timeseries" + ] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 20, - "panels": [], - "title": "General", - "type": "row" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "id": 48, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 16 - }, - "id": 4, - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "sum(increase(tgi_request_success{container=\"$service\"}[1m]))", - "legendFormat": "Success", - "range": true, - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "sum(increase(tgi_request_failure{container=\"$service\"}[1m])) by (err)", - "hide": false, - "legendFormat": "Error: {{err}}", - "range": true, - "refId": "B" - } - ], - "title": "Requests", - "type": "timeseries" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Number of tokens per prompt", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 13, - "w": 9, - "x": 6, - "y": 16 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "none" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Mean Time Per Token quantiles", - "type": "timeseries" + ] }, - { - "cards": {}, - "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", - "exponent": 0.5, - "min": 0, - "mode": "opacity" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "overrides": [] + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" }, - "gridPos": { - "h": 13, - "w": 9, - "x": 15, - "y": 16 - }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 13, - "legend": { - "show": false + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Number of generated tokens per request", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 20, + "panels": [], + "title": "General", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } - }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ], - "title": "Mean Time Per Token", - "tooltip": { - "show": true, - "showHistogram": false }, - "type": "heatmap", - "xAxis": { - "show": true + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 4, + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "yAxis": { - "decimals": 1, - "format": "s", - "logBase": 1, - "show": true + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(tgi_request_success{container=\"$service\"}[1m]))", + "legendFormat": "Success", + "range": true, + "refId": "A" }, - "yBucketBound": "auto" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(tgi_request_failure{container=\"$service\"}[1m])) by (err)", + "hide": false, + "legendFormat": "Error: {{err}}", + "range": true, + "refId": "B" + } + ], + "title": "Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 70 - }, - { - "color": "red", - "value": 85 - } - ] + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 0, - "y": 24 - }, - "id": 18, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, - "pluginVersion": "9.1.0", - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "count(tgi_request_count{container=\"$service\"})", - "legendFormat": "Replicas", - "range": true, - "refId": "A" - } - ], - "title": "Number of replicas", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 70 - }, - { - "color": "red", - "value": 85 + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - } + } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 3, - "y": 24 - }, - "id": 32, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "10.4.2", - "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "sum(tgi_queue_size{container=\"$service\"})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Queue Size", - "type": "gauge" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 26, - "panels": [], - "title": "Batching", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 50, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - } + } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 6, - "x": 0, - "y": 30 - }, - "id": 29, - "maxDataPoints": 40, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } + ] + }, + "gridPos": { + "h": 13, + "w": 9, + "x": 6, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "avg(tgi_batch_current_max_tokens{container=\"$service\"})", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Mean Time Per Token quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Max tokens per batch", - "type": "timeseries" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "gridPos": { + "h": 13, + "w": 9, + "x": 15, + "y": 16 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 13, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.5, + "fill": "#5794F2", + "min": 0, + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 1, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "11.1.4", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Mean Time Per Token", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "orange", + "value": 70 }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 85 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 24 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 4, - "x": 6, - "y": 30 - }, - "id": 33, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "editorMode": "code", + "expr": "count(tgi_request_count{container=\"$service\"})", + "legendFormat": "Replicas", + "range": true, + "refId": "A" + } + ], + "title": "Number of replicas", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 24 + }, + "id": 32, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "sum(tgi_queue_size{container=\"$service\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Queue Size", + "type": "gauge" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 26, + "panels": [], + "title": "Batching", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] } - ], - "title": "Speculated Tokens", - "type": "timeseries" + }, + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 30 + }, + "id": 29, + "maxDataPoints": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(tgi_batch_current_max_tokens{container=\"$service\"})", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Max tokens per batch", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 5, - "x": 10, - "y": 30 - }, - "id": 46, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "none" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Prompt Tokens", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 9, + "w": 4, + "x": 6, + "y": 30 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Speculated Tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 9, - "x": 15, - "y": 30 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "none" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Latency quantiles", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 50, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" + "gridPos": { + "h": 9, + "w": 5, + "x": 10, + "y": 30 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Prompt Tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 35 - }, - "id": 27, - "maxDataPoints": 40, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "unit": "s" }, - "pluginVersion": "9.1.0", - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "avg(tgi_batch_current_size{container=\"$service\"})", - "legendFormat": "{{ pod }}", - "range": true, - "refId": "A" - } - ], - "title": "Batch Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - } + } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 6, - "x": 0, - "y": 39 - }, - "id": 28, - "maxDataPoints": 100, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "sum(increase(tgi_batch_concat{container=\"$service\"}[1m])) by (reason)", - "hide": false, - "legendFormat": "Reason: {{ reason }}", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Concatenates", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 30 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Latency quantiles", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 35 + }, + "id": 27, + "maxDataPoints": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(tgi_batch_current_size{container=\"$service\"})", + "legendFormat": "{{ pod }}", + "range": true, + "refId": "A" + } + ], + "title": "Batch Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 39 + }, + "id": 28, + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(tgi_batch_concat{container=\"$service\"}[1m])) by (reason)", + "hide": false, + "legendFormat": "Reason: {{ reason }}", + "range": true, + "refId": "B" + } + ], + "title": "Concatenates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 9, - "x": 6, - "y": 39 - }, - "id": 31, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, - "targets": [ + "overrides": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } - ], - "title": "Queue quantiles", - "type": "timeseries" + ] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 48 - }, - "id": 22, - "panels": [], - "title": "Prefill", - "type": "row" + "gridPos": { + "h": 9, + "w": 9, + "x": 6, + "y": 39 }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "id": 31, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Queue quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 22, + "panels": [], + "title": "Prefill", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Prefill Quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Prefill Quantiles", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 49 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 14, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 49 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 14, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Prefill Latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 60 - }, - "id": 24, - "panels": [], - "title": "Decode", - "type": "row" + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Prefill Latency", + "tooltip": { + "show": true, + "showHistogram": false }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 24, + "panels": [], + "title": "Decode", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 61 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Decode quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Decode quantiles", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 61 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 15, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 61 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 15, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Decode Latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 72 - }, - "id": 43, - "panels": [], - "title": "Debug", - "type": "row" + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Decode Latency", + "tooltip": { + "show": true, + "showHistogram": false }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 43, + "panels": [], + "title": "Debug", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 0, - "y": 73 - }, - "id": 38, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 73 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Forward quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Forward quantiles", - "type": "timeseries" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 6, + "y": 73 }, - { - "cards": {}, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 35, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 73 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 35, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Forward Latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Forward Latency", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 73 - }, - "id": 34, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 12, + "y": 73 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Token Decode quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Token Decode quantiles", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 6, + "x": 18, + "y": 73 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 40, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 73 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 40, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Token Decode Latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Token Decode Latency", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 0, - "y": 84 - }, - "id": 42, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 84 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Filter Batch quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Filter Batch quantiles", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 6, + "x": 6, + "y": 84 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 39, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 84 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 39, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Filter Batch Latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" + "reverse": false, + "unit": "s" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" + } + ], + "title": "Filter Batch Latency", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" }, - "thresholdsStyle": { - "mode": "off" + { + "color": "red", + "value": 80 } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" + ] }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "p50" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "p50" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p90" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p90" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "p99" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "p99" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 84 - }, - "id": 36, - "options": { - "legend": { - "calcs": [ - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 12, + "y": 84 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p90", - "range": true, - "refId": "B" + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", - "hide": false, - "legendFormat": "p99", - "range": true, - "refId": "C" + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", + "hide": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Batch Concat quantiles", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#5794F2", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "opacity" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } } - ], - "title": "Batch Concat quantiles", - "type": "timeseries" + }, + "overrides": [] }, - { - "cards": {}, + "gridPos": { + "h": 11, + "w": 6, + "x": 18, + "y": 84 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 41, + "legend": { + "show": false + }, + "maxDataPoints": 25, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, "color": { - "cardColor": "#5794F2", - "colorScale": "linear", - "colorScheme": "interpolateSpectral", "exponent": 0.5, + "fill": "#5794F2", "min": 0, - "mode": "opacity" + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 128 }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 84 + "filterValues": { + "le": 1e-9 }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 41, "legend": { "show": false }, - "maxDataPoints": 25, - "options": { - "calculate": false, - "calculation": {}, - "cellGap": 2, - "cellValues": {}, - "color": { - "exponent": 0.5, - "fill": "#5794F2", - "min": 0, - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "Spectral", - "steps": 128 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": false - }, - "rowsFrame": { - "layout": "auto" - }, - "showValue": "never", - "tooltip": { - "mode": "single", - "showColorScale": false, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "decimals": 1, - "reverse": false, - "unit": "s" - } + "rowsFrame": { + "layout": "auto" }, - "pluginVersion": "10.4.2", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", - "format": "heatmap", - "interval": "", - "legendFormat": "{{ le }}", - "range": true, - "refId": "A" - } - ], - "title": "Batch Concat latency", + "showValue": "never", "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true + "mode": "single", + "showColorScale": false, + "yHistogram": false }, "yAxis": { + "axisPlacement": "left", "decimals": 1, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto" - } - ], - "refresh": "", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.2", + "reverseYBuckets": false, + "targets": [ { - "current": { - "selected": false, - "text": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1", - "value": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1" - }, "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" - }, - "definition": "label_values(tgi_request_count, container)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "service", - "options": [], - "query": { - "query": "label_values(tgi_request_count, container)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "type": "query" + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "legendFormat": "{{ le }}", + "range": true, + "refId": "A" } - ] - }, - "time": { - "from": "now-30m", - "to": "now-30s" - }, - "timepicker": { - "nowDelay": "30s" - }, - "timezone": "", - "title": "Text Generation Inference", - "uid": "RHSk7EL4kdqsd", - "version": 12, - "weekStart": "" - } + ], + "title": "Batch Concat latency", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 1, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "tgi", + "value": "tgi" + }, + "definition": "label_values(tgi_request_count,container)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "service", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(tgi_request_count,container)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now-30s" + }, + "timepicker": { + "nowDelay": "30s" + }, + "timezone": "", + "title": "Text Generation Inference", + "uid": "RHSk7EL4kdqsd", + "version": 1, + "weekStart": "" +} \ No newline at end of file