Skip to content

Commit

Permalink
move proxy metrics to a separate server (#352)
Browse files Browse the repository at this point in the history
* move metrics to a new server

* remove metricController

* rename metrics service

* refactor test

* remove public variables, use struct for metrics

* lint error fix

* remove leftover comment

---------

Co-authored-by: Matous Jobanek <mjobanek@redhat.com>
Co-authored-by: Francisc Munteanu <fmuntean@redhat.com>
Co-authored-by: Alexey Kazakov <alkazako@redhat.com>
  • Loading branch information
4 people authored Oct 17, 2023
1 parent bb7cf9a commit 5640c6e
Show file tree
Hide file tree
Showing 11 changed files with 142 additions and 138 deletions.
2 changes: 1 addition & 1 deletion build/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ COPY build/_output/bin/registration-service ${REG_SERVICE}

ENTRYPOINT ["/usr/local/bin/registration-service"]

EXPOSE 8080 8081
EXPOSE 8080 8081 8082

USER ${USER_UID}
9 changes: 6 additions & 3 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"github.com/codeready-toolchain/registration-service/pkg/metrics"
"github.com/prometheus/client_golang/prometheus"
"net/http"
"os"
"os/signal"
Expand Down Expand Up @@ -98,10 +99,12 @@ func main() {
}

// Register metrics
metrics.RegisterCustomMetrics()
proxyMetrics := metrics.NewProxyMetrics(prometheus.NewRegistry())
// Start metrics server
metricsSrv := proxyMetrics.StartMetricsServer()

// Start the proxy server
p, err := proxy.NewProxy(app)
p, err := proxy.NewProxy(app, proxyMetrics)
if err != nil {
panic(errs.Wrap(err, "failed to create proxy"))
}
Expand Down Expand Up @@ -141,7 +144,7 @@ func main() {
}
}()

gracefulShutdown(configuration.GracefulTimeout, srv.HTTPServer(), proxySrv)
gracefulShutdown(configuration.GracefulTimeout, srv.HTTPServer(), proxySrv, metricsSrv)
}

func gracefulShutdown(timeout time.Duration, hs ...*http.Server) {
Expand Down
19 changes: 19 additions & 0 deletions deploy/registration-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ objects:
ports:
- containerPort: 8080
- containerPort: 8081
- containerPort: 8082
command:
- registration-service
imagePullPolicy: IfNotPresent
Expand Down Expand Up @@ -214,6 +215,24 @@ objects:
tls:
termination: edge
wildcardPolicy: None
- kind: Service
apiVersion: v1
metadata:
name: proxy-metrics-service
namespace: ${NAMESPACE}
labels:
provider: codeready-toolchain
run: registration-service
spec:
ports:
- name: "8082"
protocol: TCP
port: 80
targetPort: 8082
selector:
run: registration-service
type: ClusterIP
sessionAffinity: null
parameters:
- name: NAMESPACE
value: 'toolchain-host-operator'
Expand Down
67 changes: 35 additions & 32 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
package metrics

import (
"errors"
"github.com/labstack/echo/v4"
glog "github.com/labstack/gommon/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"net/http"
logf "sigs.k8s.io/controller-runtime/pkg/log"
)

var log = logf.Log.WithName("registration_metrics")
var Reg *prometheus.Registry

const (
MetricLabelRejected = "Rejected"
MetricsLabelVerbGet = "Get"
MetricsLabelVerbList = "List"
MetricsPort = "8082"
)

// histogram with labels
var (
type ProxyMetrics struct {
// RegServProxyAPIHistogramVec measures the time taken by proxy before forwarding the request
RegServProxyAPIHistogramVec *prometheus.HistogramVec
// RegServWorkspaceHistogramVec measures the response time for either response or error from proxy when there is no routing
RegServWorkspaceHistogramVec *prometheus.HistogramVec
)

// collections
var (
allHistogramVecs = []*prometheus.HistogramVec{}
)
Reg *prometheus.Registry
}

const metricsPrefix = "sandbox_"

func init() {
initMetrics()
}
func initMetrics() {
log.Info("initializing custom metrics")
RegServProxyAPIHistogramVec = newHistogramVec("proxy_api_http_request_time", "time taken by proxy to route to a target cluster", "status_code", "route_to")
RegServWorkspaceHistogramVec = newHistogramVec("proxy_workspace_http_request_time", "time for response of a request to proxy ", "status_code", "kube_verb")
log.Info("custom metrics initialized")
}

// Reset resets all metrics. For testing purpose only!
func Reset() {
log.Info("resetting custom metrics")
initMetrics()
func NewProxyMetrics(reg *prometheus.Registry) *ProxyMetrics {
regServProxyAPIHistogramVec := newHistogramVec("proxy_api_http_request_time", "time taken by proxy to route to a target cluster", "status_code", "route_to")
regServWorkspaceHistogramVec := newHistogramVec("proxy_workspace_http_request_time", "time for response of a request to proxy ", "status_code", "kube_verb")
metrics := &ProxyMetrics{
RegServWorkspaceHistogramVec: regServWorkspaceHistogramVec,
RegServProxyAPIHistogramVec: regServProxyAPIHistogramVec,
Reg: reg,
}
metrics.Reg.MustRegister(metrics.RegServProxyAPIHistogramVec)
metrics.Reg.MustRegister(metrics.RegServWorkspaceHistogramVec)
return metrics
}

func newHistogramVec(name, help string, labels ...string) *prometheus.HistogramVec {
Expand All @@ -51,16 +48,22 @@ func newHistogramVec(name, help string, labels ...string) *prometheus.HistogramV
Help: help,
Buckets: []float64{0.05, 0.1, 0.25, 0.5, 1, 5, 10},
}, labels)
allHistogramVecs = append(allHistogramVecs, v)
return v
}

// RegisterCustomMetrics registers the custom metrics
func RegisterCustomMetrics() {
Reg = prometheus.NewRegistry()
// register metrics
for _, v := range allHistogramVecs {
Reg.MustRegister(v)
}
log.Info("custom metrics registered")
func (p *ProxyMetrics) StartMetricsServer() *http.Server {
// start server
srv := echo.New()
srv.Logger.SetLevel(glog.INFO)
srv.GET("/metrics", echo.WrapHandler(promhttp.HandlerFor(p.Reg, promhttp.HandlerOpts{DisableCompression: true, Registry: p.Reg})))

log.Info("Starting the Registration-Service Metrics server...")
// listen concurrently to allow for graceful shutdown
go func() {
if err := srv.Start(":" + MetricsPort); !errors.Is(err, http.ErrServerClosed) {
log.Error(err, err.Error())
}
}()

return srv.Server
}
45 changes: 35 additions & 10 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
package metrics

import (
"bytes"
"github.com/prometheus/client_golang/prometheus"
promtestutil "github.com/prometheus/client_golang/prometheus/testutil"
clientmodel "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"net/http"
"strings"
"testing"
"time"
)

func TestHistogramVec(t *testing.T) {
// given
reg := prometheus.NewRegistry()
m := newHistogramVec("test_histogram_vec", "test histogram description", "status_code", "kube_verb")
getSuccess, getFailure, listSuccess, listFailure := getExpectedLabelPairs()
RegisterCustomMetrics()
reg.MustRegister(m)

// when
m.WithLabelValues("200", "get").Observe((5 * time.Second).Seconds())
Expand All @@ -28,7 +32,7 @@ func TestHistogramVec(t *testing.T) {
err := promtestutil.CollectAndCompare(m, strings.NewReader(expectedResponseMetadata+expectedResponse), "sandbox_test_histogram_vec")
require.NoError(t, err)

g, er := Reg.Gather()
g, er := reg.Gather()
require.NoError(t, er)
require.Equal(t, 1, len(g))
require.Equal(t, "sandbox_test_histogram_vec", g[0].GetName())
Expand All @@ -51,17 +55,33 @@ func TestHistogramVec(t *testing.T) {
require.Equal(t, 2, len(g[0].Metric[3].Label))
compareLabelPairValues(t, listFailure, g[0].GetMetric()[3].GetLabel())
require.Equal(t, uint64(2), *g[0].Metric[3].Histogram.SampleCount)

}

func TestRegisterCustomMetrics(t *testing.T) {
// when
RegisterCustomMetrics()
func TestMetricsServer(t *testing.T) {
reg := prometheus.NewRegistry()
testMetrics := NewProxyMetrics(reg)
server := testMetrics.StartMetricsServer()
require.NotNil(t, server)
defer func() {
_ = server.Close()
}()

req, err := http.NewRequest("GET", "http://localhost:8082/metrics", nil)
require.NoError(t, err)
require.NotNil(t, req)
resp, err := http.DefaultClient.Do(req)
require.NoError(t, err)
require.NotNil(t, resp)
require.Equal(t, http.StatusOK, resp.StatusCode)
require.Equal(t, "text/plain; version=0.0.4; charset=utf-8", resp.Header.Get("Content-Type"))
// compare the body of the response as well
defer resp.Body.Close()
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(resp.Body)
require.NoError(t, err)
assert.Equal(t, expectedServerBlankResponse, buf.String())

// then
// verify all metrics were registered successfully
for _, m := range allHistogramVecs {
assert.True(t, Reg.Unregister(m))
}
}

var expectedResponseMetadata = `
Expand Down Expand Up @@ -109,6 +129,11 @@ var expectedResponse = `
sandbox_test_histogram_vec_sum{kube_verb="list",status_code="500"} 3.001
sandbox_test_histogram_vec_count{kube_verb="list",status_code="500"} 2
`
var expectedServerBlankResponse = `# HELP promhttp_metric_handler_errors_total Total number of internal errors encountered by the promhttp metric handler.
# TYPE promhttp_metric_handler_errors_total counter
promhttp_metric_handler_errors_total{cause="encoding"} 0
promhttp_metric_handler_errors_total{cause="gathering"} 0
`

func compareLabelPairValues(t *testing.T, expected []clientmodel.LabelPair, labelPairs []*clientmodel.LabelPair) {
for i := range labelPairs {
Expand Down
20 changes: 0 additions & 20 deletions pkg/proxy/handlers/metrics.go

This file was deleted.

37 changes: 0 additions & 37 deletions pkg/proxy/handlers/metrics_test.go

This file was deleted.

14 changes: 8 additions & 6 deletions pkg/proxy/handlers/spacelister.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,14 @@ const (
type SpaceLister struct {
GetSignupFunc func(ctx *gin.Context, userID, username string, checkUserSignupCompleted bool) (*signup.Signup, error)
GetInformerServiceFunc func() service.InformerService
ProxyMetrics *metrics.ProxyMetrics
}

func NewSpaceLister(app application.Application) *SpaceLister {
func NewSpaceLister(app application.Application, proxyMetrics *metrics.ProxyMetrics) *SpaceLister {
return &SpaceLister{
GetSignupFunc: app.SignupService().GetSignupFromInformer,
GetInformerServiceFunc: app.InformerService,
ProxyMetrics: proxyMetrics,
}
}

Expand All @@ -54,10 +56,10 @@ func (s *SpaceLister) HandleSpaceListRequest(ctx echo.Context) error {
requestReceivedTime := ctx.Get(context.RequestReceivedTime).(time.Time)
workspaces, err := s.ListUserWorkspaces(ctx)
if err != nil {
metrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusInternalServerError), metrics.MetricsLabelVerbList).Observe(time.Since(requestReceivedTime).Seconds()) // using list as the default value for verb to minimize label combinations for prometheus to process
s.ProxyMetrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusInternalServerError), metrics.MetricsLabelVerbList).Observe(time.Since(requestReceivedTime).Seconds()) // using list as the default value for verb to minimize label combinations for prometheus to process
return errorResponse(ctx, apierrors.NewInternalError(err))
}
metrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusOK), metrics.MetricsLabelVerbList).Observe(time.Since(requestReceivedTime).Seconds())
s.ProxyMetrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusOK), metrics.MetricsLabelVerbList).Observe(time.Since(requestReceivedTime).Seconds())
return listWorkspaceResponse(ctx, workspaces)
}

Expand All @@ -66,16 +68,16 @@ func (s *SpaceLister) HandleSpaceGetRequest(ctx echo.Context) error {
requestReceivedTime := ctx.Get(context.RequestReceivedTime).(time.Time)
workspace, err := s.GetUserWorkspace(ctx)
if err != nil {
metrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusInternalServerError), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds()) // using list as the default value for verb to minimize label combinations for prometheus to process
s.ProxyMetrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusInternalServerError), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds()) // using list as the default value for verb to minimize label combinations for prometheus to process
return errorResponse(ctx, apierrors.NewInternalError(err))
}
if workspace == nil {
// not found
metrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusNotFound), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds())
s.ProxyMetrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusNotFound), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds())
r := schema.GroupResource{Group: "toolchain.dev.openshift.com", Resource: "workspaces"}
return errorResponse(ctx, apierrors.NewNotFound(r, ctx.Param("workspace")))
}
metrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusOK), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds())
s.ProxyMetrics.RegServWorkspaceHistogramVec.WithLabelValues(fmt.Sprintf("%d", http.StatusOK), metrics.MetricsLabelVerbGet).Observe(time.Since(requestReceivedTime).Seconds())
return getWorkspaceResponse(ctx, workspace)
}

Expand Down
Loading

0 comments on commit 5640c6e

Please sign in to comment.