Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add http client metrics #543

Merged
merged 1 commit into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changelog/240018a27d404933a6038db3cf0aa991.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"id": "240018a2-7d40-4933-a603-8db3cf0aa991",
"type": "feature",
"description": "Add HTTP client metrics.",
"modules": [
"."
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,9 @@ private GoWriter.Writable generateInvokeOperation() {
defer endTimer()
defer span.End()

handler := $newClientHandler:T(options.HTTPClient)
handler := $newClientHandler:T(options.HTTPClient, func(o *smithyhttp.ClientHandler) {
o.Meter = options.MeterProvider.Meter($scope:S)
})
decorated := middleware.DecorateHandler(handler, stack)
result, metadata, err = decorated.Handle(ctx, params)
if err != nil {
Expand Down Expand Up @@ -442,7 +444,8 @@ defer endTimer()
ConfigFieldResolver.Target.FINALIZATION
).map(this::generateConfigFieldResolver).toList()
).compose(),
"newClientHandler", SmithyGoDependency.SMITHY_HTTP_TRANSPORT.func("NewClientHandler")
"newClientHandler", SmithyGoDependency.SMITHY_HTTP_TRANSPORT.func("NewClientHandlerWithOptions"),
"scope", settings.getModuleName()
));
}

Expand Down
27 changes: 25 additions & 2 deletions transport/http/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net/http"

smithy "github.com/aws/smithy-go"
"github.com/aws/smithy-go/metrics"
"github.com/aws/smithy-go/middleware"
"github.com/aws/smithy-go/tracing"
)
Expand All @@ -28,13 +29,30 @@ func (fn ClientDoFunc) Do(r *http.Request) (*http.Response, error) {
// implementation is http.Client.
type ClientHandler struct {
client ClientDo

Meter metrics.Meter // For HTTP client metrics.
}

// NewClientHandler returns an initialized middleware handler for the client.
//
// Deprecated: Use [NewClientHandlerWithOptions].
func NewClientHandler(client ClientDo) ClientHandler {
return ClientHandler{
return NewClientHandlerWithOptions(client)
}

// NewClientHandlerWithOptions returns an initialized middleware handler for the client
// with applied options.
func NewClientHandlerWithOptions(client ClientDo, opts ...func(*ClientHandler)) ClientHandler {
h := ClientHandler{
client: client,
}
for _, opt := range opts {
opt(&h)
}
if h.Meter == nil {
h.Meter = metrics.NopMeterProvider{}.Meter("")
}
return h
}

// Handle implements the middleware Handler interface, that will invoke the
Expand All @@ -46,6 +64,11 @@ func (c ClientHandler) Handle(ctx context.Context, input interface{}) (
ctx, span := tracing.StartSpan(ctx, "DoHTTPRequest")
defer span.End()

ctx, client, err := withMetrics(ctx, c.client, c.Meter)
if err != nil {
return nil, metadata, fmt.Errorf("instrument with HTTP metrics: %w", err)
}

req, ok := input.(*Request)
if !ok {
return nil, metadata, fmt.Errorf("expect Smithy http.Request value as input, got unsupported type %T", input)
Expand All @@ -66,7 +89,7 @@ func (c ClientHandler) Handle(ctx context.Context, input interface{}) (
span.SetProperty("http.request_content_length", length)
}

resp, err := c.client.Do(builtRequest)
resp, err := client.Do(builtRequest)
if resp == nil {
// Ensure a http response value is always present to prevent unexpected
// panics.
Expand Down
184 changes: 184 additions & 0 deletions transport/http/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package http

import (
"context"
"crypto/tls"
"net/http"
"net/http/httptrace"
"time"

"github.com/aws/smithy-go/metrics"
)

var now = time.Now

// withMetrics instruments an HTTP client and context to collect HTTP metrics.
func withMetrics(parent context.Context, client ClientDo, meter metrics.Meter) (
context.Context, ClientDo, error,
) {
hm, err := newHTTPMetrics(meter)
if err != nil {
return nil, nil, err
}

ctx := httptrace.WithClientTrace(parent, &httptrace.ClientTrace{
DNSStart: hm.DNSStart,
ConnectStart: hm.ConnectStart,
TLSHandshakeStart: hm.TLSHandshakeStart,

GotConn: hm.GotConn(parent),
PutIdleConn: hm.PutIdleConn(parent),
ConnectDone: hm.ConnectDone(parent),
DNSDone: hm.DNSDone(parent),
TLSHandshakeDone: hm.TLSHandshakeDone(parent),
GotFirstResponseByte: hm.GotFirstResponseByte(parent),
})
return ctx, &timedClientDo{client, hm}, nil
}

type timedClientDo struct {
ClientDo
hm *httpMetrics
}

func (c *timedClientDo) Do(r *http.Request) (*http.Response, error) {
c.hm.doStart = now()
resp, err := c.ClientDo.Do(r)

c.hm.DoRequestDuration.Record(r.Context(), elapsed(c.hm.doStart))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we keep track of which request is the one that just ended?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I follow - this is all happening inside Do(), which is one request?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, then disregard the comment

return resp, err
}

type httpMetrics struct {
DNSLookupDuration metrics.Float64Histogram // client.http.connections.dns_lookup_duration
ConnectDuration metrics.Float64Histogram // client.http.connections.acquire_duration
TLSHandshakeDuration metrics.Float64Histogram // client.http.connections.tls_handshake_duration
ConnectionUsage metrics.Int64UpDownCounter // client.http.connections.usage

DoRequestDuration metrics.Float64Histogram // client.http.do_request_duration
TimeToFirstByte metrics.Float64Histogram // client.http.time_to_first_byte

doStart time.Time
dnsStart time.Time
connectStart time.Time
tlsStart time.Time
}

func newHTTPMetrics(meter metrics.Meter) (*httpMetrics, error) {
hm := &httpMetrics{}

var err error
hm.DNSLookupDuration, err = meter.Float64Histogram("client.http.connections.dns_lookup_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "The time it takes a request to perform DNS lookup."
})
if err != nil {
return nil, err
}
hm.ConnectDuration, err = meter.Float64Histogram("client.http.connections.acquire_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "The time it takes a request to acquire a connection."
})
if err != nil {
return nil, err
}
hm.TLSHandshakeDuration, err = meter.Float64Histogram("client.http.connections.tls_handshake_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "The time it takes an HTTP request to perform the TLS handshake."
})
if err != nil {
return nil, err
}
hm.ConnectionUsage, err = meter.Int64UpDownCounter("client.http.connections.usage", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "{connection}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we name this connections?

also, probably outside the scope of this PR but this would be nice if it had been named client.http.connections.total or something like that, usage doesn't make it clear what kind of usage we're talking about (bandwidth, idle or active connections)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comes from the SRA list of suggested metrics (which I copied in here) - I would rather not deviate where naming has been established.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's what I was hinting with "outside the scope of this PR". I didn't know the unit was also described there, my bad.

o.Description = "Current state of connections pool."
})
if err != nil {
return nil, err
}
hm.DoRequestDuration, err = meter.Float64Histogram("client.http.do_request_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "Time spent performing an entire HTTP transaction."
})
if err != nil {
return nil, err
}
hm.TimeToFirstByte, err = meter.Float64Histogram("client.http.time_to_first_byte", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "Time from start of transaction to when the first response byte is available."
})
if err != nil {
return nil, err
}

return hm, nil
}

func (m *httpMetrics) DNSStart(httptrace.DNSStartInfo) {
m.dnsStart = now()
}

func (m *httpMetrics) ConnectStart(string, string) {
m.connectStart = now()
}

func (m *httpMetrics) TLSHandshakeStart() {
m.tlsStart = now()
}

func (m *httpMetrics) GotConn(ctx context.Context) func(httptrace.GotConnInfo) {
return func(httptrace.GotConnInfo) {
m.addConnAcquired(ctx, 1)
}
}

func (m *httpMetrics) PutIdleConn(ctx context.Context) func(error) {
return func(error) {
m.addConnAcquired(ctx, -1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we count a connection set as idle as "not acquired"? I'm curious how this plays with keep-alive connections

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The SRA defined this as having one dimension: state = idle | acquired - acquired meaning "in use right now" and idle meaning "not in use" - might have been better to say be active | acquired but again this comes from external recommended naming.

}
}

func (m *httpMetrics) DNSDone(ctx context.Context) func(httptrace.DNSDoneInfo) {
return func(httptrace.DNSDoneInfo) {
m.DNSLookupDuration.Record(ctx, elapsed(m.dnsStart))
}
}

func (m *httpMetrics) ConnectDone(ctx context.Context) func(string, string, error) {
return func(string, string, error) {
m.ConnectDuration.Record(ctx, elapsed(m.connectStart))
}
}

func (m *httpMetrics) TLSHandshakeDone(ctx context.Context) func(tls.ConnectionState, error) {
return func(tls.ConnectionState, error) {
m.TLSHandshakeDuration.Record(ctx, elapsed(m.tlsStart))
}
}

func (m *httpMetrics) GotFirstResponseByte(ctx context.Context) func() {
return func() {
m.TimeToFirstByte.Record(ctx, elapsed(m.doStart))
}
}

func (m *httpMetrics) addConnAcquired(ctx context.Context, incr int64) {
m.ConnectionUsage.Add(ctx, incr, func(o *metrics.RecordMetricOptions) {
o.Properties.Set("state", "acquired")
})
}

// Not used: it is recommended to track acquired vs idle conn, but we can't
// determine when something is truly idle with the current HTTP client hooks
// available to us.
func (m *httpMetrics) addConnIdle(ctx context.Context, incr int64) {
m.ConnectionUsage.Add(ctx, incr, func(o *metrics.RecordMetricOptions) {
o.Properties.Set("state", "idle")
})
}

func elapsed(start time.Time) float64 {
end := now()
elapsed := end.Sub(start)
return float64(elapsed) / 1e9
}
Loading
Loading