From fd06406afea22230de41c53fd71cf8922a882d5d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 23 Nov 2023 15:36:33 +0100 Subject: [PATCH] feat: progress towards fixing some fundamental issues --- .../webconnectivitylte/analysishttpcore.go | 1 + .../webconnectivitylte/cleartextflow.go | 53 +++++++++---- .../webconnectivitylte/httpfailure.go | 28 +++++++ .../webconnectivitylte/secureflow.go | 76 ++++++++++++++----- .../experiment/webconnectivityqa/redirect.go | 4 +- 5 files changed, 125 insertions(+), 37 deletions(-) create mode 100644 internal/experiment/webconnectivitylte/httpfailure.go diff --git a/internal/experiment/webconnectivitylte/analysishttpcore.go b/internal/experiment/webconnectivitylte/analysishttpcore.go index aa7ea46f74..794bfbd871 100644 --- a/internal/experiment/webconnectivitylte/analysishttpcore.go +++ b/internal/experiment/webconnectivitylte/analysishttpcore.go @@ -58,6 +58,7 @@ func (tk *TestKeys) analysisHTTPToplevel(logger model.Logger) { switch *failure { case netxlite.FailureConnectionReset, netxlite.FailureGenericTimeoutError, + netxlite.FailureConnectionRefused, netxlite.FailureEOFError: tk.BlockingFlags |= analysisFlagHTTPBlocking logger.Warnf( diff --git a/internal/experiment/webconnectivitylte/cleartextflow.go b/internal/experiment/webconnectivitylte/cleartextflow.go index 55045ee527..55b4ef1f4b 100644 --- a/internal/experiment/webconnectivitylte/cleartextflow.go +++ b/internal/experiment/webconnectivitylte/cleartextflow.go @@ -8,6 +8,7 @@ package webconnectivitylte import ( "context" + "errors" "io" "net" "net/http" @@ -97,6 +98,18 @@ func (t *CleartextFlow) Run(parentCtx context.Context, index int64) error { return err } + httpReq, err := t.newHTTPRequest() + if err != nil { + if t.Referer == "" { + // when the referer is empty, the failing URL comes from our backend + // or from the user, so it's a fundamental failure. After that, we + // are dealing with websites provided URLs, so we should not flag a + // fundamental failure, because we want to see the measurement submitted. + t.TestKeys.SetFundamentalFailure(err) + } + return err + } + // create trace trace := measurexlite.NewTrace(index, t.ZeroTime) @@ -120,7 +133,15 @@ func (t *CleartextFlow) Run(parentCtx context.Context, index int64) error { tcpConn, err := tcpDialer.DialContext(tcpCtx, "tcp", t.Address) t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...) if err != nil { - // TODO(bassosimone): add failed HTTP request to the heap + // TODO(bassosimone): document why we're adding a request to the heap here + t.TestKeys.PrependRequests(newArchivalHTTPRequestResultWithError( + trace, + "tcp", + t.Address, + "", + httpReq, + err, + )) ol.Stop(err) return err } @@ -133,6 +154,15 @@ func (t *CleartextFlow) Run(parentCtx context.Context, index int64) error { // Determine whether we're allowed to fetch the webpage if t.PrioSelector == nil || !t.PrioSelector.permissionToFetch(t.Address) { + // TODO(bassosimone): document why we're adding a request to the heap here + t.TestKeys.PrependRequests(newArchivalHTTPRequestResultWithError( + trace, + "tcp", + t.Address, + "", + httpReq, + errors.New("http_request_canceled"), // TODO(bassosimone): define this error + )) ol.Stop("stop after TCP connect") return errNotPermittedToFetch } @@ -147,22 +177,13 @@ func (t *CleartextFlow) Run(parentCtx context.Context, index int64) error { netxlite.NewNullTLSDialer(), ) - // create HTTP request + // create HTTP context const httpTimeout = 10 * time.Second httpCtx, httpCancel := context.WithTimeout(parentCtx, httpTimeout) defer httpCancel() - httpReq, err := t.newHTTPRequest(httpCtx) - if err != nil { - if t.Referer == "" { - // when the referer is empty, the failing URL comes from our backend - // or from the user, so it's a fundamental failure. After that, we - // are dealing with websites provided URLs, so we should not flag a - // fundamental failure, because we want to see the measurement submitted. - t.TestKeys.SetFundamentalFailure(err) - } - ol.Stop(err) - return err - } + + // make sure the request uses the context + httpReq = httpReq.WithContext(httpCtx) // perform HTTP transaction httpResp, httpRespBody, err := t.httpTransaction( @@ -209,7 +230,7 @@ func (t *CleartextFlow) urlHost(scheme string) (string, error) { } // newHTTPRequest creates a new HTTP request. -func (t *CleartextFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) { +func (t *CleartextFlow) newHTTPRequest() (*http.Request, error) { const urlScheme = "http" urlHost, err := t.urlHost(urlScheme) if err != nil { @@ -221,7 +242,7 @@ func (t *CleartextFlow) newHTTPRequest(ctx context.Context) (*http.Request, erro Path: t.URLPath, RawQuery: t.URLRawQuery, } - httpReq, err := http.NewRequestWithContext(ctx, "GET", httpURL.String(), nil) + httpReq, err := http.NewRequest("GET", httpURL.String(), nil) if err != nil { return nil, err } diff --git a/internal/experiment/webconnectivitylte/httpfailure.go b/internal/experiment/webconnectivitylte/httpfailure.go new file mode 100644 index 0000000000..4d0ef73cf4 --- /dev/null +++ b/internal/experiment/webconnectivitylte/httpfailure.go @@ -0,0 +1,28 @@ +package webconnectivitylte + +import ( + "net/http" + + "github.com/ooni/probe-cli/v3/internal/measurexlite" + "github.com/ooni/probe-cli/v3/internal/model" +) + +// TODO(bassosimone): document this func +func newArchivalHTTPRequestResultWithError(trace *measurexlite.Trace, network, address, alpn string, + req *http.Request, err error) *model.ArchivalHTTPRequestResult { + duration := trace.TimeSince(trace.ZeroTime()) + return measurexlite.NewArchivalHTTPRequestResult( + trace.Index(), + duration, + network, + address, + alpn, + network, // TODO(bassosimone): get rid of this duplicate field? + req, + nil, + 0, + nil, + err, + duration, + ) +} diff --git a/internal/experiment/webconnectivitylte/secureflow.go b/internal/experiment/webconnectivitylte/secureflow.go index 31da071bc6..0540ce0778 100644 --- a/internal/experiment/webconnectivitylte/secureflow.go +++ b/internal/experiment/webconnectivitylte/secureflow.go @@ -9,6 +9,7 @@ package webconnectivitylte import ( "context" "crypto/tls" + "errors" "io" "net" "net/http" @@ -107,6 +108,21 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error { return err } + // Create the request early such that we can immediately bail in case + // it's broken and such that we can insert it into the heap in case there + // is a failure when connecting or TLS handshaking. + httpReq, err := t.newHTTPRequest() + if err != nil { + if t.Referer == "" { + // when the referer is empty, the failing URL comes from our backend + // or from the user, so it's a fundamental failure. After that, we + // are dealing with websites provided URLs, so we should not flag a + // fundamental failure, because we want to see the measurement submitted. + t.TestKeys.SetFundamentalFailure(err) + } + return err + } + // TODO(bassosimone): we're missing high-level information about the // transaction, which implies we are missing failed HTTP requests // to compare to, which causes netemx integration tests such as this @@ -162,7 +178,17 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error { tcpConn, err := tcpDialer.DialContext(tcpCtx, "tcp", t.Address) t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...) if err != nil { - // TODO(bassosimone): add failed HTTP request to the heap + // TODO(bassosimone): document why we're adding a request to the heap here + if t.PrioSelector != nil { + t.TestKeys.PrependRequests(newArchivalHTTPRequestResultWithError( + trace, + "tcp", + t.Address, + "", + httpReq, + err, + )) + } ol.Stop(err) return err } @@ -193,7 +219,17 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error { tlsConn, err := tlsHandshaker.Handshake(tlsCtx, tcpConn, tlsConfig) t.TestKeys.AppendTLSHandshakes(trace.TLSHandshakes()...) if err != nil { - // TODO(bassosimone): add failed HTTP request to the heap + // TODO(bassosimone): document why we're adding a request to the heap here + if t.PrioSelector != nil { + t.TestKeys.PrependRequests(newArchivalHTTPRequestResultWithError( + trace, + "tcp", + t.Address, + "", + httpReq, + err, + )) + } ol.Stop(err) return err } @@ -204,7 +240,17 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error { // Determine whether we're allowed to fetch the webpage if t.PrioSelector == nil || !t.PrioSelector.permissionToFetch(t.Address) { - // TODO(bassosimone): add failed HTTP request to the heap + // TODO(bassosimone): document why we're adding a request to the heap here + if t.PrioSelector != nil { + t.TestKeys.PrependRequests(newArchivalHTTPRequestResultWithError( + trace, + "tcp", + t.Address, + "", + httpReq, + errors.New("http_request_canceled"), // TODO(bassosimone): define this error + )) + } ol.Stop("stop after TLS handshake") return errNotPermittedToFetch } @@ -233,22 +279,13 @@ func (t *SecureFlow) Run(parentCtx context.Context, index int64) error { // structure according to the archival data format and instead we have to // generate it on the fly, which comes with its own drawbacks. - // create HTTP request + // create HTTP context const httpTimeout = 10 * time.Second httpCtx, httpCancel := context.WithTimeout(parentCtx, httpTimeout) defer httpCancel() - httpReq, err := t.newHTTPRequest(httpCtx) - if err != nil { - if t.Referer == "" { - // when the referer is empty, the failing URL comes from our backend - // or from the user, so it's a fundamental failure. After that, we - // are dealing with websites provided URLs, so we should not flag a - // fundamental failure, because we want to see the measurement submitted. - t.TestKeys.SetFundamentalFailure(err) - } - ol.Stop(err) - return err - } + + // make sure the context owns the request + httpReq = httpReq.WithContext(httpCtx) // perform HTTP transaction httpResp, httpRespBody, err := t.httpTransaction( @@ -315,7 +352,7 @@ func (t *SecureFlow) urlHost(scheme string) (string, error) { } // newHTTPRequest creates a new HTTP request. -func (t *SecureFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) { +func (t *SecureFlow) newHTTPRequest() (*http.Request, error) { const urlScheme = "https" urlHost, err := t.urlHost(urlScheme) if err != nil { @@ -327,7 +364,7 @@ func (t *SecureFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) Path: t.URLPath, RawQuery: t.URLRawQuery, } - httpReq, err := http.NewRequestWithContext(ctx, "GET", httpURL.String(), nil) + httpReq, err := http.NewRequest("GET", httpURL.String(), nil) if err != nil { return nil, err } @@ -354,7 +391,8 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn // start at the beginning of the flow rather than here. If we start it at the // beginning this is nicer, but, at the same time, starting it at the beginning // of the flow means we're not collecting information about DNS. So, I am a - // bit torn about what is the best approach to follow here. + // bit torn about what is the best approach to follow here. Maybe it does not + // even matter to emit transaction_start/end events given that we have transaction ID. t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent( trace.Index(), started, "http_transaction_start", )) diff --git a/internal/experiment/webconnectivityqa/redirect.go b/internal/experiment/webconnectivityqa/redirect.go index 03b5656f47..199314cc69 100644 --- a/internal/experiment/webconnectivityqa/redirect.go +++ b/internal/experiment/webconnectivityqa/redirect.go @@ -11,7 +11,7 @@ import ( func redirectWithConsistentDNSAndThenConnectionRefusedForHTTP() *TestCase { return &TestCase{ Name: "redirectWithConsistentDNSAndThenConnectionRefusedForHTTP", - Flags: 0, // BUG: LTE thinks this website is accessible (WTF?!) + Flags: 0, Input: "https://bit.ly/32447", Configure: func(env *netemx.QAEnv) { @@ -37,7 +37,7 @@ func redirectWithConsistentDNSAndThenConnectionRefusedForHTTP() *TestCase { HTTPExperimentFailure: "connection_refused", XStatus: 8320, // StatusExperimentHTTP | StatusAnomalyConnect XDNSFlags: 0, - XBlockingFlags: 32, // analysisFlagSuccess + XBlockingFlags: 8, // analysisFlagHTTPBlocking Accessible: false, Blocking: "http-failure", },