From dc9bf8306dde43ea021e9a173196b9bd2272c6c3 Mon Sep 17 00:00:00 2001 From: Renato Nascimento Date: Fri, 1 Oct 2021 17:48:43 -0300 Subject: [PATCH] Reachability test (#93) * Add Reachability package * Allow healthcheck route to notify reachability test * Switch serve to be non-blocking * Perform reachability test in weep serve * Fix: revert accidental package removal --- pkg/reachability/channels.go | 41 ++++++++++++++++++++++++++++++++ pkg/reachability/request.go | 37 ++++++++++++++++++++++++++++ pkg/server/healthcheckHandler.go | 8 +++++++ pkg/server/server.go | 40 +++++++++++++++++++++---------- 4 files changed, 113 insertions(+), 13 deletions(-) create mode 100644 pkg/reachability/channels.go create mode 100644 pkg/reachability/request.go diff --git a/pkg/reachability/channels.go b/pkg/reachability/channels.go new file mode 100644 index 0000000..6793a1b --- /dev/null +++ b/pkg/reachability/channels.go @@ -0,0 +1,41 @@ +package reachability + +import ( + "time" +) + +const maxWaitTimeSeconds = 3 + +var c chan struct{} + +func init() { + c = make(chan struct{}) +} + +// Notify will signal the reachability package that a reachability test +// was received by this Weep instance +func Notify() { + // Only sends when there is already some receiver waiting. Never blocks. 
+ select { + case c <- struct{}{}: + default: + } +} + +func wait() bool { + timeout := make(chan struct{}) + go func() { + time.Sleep(maxWaitTimeSeconds * time.Second) + timeout <- struct{}{} + }() + + select { + case <-c: + // Received a reachability test + return true + case <-timeout: + // Timed out, move on + } + + return false +} diff --git a/pkg/reachability/request.go b/pkg/reachability/request.go new file mode 100644 index 0000000..d89ff94 --- /dev/null +++ b/pkg/reachability/request.go @@ -0,0 +1,37 @@ +package reachability + +import ( + "net/http" + "os" + + "github.com/netflix/weep/pkg/logging" +) + +// TestReachability sends a GET request to the address IMDS is expected to run on, then checks +// whether this test was received by this same Weep instance; otherwise it logs a warning +func TestReachability() { + go func() { + logging.Log.Debug("Doing a healthcheck request on 169.254.169.254") + resp, err := http.Get("http://169.254.169.254/healthcheck?reachability=1") + + // A response can be successful but have been served by another process on the + // IMDS port/an actual IMDS. So we prefer relying on the reachability signal (which + // means this same process received a reachability test). + + if err != nil { + logging.Log.WithField("err", err).Debug("Received an error from healthcheck route") + } else { + logging.Log.WithField("status", resp.StatusCode).Debug("Received a response from healthcheck route") + } + }() + + received := wait() + if received { + logging.Log.Info("Reachability test was successful") + } else { + logging.Log.Warningf( + "Reachability test was unsuccessful. Looks like we aren't being served in 169.254.169.254. 
Did you `%s setup`?", + os.Args[0], + ) + } +} diff --git a/pkg/server/healthcheckHandler.go b/pkg/server/healthcheckHandler.go index 13c3cf6..f5011e3 100644 --- a/pkg/server/healthcheckHandler.go +++ b/pkg/server/healthcheckHandler.go @@ -3,8 +3,10 @@ package server import ( "encoding/json" "net/http" + "strconv" "github.com/netflix/weep/pkg/logging" + "github.com/netflix/weep/pkg/reachability" "github.com/netflix/weep/pkg/health" ) @@ -22,6 +24,12 @@ func HealthcheckHandler(w http.ResponseWriter, r *http.Request) { } else { status = http.StatusInternalServerError } + + reachabilityFlag := r.URL.Query().Get("reachability") + if b, err := strconv.ParseBool(reachabilityFlag); err == nil && b { + reachability.Notify() + } + resp := healthcheckResponse{ Status: status, Message: reason, diff --git a/pkg/server/server.go b/pkg/server/server.go index 2f625f8..37babee 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -7,10 +7,10 @@ import ( "os" "time" - "github.com/netflix/weep/pkg/logging" - "github.com/netflix/weep/pkg/cache" "github.com/netflix/weep/pkg/creds" + "github.com/netflix/weep/pkg/logging" + "github.com/netflix/weep/pkg/reachability" "github.com/gorilla/mux" ) @@ -27,7 +27,9 @@ func Run(host string, port int, role, region string, shutdown chan os.Signal) er router := mux.NewRouter() router.HandleFunc("/healthcheck", HealthcheckHandler) - if role != "" { + isServingIMDS := role != "" + + if isServingIMDS { logging.Log.Infof("Configuring weep IMDS service for role %s", role) client, err := creds.GetClient(region) if err != nil { @@ -56,21 +58,33 @@ func Run(host string, port int, role, region string, shutdown chan os.Signal) er router.HandleFunc("/ecs/{role:.*}", TaskMetadataMiddleware(getCredentialHandler(region))) router.HandleFunc("/{path:.*}", TaskMetadataMiddleware(NotFoundHandler)) + logging.Log.Info("starting weep on ", listenAddr) + srv := &http.Server{ + ReadTimeout: 1 * time.Second, + WriteTimeout: 10 * time.Second, + IdleTimeout: 30 * 
time.Second, + ReadHeaderTimeout: 2 * time.Second, + Handler: router, + } + + ln, err := net.Listen("tcp", listenAddr) + if err != nil { + logging.Log.Fatalf("listen failed: %v", err) + } + go func() { - logging.Log.Info("starting weep on ", listenAddr) - srv := &http.Server{ - ReadTimeout: 1 * time.Second, - WriteTimeout: 10 * time.Second, - IdleTimeout: 30 * time.Second, - ReadHeaderTimeout: 2 * time.Second, - Addr: listenAddr, - Handler: router, - } - if err := srv.ListenAndServe(); err != nil { + if err := srv.Serve(ln); err != nil { logging.Log.Fatalf("server failed: %v", err) } }() + if isServingIMDS { + go func() { + logging.Log.Debug("Testing IMDS reachability") + reachability.TestReachability() + }() + } + // Check for interrupt signal and exit cleanly <-shutdown logging.Log.Print("shutdown signal received, stopping server...")