From 2ae8a0658351ee73975577bfbb9f7b16f32704bd Mon Sep 17 00:00:00 2001 From: Andrew Crump Date: Thu, 25 Apr 2024 23:45:28 +0000 Subject: [PATCH] feat: App stats availability measurement Currently concerned with when stats are seen as unavailable by the client. Does not include when stats are set to zero by the Cloud Controller when one or more required metrics are missing. Co-authored-by: Rebecca Roberts --- README.md | 12 +- cfCmdGenerator/cfCmdGenerator.go | 9 ++ cfCmdGenerator/cfCmdGenerator_test.go | 8 ++ cfWorkflow/cfWorkflow.go | 10 ++ cfWorkflow/cfWorkflow_test.go | 15 +++ .../cfWorkflowfakes/fake_cf_workflow.go | 74 +++++++++++ config/config.go | 1 + main.go | 44 +++++-- measurement/measurement.go | 16 +++ measurement/statsAvailability.go | 44 +++++++ measurement/statsAvailability_test.go | 116 ++++++++++++++++++ 11 files changed, 334 insertions(+), 15 deletions(-) create mode 100644 measurement/statsAvailability.go create mode 100644 measurement/statsAvailability_test.go diff --git a/README.md b/README.md index aa13464..c2fdecb 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,13 @@ It measures: by periodically pushing a very simple app. - app syslog availability, by periodically checking that app logs - drain to a syslog sink. + drain to a syslog sink, +- app stats availability, + by periodically checking that app stats + are not unavailable. -The CF Release Integration team uses it -to monitor availability during migrations -from `cf-release` to `cf-deployment`, -and during upgrade deployments. +It is often used to monitor availability +during upgrade deployments. 
## Installation @@ -66,6 +67,7 @@ Here is an example config `json`: "allowed_failures": { "app_pushability": 2, "http_availability": 5, + "app_stats": 2, "recent_logs": 2, "streaming_logs": 2, "app_syslog_availability": 2 diff --git a/cfCmdGenerator/cfCmdGenerator.go b/cfCmdGenerator/cfCmdGenerator.go index 1836d30..b44e48e 100644 --- a/cfCmdGenerator/cfCmdGenerator.go +++ b/cfCmdGenerator/cfCmdGenerator.go @@ -24,6 +24,7 @@ type CfCmdGenerator interface { DeleteOrg(org string) cmdStartWaiter.CmdStartWaiter DeleteQuota(quota string) cmdStartWaiter.CmdStartWaiter LogOut() cmdStartWaiter.CmdStartWaiter + AppStats(appName string) cmdStartWaiter.CmdStartWaiter RecentLogs(appName string) cmdStartWaiter.CmdStartWaiter StreamLogs(ctx context.Context, appName string) cmdStartWaiter.CmdStartWaiter MapRoute(appName, domain string, port int) cmdStartWaiter.CmdStartWaiter @@ -175,6 +176,14 @@ func (c *cfCmdGenerator) LogOut() cmdStartWaiter.CmdStartWaiter { ) } +func (c *cfCmdGenerator) AppStats(appName string) cmdStartWaiter.CmdStartWaiter { + return c.setCfHome( + exec.Command( + "cf", "app", appName, + ), + ) +} + func (c *cfCmdGenerator) RecentLogs(appName string) cmdStartWaiter.CmdStartWaiter { return c.setCfHome( exec.Command( diff --git a/cfCmdGenerator/cfCmdGenerator_test.go b/cfCmdGenerator/cfCmdGenerator_test.go index 9f41713..4ec551f 100644 --- a/cfCmdGenerator/cfCmdGenerator_test.go +++ b/cfCmdGenerator/cfCmdGenerator_test.go @@ -162,6 +162,14 @@ var _ = Describe("CfCmdGenerator", func() { }) }) + Describe("AppStats", func() { + It("Generates the correct command", func() { + expectedCmd := exec.Command("cf", "app", "appName") + cmd := generator.AppStats("appName") + expectCommandToBeEquivalent(cmd, expectedCmd, cfHomeEnvVar) + }) + }) + Describe("RecentLogs", func() { It("Generates the correct command", func() { expectedCmd := exec.Command("cf", "logs", "appName", "--recent") diff --git a/cfWorkflow/cfWorkflow.go b/cfWorkflow/cfWorkflow.go index 356c86f..b8d52d8 
100644 --- a/cfWorkflow/cfWorkflow.go +++ b/cfWorkflow/cfWorkflow.go @@ -24,6 +24,7 @@ type CfWorkflow interface { Delete(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter TearDown(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter RecentLogs(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter + AppStats(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter StreamLogs(context.Context, cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter MapSyslogRoute(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter @@ -141,6 +142,15 @@ func (c *cfWorkflow) TearDown(ccg cfCmdGenerator.CfCmdGenerator) []cmdStartWaite return ret } +func (c *cfWorkflow) AppStats(ccg cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter { + return []cmdStartWaiter.CmdStartWaiter{ + ccg.Api(c.cf.API), + ccg.Auth(c.cf.AdminUser, c.cf.AdminPassword), + ccg.Target(c.org, c.space), + ccg.AppStats(c.appName), + } +} + func (c *cfWorkflow) RecentLogs(ccg cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter { return []cmdStartWaiter.CmdStartWaiter{ ccg.Api(c.cf.API), diff --git a/cfWorkflow/cfWorkflow_test.go b/cfWorkflow/cfWorkflow_test.go index a107b2d..f696a86 100644 --- a/cfWorkflow/cfWorkflow_test.go +++ b/cfWorkflow/cfWorkflow_test.go @@ -213,6 +213,21 @@ var _ = Describe("CfWorkflow", func() { }) }) + Describe("AppStats", func() { + It("returns a set of commands to get stats for an app", func() { + cmds := cw.AppStats(ccg) + + Expect(cmds).To(Equal( + []cmdStartWaiter.CmdStartWaiter{ + ccg.Api("jigglypuff.cf-app.com"), + ccg.Auth("pika", "chu"), + ccg.Target("someOrg", "someSpace"), + ccg.AppStats("doraApp"), + }, + )) + }) + }) + Describe("RecentLogs", func() { It("returns a set of commands to get recent logs for an app", func() { cmds := cw.RecentLogs(ccg) diff --git a/cfWorkflow/cfWorkflowfakes/fake_cf_workflow.go b/cfWorkflow/cfWorkflowfakes/fake_cf_workflow.go index 9ef1a77..f0922e7 100644 --- 
a/cfWorkflow/cfWorkflowfakes/fake_cf_workflow.go +++ b/cfWorkflow/cfWorkflowfakes/fake_cf_workflow.go @@ -11,6 +11,17 @@ import ( ) type FakeCfWorkflow struct { + AppStatsStub func(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter + appStatsMutex sync.RWMutex + appStatsArgsForCall []struct { + arg1 cfCmdGenerator.CfCmdGenerator + } + appStatsReturns struct { + result1 []cmdStartWaiter.CmdStartWaiter + } + appStatsReturnsOnCall map[int]struct { + result1 []cmdStartWaiter.CmdStartWaiter + } AppUrlStub func() string appUrlMutex sync.RWMutex appUrlArgsForCall []struct { @@ -187,6 +198,67 @@ type FakeCfWorkflow struct { invocationsMutex sync.RWMutex } +func (fake *FakeCfWorkflow) AppStats(arg1 cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter { + fake.appStatsMutex.Lock() + ret, specificReturn := fake.appStatsReturnsOnCall[len(fake.appStatsArgsForCall)] + fake.appStatsArgsForCall = append(fake.appStatsArgsForCall, struct { + arg1 cfCmdGenerator.CfCmdGenerator + }{arg1}) + stub := fake.AppStatsStub + fakeReturns := fake.appStatsReturns + fake.recordInvocation("AppStats", []interface{}{arg1}) + fake.appStatsMutex.Unlock() + if stub != nil { + return stub(arg1) + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeCfWorkflow) AppStatsCallCount() int { + fake.appStatsMutex.RLock() + defer fake.appStatsMutex.RUnlock() + return len(fake.appStatsArgsForCall) +} + +func (fake *FakeCfWorkflow) AppStatsCalls(stub func(cfCmdGenerator.CfCmdGenerator) []cmdStartWaiter.CmdStartWaiter) { + fake.appStatsMutex.Lock() + defer fake.appStatsMutex.Unlock() + fake.AppStatsStub = stub +} + +func (fake *FakeCfWorkflow) AppStatsArgsForCall(i int) cfCmdGenerator.CfCmdGenerator { + fake.appStatsMutex.RLock() + defer fake.appStatsMutex.RUnlock() + argsForCall := fake.appStatsArgsForCall[i] + return argsForCall.arg1 +} + +func (fake *FakeCfWorkflow) AppStatsReturns(result1 []cmdStartWaiter.CmdStartWaiter) { + 
fake.appStatsMutex.Lock() + defer fake.appStatsMutex.Unlock() + fake.AppStatsStub = nil + fake.appStatsReturns = struct { + result1 []cmdStartWaiter.CmdStartWaiter + }{result1} +} + +func (fake *FakeCfWorkflow) AppStatsReturnsOnCall(i int, result1 []cmdStartWaiter.CmdStartWaiter) { + fake.appStatsMutex.Lock() + defer fake.appStatsMutex.Unlock() + fake.AppStatsStub = nil + if fake.appStatsReturnsOnCall == nil { + fake.appStatsReturnsOnCall = make(map[int]struct { + result1 []cmdStartWaiter.CmdStartWaiter + }) + } + fake.appStatsReturnsOnCall[i] = struct { + result1 []cmdStartWaiter.CmdStartWaiter + }{result1} +} + func (fake *FakeCfWorkflow) AppUrl() string { fake.appUrlMutex.Lock() ret, specificReturn := fake.appUrlReturnsOnCall[len(fake.appUrlArgsForCall)] @@ -1120,6 +1192,8 @@ func (fake *FakeCfWorkflow) TearDownReturnsOnCall(i int, result1 []cmdStartWaite func (fake *FakeCfWorkflow) Invocations() map[string][][]interface{} { fake.invocationsMutex.RLock() defer fake.invocationsMutex.RUnlock() + fake.appStatsMutex.RLock() + defer fake.appStatsMutex.RUnlock() fake.appUrlMutex.RLock() defer fake.appUrlMutex.RUnlock() fake.createAndBindSyslogDrainServiceMutex.RLock() diff --git a/config/config.go b/config/config.go index dcd95e7..eed0719 100644 --- a/config/config.go +++ b/config/config.go @@ -36,6 +36,7 @@ type AllowedFailures struct { HttpAvailability int `json:"http_availability"` RecentLogs int `json:"recent_logs"` StreamingLogs int `json:"streaming_logs"` + AppStats int `json:"app_stats"` AppSyslogAvailability int `json:"app_syslog_availability"` TCPAvailability int `json:"tcp_availability"` } diff --git a/main.go b/main.go index 92923cb..84dedf7 100644 --- a/main.go +++ b/main.go @@ -97,7 +97,7 @@ func main() { } logger.Println("Finished preparing included syslog sink app") } - orcTmpDir, recentLogsTmpDir, streamingLogsTmpDir, pushTmpDir, tcpTmpDir, sinkTmpDir, err := createTmpDirs() + orcTmpDir, recentLogsTmpDir, streamingLogsTmpDir, appStatsTmpDir, 
pushTmpDir, tcpTmpDir, sinkTmpDir, err := createTmpDirs() if err != nil { logger.Println("Failed to create temp dirs:", err) performMeasurements = false @@ -179,6 +179,7 @@ func main() { pushWorkflowGeneratorFunc, cfCmdGenerator.New(recentLogsTmpDir, *useBuildpackDetection), cfCmdGenerator.New(streamingLogsTmpDir, *useBuildpackDetection), + cfCmdGenerator.New(appStatsTmpDir, *useBuildpackDetection), pushCmdGenerator, cfg.AllowedFailures, authFailedRetryFunc, @@ -250,33 +251,37 @@ func main() { os.Exit(exitCode) } -func createTmpDirs() (string, string, string, string, string, string, error) { +func createTmpDirs() (string, string, string, string, string, string, string, error) { orcTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err } recentLogsTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err } streamingLogsTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err + } + appStatsTmpDir, err := os.MkdirTemp("", "uptimer") + if err != nil { + return "", "", "", "", "", "", "", err } pushTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err } tcpTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err } sinkTmpDir, err := os.MkdirTemp("", "uptimer") if err != nil { - return "", "", "", "", "", "", err + return "", "", "", "", "", "", "", err } - return orcTmpDir, recentLogsTmpDir, streamingLogsTmpDir, pushTmpDir, tcpTmpDir, sinkTmpDir, nil + return orcTmpDir, recentLogsTmpDir, streamingLogsTmpDir, appStatsTmpDir, pushTmpDir, tcpTmpDir, sinkTmpDir, nil } func prepareIncludedApp(name, source string) (string, error) { @@ -328,7 +333,7 @@ func createMeasurements( 
logger *log.Logger, orcWorkflow cfWorkflow.CfWorkflow, pushWorkFlowGeneratorFunc func() cfWorkflow.CfWorkflow, - recentLogsCmdGenerator, streamingLogsCmdGenerator, pushCmdGenerator cfCmdGenerator.CfCmdGenerator, + recentLogsCmdGenerator, streamingLogsCmdGenerator, appStatsCmdGenerator, pushCmdGenerator cfCmdGenerator.CfCmdGenerator, allowedFailures config.AllowedFailures, authFailedRetryFunc func(stdOut, stdErr string) bool, ) []measurement.Measurement { @@ -380,6 +385,16 @@ func createMeasurements( }, ) + appStatsRunner, appStatsRunnerOutBuf, appStatsRunnerErrBuf := createBufferedRunner() + appStatsMeasurement := measurement.NewStatsAvailability( + func() []cmdStartWaiter.CmdStartWaiter { + return orcWorkflow.AppStats(appStatsCmdGenerator) + }, + appStatsRunner, + appStatsRunnerOutBuf, + appStatsRunnerErrBuf, + ) + return []measurement.Measurement{ measurement.NewPeriodic( logger, @@ -417,6 +432,15 @@ func createMeasurements( allowedFailures.StreamingLogs, authFailedRetryFunc, ), + measurement.NewPeriodic( + logger, + clock, + 10*time.Second, + appStatsMeasurement, + measurement.NewResultSet(), + allowedFailures.AppStats, + authFailedRetryFunc, + ), } } diff --git a/measurement/measurement.go b/measurement/measurement.go index 9622870..8399010 100644 --- a/measurement/measurement.go +++ b/measurement/measurement.go @@ -166,3 +166,19 @@ func NewAppPushability( runnerErrBuf: runnerErrBuf, } } + +func NewStatsAvailability( + statsAvailabilityCommandGeneratorFunc func() []cmdStartWaiter.CmdStartWaiter, + runner cmdRunner.CmdRunner, + runnerOutBuf *bytes.Buffer, + runnerErrBuf *bytes.Buffer, +) BaseMeasurement { + return &statsAvailability{ + name: "Stats availability", + summaryPhrase: "retrieve stats for app", + statsAvailabilityCommandGeneratorFunc: statsAvailabilityCommandGeneratorFunc, + runner: runner, + runnerOutBuf: runnerOutBuf, + runnerErrBuf: runnerErrBuf, + } +} diff --git a/measurement/statsAvailability.go b/measurement/statsAvailability.go new file mode 
100644 index 0000000..38a147a --- /dev/null +++ b/measurement/statsAvailability.go @@ -0,0 +1,44 @@ +package measurement + +import ( + "bytes" + "strings" + + "github.com/cloudfoundry/uptimer/cmdRunner" + "github.com/cloudfoundry/uptimer/cmdStartWaiter" +) + +type statsAvailability struct { + name string + summaryPhrase string + statsAvailabilityCommandGeneratorFunc func() []cmdStartWaiter.CmdStartWaiter + runner cmdRunner.CmdRunner + runnerOutBuf *bytes.Buffer + runnerErrBuf *bytes.Buffer +} + +func (s *statsAvailability) Name() string { + return s.name +} + +func (s *statsAvailability) SummaryPhrase() string { + return s.summaryPhrase +} + +func (s *statsAvailability) PerformMeasurement() (string, string, string, bool) { + defer s.runnerOutBuf.Reset() + defer s.runnerErrBuf.Reset() + + if err := s.runner.RunInSequence(s.statsAvailabilityCommandGeneratorFunc()...); err != nil { + return err.Error(), s.runnerOutBuf.String(), s.runnerErrBuf.String(), false + } + + if strings.Contains(s.runnerErrBuf.String(), "Stats server temporarily unavailable.") { + return "Stats server was unavailable", + s.runnerOutBuf.String(), + s.runnerErrBuf.String(), + false + } + + return "", "", "", true +} diff --git a/measurement/statsAvailability_test.go b/measurement/statsAvailability_test.go new file mode 100644 index 0000000..bf50e25 --- /dev/null +++ b/measurement/statsAvailability_test.go @@ -0,0 +1,116 @@ +package measurement_test + +import ( + "bytes" + "errors" + "os/exec" + + "github.com/cloudfoundry/uptimer/cmdRunner/cmdRunnerfakes" + "github.com/cloudfoundry/uptimer/cmdStartWaiter" + . "github.com/cloudfoundry/uptimer/measurement" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Stats Availability", func() { + var ( + commands []cmdStartWaiter.CmdStartWaiter + fakeCommandRunner *cmdRunnerfakes.FakeCmdRunner + fakeCmdGeneratorFunc func() []cmdStartWaiter.CmdStartWaiter + outBuf *bytes.Buffer + errBuf *bytes.Buffer + + sm BaseMeasurement + ) + + BeforeEach(func() { + fakeCommandRunner = &cmdRunnerfakes.FakeCmdRunner{} + fakeCmdGeneratorFunc = func() []cmdStartWaiter.CmdStartWaiter { + return commands + } + outBuf = bytes.NewBuffer([]byte{}) + errBuf = bytes.NewBuffer([]byte{}) + + sm = NewStatsAvailability(fakeCmdGeneratorFunc, fakeCommandRunner, outBuf, errBuf) + }) + + Describe("Name", func() { + It("returns the name", func() { + Expect(sm.Name()).To(Equal("Stats availability")) + }) + }) + + Describe("SummaryPhrase", func() { + It("returns the summary phrase", func() { + Expect(sm.SummaryPhrase()).To(Equal("retrieve stats for app")) + }) + }) + + Describe("PerformMeasurement", func() { + BeforeEach(func() { + commands = []cmdStartWaiter.CmdStartWaiter{ + exec.Command("foo"), + exec.Command("bar"), + } + }) + + It("runs the commands to retrieve the stats for the app", func() { + sm.PerformMeasurement() + + Expect(fakeCommandRunner.RunInSequenceCallCount()).To(Equal(1)) + Expect(fakeCommandRunner.RunInSequenceArgsForCall(0)).To(Equal( + []cmdStartWaiter.CmdStartWaiter{ + exec.Command("foo"), + exec.Command("bar"), + }, + )) + }) + + It("records the commands that run without an error as success", func() { + _, _, _, res := sm.PerformMeasurement() + Expect(res).To(BeTrue()) + }) + + Context("when the CLI reports that stats server is unavailable", func() { + BeforeEach(func() { + errBuf.WriteString("Stats server temporarily unavailable.") + }) + It("records the measurement as having failed", func() { + _, _, _, res := sm.PerformMeasurement() + Expect(res).To(BeFalse()) + }) + }) + + Context("when the commands error", func() { + BeforeEach(func() { + 
fakeCommandRunner.RunInSequenceReturns(errors.New("some error")) + }) + + It("records the measurement as having failed", func() { + _, _, _, res := sm.PerformMeasurement() + Expect(res).To(BeFalse()) + }) + + It("returns both stdout and stderr", func() { + outBuf.WriteString("some stdout output") + errBuf.WriteString("some stderr output") + msg, stdOut, stdErr, _ := sm.PerformMeasurement() + + Expect(msg).To(Equal("some error")) + Expect(stdOut).To(Equal("some stdout output")) + Expect(stdErr).To(Equal("some stderr output")) + }) + }) + + It("does not accumulate buffers indefinitely", func() { + outBuf.WriteString("some stdout output") + errBuf.WriteString("some stderr output") + + sm.PerformMeasurement() + + Expect(outBuf.Len()).To(Equal(0)) + Expect(errBuf.Len()).To(Equal(0)) + }) + }) +})