From 20f83c25a402037fb4341d0d78b6d1e71d50efd8 Mon Sep 17 00:00:00 2001 From: emilioalvap Date: Fri, 13 Oct 2023 17:43:30 +0200 Subject: [PATCH 1/4] Add empty state to retry cond --- .../monitors/wrappers/summarizer/plugstatestat.go | 5 +++-- .../monitors/wrappers/summarizer/summarizer_test.go | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go index 9567a666ad6d..17b150013200 100644 --- a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go +++ b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go @@ -150,12 +150,13 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions { lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf) curCheckDown := ssp.js.Status == monitorstate.StatusDown - lastStateUp := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp + lastStateUpOrEmpty := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp || + ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusEmpty hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts // retry if... retry := curCheckDown && // the current check is down - lastStateUp && // we were previously up, if we were previously down we just check once + lastStateUpOrEmpty && // we were previously up or had no previous state, if we were previously down we just check once hasAttemptsRemaining // and we are configured to actually make multiple attempts // if we aren't retrying this is the final attempt ssp.js.FinalAttempt = !retry diff --git a/heartbeat/monitors/wrappers/summarizer/summarizer_test.go b/heartbeat/monitors/wrappers/summarizer/summarizer_test.go index 63de2da71a81..2a94b3e6f596 100644 --- a/heartbeat/monitors/wrappers/summarizer/summarizer_test.go +++ b/heartbeat/monitors/wrappers/summarizer/summarizer_test.go @@ -60,10 +60,10 @@ func TestSummarizer(t *testing.T) { { "start down, transition to up", 2, - "du", - "du", - "11", - 2, + "duu", + "duu", + "121", + 3, testURL, }, { @@ -80,7 +80,7 @@ func TestSummarizer(t *testing.T) { 2, "dddddddd", "dddddddd", - "11111111", + "12111111", 8, testURL, }, From 9a3ba5d2e915e3fe1d6f93122302e485a85a9e69 Mon Sep 17 00:00:00 2001 From: emilioalvap Date: Fri, 13 Oct 2023 17:48:48 +0200 Subject: [PATCH 2/4] Remove unwanted test --- heartbeat/monitors/wrappers/wrappers_test.go | 128 ------------------- 1 file changed, 128 deletions(-) diff --git a/heartbeat/monitors/wrappers/wrappers_test.go b/heartbeat/monitors/wrappers/wrappers_test.go index 26a54f9fc36f..eb4565334342 100644 --- a/heartbeat/monitors/wrappers/wrappers_test.go +++ b/heartbeat/monitors/wrappers/wrappers_test.go @@ -43,8 +43,6 @@ import ( "github.com/elastic/beats/v7/heartbeat/monitors/jobs" "github.com/elastic/beats/v7/heartbeat/monitors/logger" "github.com/elastic/beats/v7/heartbeat/monitors/stdfields" - "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/monitorstate" - "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/jobsummary" "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/summarizertesthelper" "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/wraputil" "github.com/elastic/beats/v7/heartbeat/scheduler/schedule" @@ -352,132 +350,6 @@ func TestMultiJobConts(t *testing.T) { }) } -// TestRetryMultiCont is of somewhat dubious utility at the moment, -// it mostly tests that we __don't__ retry on an initial down. -// retry logic is better and more completely tested in the summarizer -// and scenario tests. -func TestRetryMultiCont(t *testing.T) { - uniqScope := isdef.ScopedIsUnique() - - expected := []struct { - monStatus string - js jobsummary.JobSummary - state monitorstate.State - }{ - { - "down", - jobsummary.JobSummary{ - Status: "down", - FinalAttempt: true, - // we expect two up since this is a lightweight - // job and all events get a monitor status - // since no errors are returned that's 2 - Up: 0, - Down: 2, - Attempt: 1, - MaxAttempts: 2, - }, - monitorstate.State{ - Status: "down", - Up: 0, - Down: 2, - Checks: 2, - }, - }, - { - "down", - jobsummary.JobSummary{ - Status: "down", - FinalAttempt: true, - Up: 0, - Down: 2, - Attempt: 2, - MaxAttempts: 2, - }, - monitorstate.State{ - Status: "down", - Up: 0, - Down: 2, - Checks: 2, - }, - }, - } - - jobErr := fmt.Errorf("down") - - makeContJob := func(t *testing.T, u string) jobs.Job { - expIdx := 0 - return func(event *beat.Event) ([]jobs.Job, error) { - eventext.MergeEventFields(event, mapstr.M{"cont": "1st"}) - u, err := url.Parse(u) - require.NoError(t, err) - eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)}) - - return []jobs.Job{ - func(event *beat.Event) ([]jobs.Job, error) { - eventext.MergeEventFields(event, mapstr.M{"cont": "2nd"}) - eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)}) - - expIdx++ - if expIdx >= len(expected)-1 { - expIdx = 0 - } - exp := expected[expIdx] - if exp.js.Status == "down" { - return nil, jobErr - } - - return nil, nil - }, - }, jobErr - } - } - - contJobValidator := func(u string, msg string) validator.Validator { - return lookslike.Compose( - urlValidator(t, u), - hbtestllext.MaybeHasEventType, - lookslike.MustCompile(map[string]interface{}{"cont": msg}), - lookslike.MustCompile(map[string]interface{}{ - "error": map[string]interface{}{ - "message": isdef.IsString, - "type": isdef.IsString, - }, - "monitor": map[string]interface{}{ - "id": uniqScope.IsUniqueTo(u), - "name": testMonFields.Name, - "type": testMonFields.Type, - "status": "down", - "check_group": uniqScope.IsUniqueTo(u), - }, - "state": isdef.Optional(hbtestllext.IsMonitorState), - }), - hbtestllext.MonitorTimespanValidator, - ) - } - - retryMonFields := testMonFields - retryMonFields.MaxAttempts = 2 - - for _, expected := range expected { - testCommonWrap(t, testDef{ - "multi-job-continuations-retry", - retryMonFields, - []jobs.Job{makeContJob(t, "http://foo.com")}, - []validator.Validator{ - contJobValidator("http://foo.com", "1st"), - lookslike.Compose( - contJobValidator("http://foo.com", "2nd"), - summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down), - hbtestllext.MaybeHasDuration, - ), - }, - nil, - nil, - }) - } -} - func TestMultiJobContsCancelledEvents(t *testing.T) { uniqScope := isdef.ScopedIsUnique() From 052f880c9c71dfbe16d76beb61a42a908044e340 Mon Sep 17 00:00:00 2001 From: emilioalvap Date: Fri, 13 Oct 2023 18:00:06 +0200 Subject: [PATCH 3/4] Add changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8ba973c8c63c..c4ec867eaf39 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -114,6 +114,7 @@ is collected by it. *Heartbeat* - Fix panics when parsing dereferencing invalid parsed url. {pull}34702[34702] +- Fix retries to trigger on a down monitor with no previous state. {pull}36842[36842] *Metricbeat* From 1244160e6885ee65ff5ce59f481c8c223b11fcb2 Mon Sep 17 00:00:00 2001 From: vigneshshanmugam Date: Fri, 13 Oct 2023 11:09:55 -0700 Subject: [PATCH 4/4] address review --- heartbeat/monitors/wrappers/summarizer/plugstatestat.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go index 17b150013200..4acfee4dc361 100644 --- a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go +++ b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go @@ -150,8 +150,7 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions { lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf) curCheckDown := ssp.js.Status == monitorstate.StatusDown - lastStateUpOrEmpty := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp || - ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusEmpty + lastStateUpOrEmpty := lastStatus == monitorstate.StatusUp || lastStatus == monitorstate.StatusEmpty hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts // retry if... @@ -175,7 +174,7 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions { eventext.MergeEventFields(event, fields) - logp.L().Debugf("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts) + logp.L().Infof("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts) if retry { return RetryBeforeSummary