From eab643e28951e7c1c2134ae6186215ae3085531c Mon Sep 17 00:00:00 2001 From: Guillaume Pagnoux Date: Thu, 10 Oct 2024 14:05:26 +0200 Subject: [PATCH] fix CR - Only get container ids if there are previous potential services - Only add container id on confirmed new services - Add test for propagation of container id values - Revert unrelated formatting change made by auto-formatter --- .../servicediscovery/events_test.go | 8 +++ .../corechecks/servicediscovery/impl_linux.go | 62 +++++++++++-------- .../servicediscovery/impl_linux_test.go | 2 +- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/pkg/collector/corechecks/servicediscovery/events_test.go b/pkg/collector/corechecks/servicediscovery/events_test.go index 0675a7dedd4c9..13d9ef013931e 100644 --- a/pkg/collector/corechecks/servicediscovery/events_test.go +++ b/pkg/collector/corechecks/servicediscovery/events_test.go @@ -65,6 +65,7 @@ func Test_telemetrySender(t *testing.T) { DDService: "dd-service", DDServiceInjected: true, CPUCores: 1.5, + ContainerID: "abcd", }, meta: ServiceMetadata{ Name: "test-service", @@ -101,6 +102,7 @@ func Test_telemetrySender(t *testing.T) { CommandLine: []string{"test-service", "--args"}, RSSMemory: 500 * 1024 * 1024, CPUCores: 1.5, + ContainerID: "abcd", }, }, { @@ -124,6 +126,7 @@ func Test_telemetrySender(t *testing.T) { CommandLine: []string{"test-service", "--args"}, RSSMemory: 500 * 1024 * 1024, CPUCores: 1.5, + ContainerID: "abcd", }, }, { @@ -147,6 +150,7 @@ func Test_telemetrySender(t *testing.T) { CommandLine: []string{"test-service", "--args"}, RSSMemory: 500 * 1024 * 1024, CPUCores: 1.5, + ContainerID: "abcd", }, }, } @@ -182,6 +186,7 @@ func Test_telemetrySender_name_provided(t *testing.T) { StartTimeSecs: uint64(now.Add(-20 * time.Minute).Unix()), GeneratedName: "generated-name2", DDService: "dd-service-provided", + ContainerID: "abcd", }, meta: ServiceMetadata{ Name: "test-service", @@ -215,6 +220,7 @@ func Test_telemetrySender_name_provided(t *testing.T) { APMInstrumentation: "injected", PID: 55, CommandLine: []string{"foo", "--option"}, + ContainerID: "abcd", }, }, { @@ -235,6 +241,7 @@ func Test_telemetrySender_name_provided(t *testing.T) { APMInstrumentation: "injected", PID: 55, CommandLine: []string{"foo", "--option"}, + ContainerID: "abcd", }, }, { @@ -255,6 +262,7 @@ func Test_telemetrySender_name_provided(t *testing.T) { APMInstrumentation: "injected", PID: 55, CommandLine: []string{"foo", "--option"}, + ContainerID: "abcd", }, }, } diff --git a/pkg/collector/corechecks/servicediscovery/impl_linux.go b/pkg/collector/corechecks/servicediscovery/impl_linux.go index cc6452f733190..b994e0f23b0a6 100644 --- a/pkg/collector/corechecks/servicediscovery/impl_linux.go +++ b/pkg/collector/corechecks/servicediscovery/impl_linux.go @@ -72,28 +72,9 @@ func (li *linuxImpl) DiscoverServices() (*discoveredServices, error) { } events := serviceEvents{} - now := li.time.Now() - // potentialServices contains processes that we scanned in the previous iteration and had open ports. - // we check if they are still alive in this iteration, and if so, we send a start-service telemetry event. - for pid, svc := range li.potentialServices { - if service, ok := serviceMap[pid]; ok { - svc.LastHeartbeat = now - svc.service.RSS = service.RSS - svc.service.CPUCores = service.CPUCores - li.aliveServices[pid] = svc - events.start = append(events.start, *svc) - } - } - clear(li.potentialServices) - - // Get container IDs to enrich the service info with it. The SD check is - // supposed to run once every minute, so we use this duration for cache - // validity. - // TODO: use/find a global constant for this delay, to keep in sync with - // the check delay if it were to change. - containers := li.containerProvider.GetPidToCid(1 * time.Minute) + li.handlePotentialServices(&events, now, serviceMap) // check open ports - these will be potential new services if they are still alive in the next iteration. for _, service := range response.Services { @@ -111,11 +92,6 @@ func (li *linuxImpl) DiscoverServices() (*discoveredServices, error) { continue } - if id, ok := containers[pid]; ok { - svc.service.ContainerID = id - log.Debugf("[pid: %d] add containerID to process: %s", pid, id) - } - log.Debugf("[pid: %d] adding process to potential: %s", pid, svc.meta.Name) li.potentialServices[pid] = &svc } @@ -149,6 +125,42 @@ func (li *linuxImpl) DiscoverServices() (*discoveredServices, error) { }, nil } +// handlePotentialServices checks cached potential services we have seen in the +// previous call of the check. If they are still alive, start events are sent +// for these services. +func (li *linuxImpl) handlePotentialServices(events *serviceEvents, now time.Time, serviceMap map[int]*model.Service) { + if len(li.potentialServices) == 0 { + return + } + + // Get container IDs to enrich the service info with it. The SD check is + // supposed to run once every minute, so we use this duration for cache + // validity. + // TODO: use/find a global constant for this delay, to keep in sync with + // the check delay if it were to change. + containers := li.containerProvider.GetPidToCid(1 * time.Minute) + + // potentialServices contains processes that we scanned in the previous + // iteration and had open ports. We check if they are still alive in this + // iteration, and if so, we send a start-service telemetry event. + for pid, svc := range li.potentialServices { + if service, ok := serviceMap[pid]; ok { + svc.LastHeartbeat = now + svc.service.RSS = service.RSS + svc.service.CPUCores = service.CPUCores + + if id, ok := containers[pid]; ok { + svc.service.ContainerID = id + log.Debugf("[pid: %d] add containerID to process: %s", pid, id) + } + + li.aliveServices[pid] = svc + events.start = append(events.start, *svc) + } + } + clear(li.potentialServices) +} + func (li *linuxImpl) getServiceInfo(service model.Service) serviceInfo { // if the process name is docker-proxy, we should talk to docker to get the process command line and env vars // have to see how far this can go but not for the initial release diff --git a/pkg/collector/corechecks/servicediscovery/impl_linux_test.go b/pkg/collector/corechecks/servicediscovery/impl_linux_test.go index c05f7ffe5e5eb..6a418534ca6ae 100644 --- a/pkg/collector/corechecks/servicediscovery/impl_linux_test.go +++ b/pkg/collector/corechecks/servicediscovery/impl_linux_test.go @@ -31,7 +31,7 @@ type testProc struct { } var ( - bootTimeSeconds = uint64(time.Date(2000, 0o1, 0o1, 0, 0, 0, 0, time.UTC).Unix()) + bootTimeSeconds = uint64(time.Date(2000, 01, 01, 0, 0, 0, 0, time.UTC).Unix()) // procLaunched is number of clicks (100 per second) since bootTime when the process started // assume it's 12 hours later procLaunchedSeconds = bootTimeSeconds + uint64((12 * time.Hour).Seconds())