diff --git a/.chloggen/hostmetricsreceiver-new-metrics.yaml b/.chloggen/hostmetricsreceiver-new-metrics.yaml new file mode 100644 index 000000000000..ce97515a2eab --- /dev/null +++ b/.chloggen/hostmetricsreceiver-new-metrics.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: hostmetricsreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add new process metrics + +# One or more tracking issues related to the change +issues: [12482] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: Add `process.context_switches` and `process.open_file_descriptors` as process metrics. They are disabled by default. diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/documentation.md b/receiver/hostmetricsreceiver/internal/scraper/processscraper/documentation.md index 1915674c52e3..3b3c9cc41e78 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/documentation.md +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/documentation.md @@ -8,11 +8,13 @@ These are the metrics available for this scraper. | Name | Description | Unit | Type | Attributes | | ---- | ----------- | ---- | ---- | ---------- | +| process.context_switches | Number of times the process has been context switched. | {count} | Sum(Int) | | | **process.cpu.time** | Total CPU seconds broken down by different states. | s | Sum(Double) | | | **process.disk.io** | Disk bytes transferred. | By | Sum(Int) | | -| **process.memory.physical_usage** | The amount of physical memory in use. | By | Sum(Int) | | +| **process.memory.physical_usage** | The amount of physical memory in use | By | Sum(Int) | | | **process.memory.virtual_usage** | Virtual memory size. | By | Sum(Int) | | -| process.paging.faults | Number of page faults the process has made. This metric is only available on Linux. | {faults} | Sum(Int) | | +| process.open_file_descriptors | Number of file descriptors in use by the process. | {count} | Sum(Int) | | +| process.paging.faults | Number of page faults the process has made. This metric is only available on Linux. | {faults} | Sum(Int) | | | process.threads | Process threads count. | {threads} | Sum(Int) | | **Highlighted metrics** are emitted by default. Other metrics are optional and not emitted by default. @@ -40,6 +42,7 @@ metrics: | Name | Description | Values | | ---- | ----------- | ------ | +| context_switch_type (type) | Type of context switched. | involuntary, voluntary | | direction | Direction of flow of bytes (read or write). | read, write | +| paging_fault_type (type) | Type of memory paging fault. | major, minor | | state | Breakdown of CPU usage by type. | system, user, wait | -| type | Type of memory paging fault. | major, minor | diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/metadata/generated_metrics.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/metadata/generated_metrics.go index 6348854b9865..84fc7a8720c1 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/metadata/generated_metrics.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/metadata/generated_metrics.go @@ -38,16 +38,21 @@ func (ms *MetricSettings) Unmarshal(parser *confmap.Conf) error { // MetricsSettings provides settings for hostmetricsreceiver/process metrics. type MetricsSettings struct { + ProcessContextSwitches MetricSettings `mapstructure:"process.context_switches"` ProcessCPUTime MetricSettings `mapstructure:"process.cpu.time"` ProcessDiskIo MetricSettings `mapstructure:"process.disk.io"` ProcessMemoryPhysicalUsage MetricSettings `mapstructure:"process.memory.physical_usage"` ProcessMemoryVirtualUsage MetricSettings `mapstructure:"process.memory.virtual_usage"` + ProcessOpenFileDescriptors MetricSettings `mapstructure:"process.open_file_descriptors"` ProcessPagingFaults MetricSettings `mapstructure:"process.paging.faults"` ProcessThreads MetricSettings `mapstructure:"process.threads"` } func DefaultMetricsSettings() MetricsSettings { return MetricsSettings{ + ProcessContextSwitches: MetricSettings{ + Enabled: false, + }, ProcessCPUTime: MetricSettings{ Enabled: true, }, @@ -60,6 +65,9 @@ func DefaultMetricsSettings() MetricsSettings { ProcessMemoryVirtualUsage: MetricSettings{ Enabled: true, }, + ProcessOpenFileDescriptors: MetricSettings{ + Enabled: false, + }, ProcessPagingFaults: MetricSettings{ Enabled: false, }, @@ -69,6 +77,32 @@ func DefaultMetricsSettings() MetricsSettings { } } +// AttributeContextSwitchType specifies the a value context_switch_type attribute. +type AttributeContextSwitchType int + +const ( + _ AttributeContextSwitchType = iota + AttributeContextSwitchTypeInvoluntary + AttributeContextSwitchTypeVoluntary +) + +// String returns the string representation of the AttributeContextSwitchType. +func (av AttributeContextSwitchType) String() string { + switch av { + case AttributeContextSwitchTypeInvoluntary: + return "involuntary" + case AttributeContextSwitchTypeVoluntary: + return "voluntary" + } + return "" +} + +// MapAttributeContextSwitchType is a helper map of string to AttributeContextSwitchType attribute value. +var MapAttributeContextSwitchType = map[string]AttributeContextSwitchType{ + "involuntary": AttributeContextSwitchTypeInvoluntary, + "voluntary": AttributeContextSwitchTypeVoluntary, +} + // AttributeDirection specifies the a value direction attribute. type AttributeDirection int @@ -95,6 +129,32 @@ var MapAttributeDirection = map[string]AttributeDirection{ "write": AttributeDirectionWrite, } +// AttributePagingFaultType specifies the a value paging_fault_type attribute. +type AttributePagingFaultType int + +const ( + _ AttributePagingFaultType = iota + AttributePagingFaultTypeMajor + AttributePagingFaultTypeMinor +) + +// String returns the string representation of the AttributePagingFaultType. +func (av AttributePagingFaultType) String() string { + switch av { + case AttributePagingFaultTypeMajor: + return "major" + case AttributePagingFaultTypeMinor: + return "minor" + } + return "" +} + +// MapAttributePagingFaultType is a helper map of string to AttributePagingFaultType attribute value. +var MapAttributePagingFaultType = map[string]AttributePagingFaultType{ + "major": AttributePagingFaultTypeMajor, + "minor": AttributePagingFaultTypeMinor, +} + // AttributeState specifies the a value state attribute. type AttributeState int @@ -125,30 +185,57 @@ var MapAttributeState = map[string]AttributeState{ "wait": AttributeStateWait, } -// AttributeType specifies the a value type attribute. -type AttributeType int +type metricProcessContextSwitches struct { + data pmetric.Metric // data buffer for generated metric. + settings MetricSettings // metric settings provided by user. + capacity int // max observed number of data points added to the metric. +} -const ( - _ AttributeType = iota - AttributeTypeMajor - AttributeTypeMinor -) +// init fills process.context_switches metric with initial data. +func (m *metricProcessContextSwitches) init() { + m.data.SetName("process.context_switches") + m.data.SetDescription("Number of times the process has been context switched.") + m.data.SetUnit("{count}") + m.data.SetEmptySum() + m.data.Sum().SetIsMonotonic(true) + m.data.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + m.data.Sum().DataPoints().EnsureCapacity(m.capacity) +} -// String returns the string representation of the AttributeType. -func (av AttributeType) String() string { - switch av { - case AttributeTypeMajor: - return "major" - case AttributeTypeMinor: - return "minor" +func (m *metricProcessContextSwitches) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64, contextSwitchTypeAttributeValue string) { + if !m.settings.Enabled { + return + } + dp := m.data.Sum().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) + dp.Attributes().PutStr("type", contextSwitchTypeAttributeValue) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricProcessContextSwitches) updateCapacity() { + if m.data.Sum().DataPoints().Len() > m.capacity { + m.capacity = m.data.Sum().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricProcessContextSwitches) emit(metrics pmetric.MetricSlice) { + if m.settings.Enabled && m.data.Sum().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() } - return "" } -// MapAttributeType is a helper map of string to AttributeType attribute value. -var MapAttributeType = map[string]AttributeType{ - "major": AttributeTypeMajor, - "minor": AttributeTypeMinor, +func newMetricProcessContextSwitches(settings MetricSettings) metricProcessContextSwitches { + m := metricProcessContextSwitches{settings: settings} + if settings.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m } type metricProcessCPUTime struct { @@ -266,7 +353,7 @@ type metricProcessMemoryPhysicalUsage struct { // init fills process.memory.physical_usage metric with initial data. func (m *metricProcessMemoryPhysicalUsage) init() { m.data.SetName("process.memory.physical_usage") - m.data.SetDescription("The amount of physical memory in use.") + m.data.SetDescription("The amount of physical memory in use") m.data.SetUnit("By") m.data.SetEmptySum() m.data.Sum().SetIsMonotonic(false) @@ -359,6 +446,57 @@ func newMetricProcessMemoryVirtualUsage(settings MetricSettings) metricProcessMe return m } +type metricProcessOpenFileDescriptors struct { + data pmetric.Metric // data buffer for generated metric. + settings MetricSettings // metric settings provided by user. + capacity int // max observed number of data points added to the metric. +} + +// init fills process.open_file_descriptors metric with initial data. +func (m *metricProcessOpenFileDescriptors) init() { + m.data.SetName("process.open_file_descriptors") + m.data.SetDescription("Number of file descriptors in use by the process.") + m.data.SetUnit("{count}") + m.data.SetEmptySum() + m.data.Sum().SetIsMonotonic(false) + m.data.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) +} + +func (m *metricProcessOpenFileDescriptors) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64) { + if !m.settings.Enabled { + return + } + dp := m.data.Sum().DataPoints().AppendEmpty() + dp.SetStartTimestamp(start) + dp.SetTimestamp(ts) + dp.SetIntValue(val) +} + +// updateCapacity saves max length of data point slices that will be used for the slice capacity. +func (m *metricProcessOpenFileDescriptors) updateCapacity() { + if m.data.Sum().DataPoints().Len() > m.capacity { + m.capacity = m.data.Sum().DataPoints().Len() + } +} + +// emit appends recorded metric data to a metrics slice and prepares it for recording another set of data points. +func (m *metricProcessOpenFileDescriptors) emit(metrics pmetric.MetricSlice) { + if m.settings.Enabled && m.data.Sum().DataPoints().Len() > 0 { + m.updateCapacity() + m.data.MoveTo(metrics.AppendEmpty()) + m.init() + } +} + +func newMetricProcessOpenFileDescriptors(settings MetricSettings) metricProcessOpenFileDescriptors { + m := metricProcessOpenFileDescriptors{settings: settings} + if settings.Enabled { + m.data = pmetric.NewMetric() + m.init() + } + return m +} + type metricProcessPagingFaults struct { data pmetric.Metric // data buffer for generated metric. settings MetricSettings // metric settings provided by user. @@ -376,7 +514,7 @@ func (m *metricProcessPagingFaults) init() { m.data.Sum().DataPoints().EnsureCapacity(m.capacity) } -func (m *metricProcessPagingFaults) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64, typeAttributeValue string) { +func (m *metricProcessPagingFaults) recordDataPoint(start pcommon.Timestamp, ts pcommon.Timestamp, val int64, pagingFaultTypeAttributeValue string) { if !m.settings.Enabled { return } @@ -384,7 +522,7 @@ func (m *metricProcessPagingFaults) recordDataPoint(start pcommon.Timestamp, ts dp.SetStartTimestamp(start) dp.SetTimestamp(ts) dp.SetIntValue(val) - dp.Attributes().PutStr("type", typeAttributeValue) + dp.Attributes().PutStr("type", pagingFaultTypeAttributeValue) } // updateCapacity saves max length of data point slices that will be used for the slice capacity. @@ -471,10 +609,12 @@ type MetricsBuilder struct { resourceCapacity int // maximum observed number of resource attributes. metricsBuffer pmetric.Metrics // accumulates metrics data before emitting. buildInfo component.BuildInfo // contains version information + metricProcessContextSwitches metricProcessContextSwitches metricProcessCPUTime metricProcessCPUTime metricProcessDiskIo metricProcessDiskIo metricProcessMemoryPhysicalUsage metricProcessMemoryPhysicalUsage metricProcessMemoryVirtualUsage metricProcessMemoryVirtualUsage + metricProcessOpenFileDescriptors metricProcessOpenFileDescriptors metricProcessPagingFaults metricProcessPagingFaults metricProcessThreads metricProcessThreads } @@ -494,10 +634,12 @@ func NewMetricsBuilder(settings MetricsSettings, buildInfo component.BuildInfo, startTime: pcommon.NewTimestampFromTime(time.Now()), metricsBuffer: pmetric.NewMetrics(), buildInfo: buildInfo, + metricProcessContextSwitches: newMetricProcessContextSwitches(settings.ProcessContextSwitches), metricProcessCPUTime: newMetricProcessCPUTime(settings.ProcessCPUTime), metricProcessDiskIo: newMetricProcessDiskIo(settings.ProcessDiskIo), metricProcessMemoryPhysicalUsage: newMetricProcessMemoryPhysicalUsage(settings.ProcessMemoryPhysicalUsage), metricProcessMemoryVirtualUsage: newMetricProcessMemoryVirtualUsage(settings.ProcessMemoryVirtualUsage), + metricProcessOpenFileDescriptors: newMetricProcessOpenFileDescriptors(settings.ProcessOpenFileDescriptors), metricProcessPagingFaults: newMetricProcessPagingFaults(settings.ProcessPagingFaults), metricProcessThreads: newMetricProcessThreads(settings.ProcessThreads), } @@ -602,10 +744,12 @@ func (mb *MetricsBuilder) EmitForResource(rmo ...ResourceMetricsOption) { ils.Scope().SetName("otelcol/hostmetricsreceiver/process") ils.Scope().SetVersion(mb.buildInfo.Version) ils.Metrics().EnsureCapacity(mb.metricsCapacity) + mb.metricProcessContextSwitches.emit(ils.Metrics()) mb.metricProcessCPUTime.emit(ils.Metrics()) mb.metricProcessDiskIo.emit(ils.Metrics()) mb.metricProcessMemoryPhysicalUsage.emit(ils.Metrics()) mb.metricProcessMemoryVirtualUsage.emit(ils.Metrics()) + mb.metricProcessOpenFileDescriptors.emit(ils.Metrics()) mb.metricProcessPagingFaults.emit(ils.Metrics()) mb.metricProcessThreads.emit(ils.Metrics()) for _, op := range rmo { @@ -627,6 +771,11 @@ func (mb *MetricsBuilder) Emit(rmo ...ResourceMetricsOption) pmetric.Metrics { return metrics } +// RecordProcessContextSwitchesDataPoint adds a data point to process.context_switches metric. +func (mb *MetricsBuilder) RecordProcessContextSwitchesDataPoint(ts pcommon.Timestamp, val int64, contextSwitchTypeAttributeValue AttributeContextSwitchType) { + mb.metricProcessContextSwitches.recordDataPoint(mb.startTime, ts, val, contextSwitchTypeAttributeValue.String()) +} + // RecordProcessCPUTimeDataPoint adds a data point to process.cpu.time metric. func (mb *MetricsBuilder) RecordProcessCPUTimeDataPoint(ts pcommon.Timestamp, val float64, stateAttributeValue AttributeState) { mb.metricProcessCPUTime.recordDataPoint(mb.startTime, ts, val, stateAttributeValue.String()) @@ -647,9 +796,14 @@ func (mb *MetricsBuilder) RecordProcessMemoryVirtualUsageDataPoint(ts pcommon.Ti mb.metricProcessMemoryVirtualUsage.recordDataPoint(mb.startTime, ts, val) } +// RecordProcessOpenFileDescriptorsDataPoint adds a data point to process.open_file_descriptors metric. +func (mb *MetricsBuilder) RecordProcessOpenFileDescriptorsDataPoint(ts pcommon.Timestamp, val int64) { + mb.metricProcessOpenFileDescriptors.recordDataPoint(mb.startTime, ts, val) +} + // RecordProcessPagingFaultsDataPoint adds a data point to process.paging.faults metric. -func (mb *MetricsBuilder) RecordProcessPagingFaultsDataPoint(ts pcommon.Timestamp, val int64, typeAttributeValue AttributeType) { - mb.metricProcessPagingFaults.recordDataPoint(mb.startTime, ts, val, typeAttributeValue.String()) +func (mb *MetricsBuilder) RecordProcessPagingFaultsDataPoint(ts pcommon.Timestamp, val int64, pagingFaultTypeAttributeValue AttributePagingFaultType) { + mb.metricProcessPagingFaults.recordDataPoint(mb.startTime, ts, val, pagingFaultTypeAttributeValue.String()) } // RecordProcessThreadsDataPoint adds a data point to process.threads metric. diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/metadata.yaml b/receiver/hostmetricsreceiver/internal/scraper/processscraper/metadata.yaml index e44d1e42f527..9176656def24 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/metadata.yaml +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/metadata.yaml @@ -47,10 +47,16 @@ attributes: description: Breakdown of CPU usage by type. enum: [system, user, wait] - type: + paging_fault_type: + value: type description: Type of memory paging fault. enum: [major, minor] + context_switch_type: + value: type + description: Type of context switched. + enum: [involuntary, voluntary] + metrics: process.cpu.time: enabled: true @@ -64,7 +70,7 @@ metrics: process.memory.physical_usage: enabled: true - description: The amount of physical memory in use. + description: The amount of physical memory in use unit: By sum: value_type: int @@ -98,7 +104,7 @@ metrics: value_type: int aggregation: cumulative monotonic: true - attributes: [type] + attributes: [paging_fault_type] process.threads: enabled: false @@ -108,3 +114,22 @@ metrics: value_type: int aggregation: cumulative monotonic: false + + process.open_file_descriptors: + enabled: false + description: Number of file descriptors in use by the process. + unit: '{count}' + sum: + value_type: int + aggregation: cumulative + monotonic: false + + process.context_switches: + enabled: false + description: Number of times the process has been context switched. + unit: '{count}' + sum: + value_type: int + aggregation: cumulative + monotonic: true + attributes: [context_switch_type] diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go index cea7d473e954..928b2117ed3b 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go @@ -94,6 +94,8 @@ type processHandle interface { CreateTime() (int64, error) Parent() (*process.Process, error) PageFaults() (*process.PageFaultsStat, error) + NumCtxSwitches() (*process.NumCtxSwitchesStat, error) + NumFDs() (int32, error) } type gopsProcessHandles struct { diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 110b07023c77..2374d5ea7fee 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -30,13 +30,15 @@ import ( ) const ( - cpuMetricsLen = 1 - memoryMetricsLen = 2 - diskMetricsLen = 1 - pagingMetricsLen = 1 - threadMetricsLen = 1 - - metricsLen = cpuMetricsLen + memoryMetricsLen + diskMetricsLen + pagingMetricsLen + threadMetricsLen + cpuMetricsLen = 1 + memoryMetricsLen = 2 + diskMetricsLen = 1 + pagingMetricsLen = 1 + threadMetricsLen = 1 + contextSwitchMetricsLen = 1 + fileDescriptorMetricsLen = 1 + + metricsLen = cpuMetricsLen + memoryMetricsLen + diskMetricsLen + pagingMetricsLen + threadMetricsLen + contextSwitchMetricsLen + fileDescriptorMetricsLen ) // scraper for Process Metrics @@ -122,6 +124,14 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) { errs.AddPartial(threadMetricsLen, fmt.Errorf("error reading thread info for process %q (pid %v): %w", md.executable.name, md.pid, err)) } + if err = s.scrapeAndAppendContextSwitchMetrics(now, md.handle); err != nil { + errs.AddPartial(contextSwitchMetricsLen, fmt.Errorf("error reading context switch counts for process %q (pid %v): %w", md.executable.name, md.pid, err)) + } + + if err = s.scrapeAndAppendOpenFileDescriptorsMetric(now, md.handle); err != nil { + errs.AddPartial(fileDescriptorMetricsLen, fmt.Errorf("error reading open file descriptor count for process %q (pid %v): %w", md.executable.name, md.pid, err)) + } + options := append(md.resourceOptions(), metadata.WithStartTimeOverride(pcommon.Timestamp(md.createTime*1e6))) s.mb.EmitForResource(options...) } @@ -202,6 +212,10 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { } func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle) error { + if !s.config.Metrics.ProcessCPUTime.Enabled { + return nil + } + times, err := handle.Times() if err != nil { return err @@ -212,6 +226,10 @@ func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle pro } func (s *scraper) scrapeAndAppendMemoryUsageMetrics(now pcommon.Timestamp, handle processHandle) error { + if !(s.config.Metrics.ProcessMemoryPhysicalUsage.Enabled || s.config.Metrics.ProcessMemoryVirtualUsage.Enabled) { + return nil + } + mem, err := handle.MemoryInfo() if err != nil { return err @@ -223,6 +241,10 @@ func (s *scraper) scrapeAndAppendMemoryUsageMetrics(now pcommon.Timestamp, handl } func (s *scraper) scrapeAndAppendDiskIOMetric(now pcommon.Timestamp, handle processHandle) error { + if !s.config.Metrics.ProcessDiskIo.Enabled { + return nil + } + io, err := handle.IOCounters() if err != nil { return err @@ -244,8 +266,8 @@ func (s *scraper) scrapeAndAppendPagingMetric(now pcommon.Timestamp, handle proc return err } - s.mb.RecordProcessPagingFaultsDataPoint(now, int64(pageFaultsStat.MajorFaults), metadata.AttributeTypeMajor) - s.mb.RecordProcessPagingFaultsDataPoint(now, int64(pageFaultsStat.MinorFaults), metadata.AttributeTypeMinor) + s.mb.RecordProcessPagingFaultsDataPoint(now, int64(pageFaultsStat.MajorFaults), metadata.AttributePagingFaultTypeMajor) + s.mb.RecordProcessPagingFaultsDataPoint(now, int64(pageFaultsStat.MinorFaults), metadata.AttributePagingFaultTypeMinor) return nil } @@ -261,3 +283,36 @@ func (s *scraper) scrapeAndAppendThreadsMetrics(now pcommon.Timestamp, handle pr return nil } + +func (s *scraper) scrapeAndAppendContextSwitchMetrics(now pcommon.Timestamp, handle processHandle) error { + if !s.config.Metrics.ProcessContextSwitches.Enabled { + return nil + } + + contextSwitches, err := handle.NumCtxSwitches() + + if err != nil { + return err + } + + s.mb.RecordProcessContextSwitchesDataPoint(now, contextSwitches.Involuntary, metadata.AttributeContextSwitchTypeInvoluntary) + s.mb.RecordProcessContextSwitchesDataPoint(now, contextSwitches.Voluntary, metadata.AttributeContextSwitchTypeVoluntary) + + return nil +} + +func (s *scraper) scrapeAndAppendOpenFileDescriptorsMetric(now pcommon.Timestamp, handle processHandle) error { + if !s.config.Metrics.ProcessOpenFileDescriptors.Enabled { + return nil + } + + fds, err := handle.NumFDs() + + if err != nil { + return err + } + + s.mb.RecordProcessOpenFileDescriptorsDataPoint(now, int64(fds)) + + return nil +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go index 66aba5879806..9a7c4b48f60d 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go @@ -44,25 +44,36 @@ func skipTestOnUnsupportedOS(t *testing.T) { } } +func enableLinuxOnlyMetrics(ms *metadata.MetricsSettings) { + if runtime.GOOS != "linux" { + return + } + + ms.ProcessPagingFaults.Enabled = true + ms.ProcessContextSwitches.Enabled = true + ms.ProcessOpenFileDescriptors.Enabled = true +} + func TestScrape(t *testing.T) { skipTestOnUnsupportedOS(t) type testCase struct { - name string - expectPagingFaults bool - expectThreadsCount bool - mutateScraper func(*scraper) + name string + mutateScraper func(*scraper) + mutateMetricsSettings func(*testing.T, *metadata.MetricsSettings) } testCases := []testCase{ { name: "Standard", }, { - name: "With threads count", - expectThreadsCount: true, - }, - { - name: "With page faults", - expectPagingFaults: true, + name: "Enable Linux-only metrics", + mutateMetricsSettings: func(t *testing.T, ms *metadata.MetricsSettings) { + if runtime.GOOS != "linux" { + t.Skipf("skipping test on %v", runtime.GOOS) + } + + enableLinuxOnlyMetrics(ms) + }, }, } @@ -71,14 +82,11 @@ func TestScrape(t *testing.T) { for _, test := range testCases { t.Run(test.name, func(t *testing.T) { - metricsConfig := metadata.DefaultMetricsSettings() - if test.expectPagingFaults { - metricsConfig.ProcessPagingFaults.Enabled = true - } - if test.expectThreadsCount { - metricsConfig.ProcessThreads.Enabled = true + metricsSettings := metadata.DefaultMetricsSettings() + if test.mutateMetricsSettings != nil { + test.mutateMetricsSettings(t, &metricsSettings) } - scraper, err := newProcessScraper(componenttest.NewNopReceiverCreateSettings(), &Config{Metrics: metricsConfig}) + scraper, err := newProcessScraper(componenttest.NewNopReceiverCreateSettings(), &Config{Metrics: metricsSettings}) if test.mutateScraper != nil { test.mutateScraper(scraper) } @@ -109,14 +117,24 @@ func TestScrape(t *testing.T) { assertCPUTimeMetricValid(t, md.ResourceMetrics(), expectedStartTime) assertMemoryUsageMetricValid(t, md.ResourceMetrics(), expectedStartTime) assertOldDiskIOMetricValid(t, md.ResourceMetrics(), expectedStartTime) - if test.expectPagingFaults && runtime.GOOS == "linux" { + if metricsSettings.ProcessPagingFaults.Enabled { assertPagingMetricValid(t, md.ResourceMetrics(), expectedStartTime) } - if test.expectThreadsCount { + if metricsSettings.ProcessThreads.Enabled { assertThreadsCountValid(t, md.ResourceMetrics(), expectedStartTime) } else { assertMetricMissing(t, md.ResourceMetrics(), "process.threads") } + if metricsSettings.ProcessContextSwitches.Enabled { + assertContextSwitchMetricValid(t, md.ResourceMetrics(), expectedStartTime) + } else { + assertMetricMissing(t, md.ResourceMetrics(), "process.context_switches") + } + if metricsSettings.ProcessOpenFileDescriptors.Enabled { + assertOpenFileDescriptorMetricValid(t, md.ResourceMetrics(), expectedStartTime) + } else { + assertMetricMissing(t, md.ResourceMetrics(), "process.open_file_descriptors") + } assertSameTimeStampForAllMetricsWithinResource(t, md.ResourceMetrics()) }) } @@ -165,8 +183,8 @@ func assertMemoryUsageMetricValid(t *testing.T, resourceMetrics pmetric.Resource func assertPagingMetricValid(t *testing.T, resourceMetrics pmetric.ResourceMetricsSlice, startTime pcommon.Timestamp) { pagingFaultsMetric := getMetric(t, "process.paging.faults", resourceMetrics) - internal.AssertSumMetricHasAttributeValue(t, pagingFaultsMetric, 0, "type", pcommon.NewValueStr(metadata.AttributeTypeMajor.String())) - internal.AssertSumMetricHasAttributeValue(t, pagingFaultsMetric, 1, "type", pcommon.NewValueStr(metadata.AttributeTypeMinor.String())) + internal.AssertSumMetricHasAttributeValue(t, pagingFaultsMetric, 0, "type", pcommon.NewValueStr(metadata.AttributePagingFaultTypeMajor.String())) + internal.AssertSumMetricHasAttributeValue(t, pagingFaultsMetric, 1, "type", pcommon.NewValueStr(metadata.AttributePagingFaultTypeMinor.String())) if startTime != 0 { internal.AssertSumMetricStartTimeEquals(t, pagingFaultsMetric, startTime) @@ -206,6 +224,29 @@ func assertOldDiskIOMetricValid(t *testing.T, resourceMetrics pmetric.ResourceMe pcommon.NewValueStr(metadata.AttributeDirectionWrite.String())) } +func assertContextSwitchMetricValid(t *testing.T, resourceMetrics pmetric.ResourceMetricsSlice, startTime pcommon.Timestamp) { + contextSwitchMetric := getMetric(t, "process.context_switches", resourceMetrics) + assert.Equal(t, "process.context_switches", contextSwitchMetric.Name()) + + if startTime != 0 { + internal.AssertSumMetricStartTimeEquals(t, contextSwitchMetric, startTime) + } + + internal.AssertSumMetricHasAttributeValue(t, contextSwitchMetric, 0, "type", + pcommon.NewValueStr(metadata.AttributeContextSwitchTypeInvoluntary.String())) + internal.AssertSumMetricHasAttributeValue(t, contextSwitchMetric, 1, "type", + pcommon.NewValueStr(metadata.AttributeContextSwitchTypeVoluntary.String())) +} + +func assertOpenFileDescriptorMetricValid(t *testing.T, resourceMetrics pmetric.ResourceMetricsSlice, startTime pcommon.Timestamp) { + openFileDescriptorsMetric := getMetric(t, "process.open_file_descriptors", resourceMetrics) + assert.Equal(t, "process.open_file_descriptors", openFileDescriptorsMetric.Name()) + + if startTime != 0 { + internal.AssertSumMetricStartTimeEquals(t, openFileDescriptorsMetric, startTime) + } +} + func assertSameTimeStampForAllMetricsWithinResource(t *testing.T, resourceMetrics pmetric.ResourceMetricsSlice) { for i := 0; i < resourceMetrics.Len(); i++ { ilms := resourceMetrics.At(i).ScopeMetrics() @@ -345,6 +386,16 @@ func (p *processHandleMock) PageFaults() (*process.PageFaultsStat, error) { return args.Get(0).(*process.PageFaultsStat), args.Error(1) } +func (p *processHandleMock) NumCtxSwitches() (*process.NumCtxSwitchesStat, error) { + args := p.MethodCalled("NumCtxSwitches") + return args.Get(0).(*process.NumCtxSwitchesStat), args.Error(1) +} + +func (p *processHandleMock) NumFDs() (int32, error) { + args := p.MethodCalled("NumFDs") + return args.Get(0).(int32), args.Error(1) +} + func newDefaultHandleMock() *processHandleMock { handleMock := &processHandleMock{} handleMock.On("Username").Return("username", nil) @@ -356,6 +407,8 @@ func newDefaultHandleMock() *processHandleMock { handleMock.On("Parent").Return(&process.Process{Pid: 2}, nil) handleMock.On("NumThreads").Return(int32(0), nil) handleMock.On("PageFaults").Return(&process.PageFaultsStat{}, nil) + handleMock.On("NumCtxSwitches").Return(&process.NumCtxSwitchesStat{}, nil) + handleMock.On("NumFDs").Return(int32(0), nil) return handleMock } @@ -435,8 +488,11 @@ func TestScrapeMetrics_Filtered(t *testing.T) { for _, test := range testCases { t.Run(test.name, func(t *testing.T) { scrapeProcessDelay, _ := time.ParseDuration(test.scrapeProcessDelay) + metricsSettings := metadata.DefaultMetricsSettings() + enableLinuxOnlyMetrics(&metricsSettings) + config := &Config{ - Metrics: metadata.DefaultMetricsSettings(), + Metrics: metricsSettings, ScrapeProcessDelay: scrapeProcessDelay, } @@ -484,24 +540,33 @@ func TestScrapeMetrics_Filtered(t *testing.T) { } } +func enableOptionalMetrics(ms *metadata.MetricsSettings) { + ms.ProcessThreads.Enabled = true + ms.ProcessPagingFaults.Enabled = true + ms.ProcessContextSwitches.Enabled = true + ms.ProcessOpenFileDescriptors.Enabled = true +} + func TestScrapeMetrics_ProcessErrors(t *testing.T) { skipTestOnUnsupportedOS(t) type testCase struct { - name string - osFilter string - nameError error - exeError error - usernameError error - cmdlineError error - timesError error - memoryInfoError error - ioCountersError error - createTimeError error - parentPidError error - pageFaultsError error - numThreadsError error - expectedError string + name string + osFilter string + nameError error + exeError error + usernameError error + cmdlineError error + timesError error + memoryInfoError error + ioCountersError error + createTimeError error + parentPidError error + pageFaultsError error + numThreadsError error + numCtxSwitchesError error + numFDsError error + expectedError string } testCases := []testCase{ @@ -562,15 +627,27 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { expectedError: `error reading thread info for process "test" (pid 1): err8`, }, { - name: "Multiple Errors", - cmdlineError: errors.New("err2"), - usernameError: errors.New("err3"), - createTimeError: errors.New("err4"), - timesError: errors.New("err5"), - memoryInfoError: errors.New("err6"), - ioCountersError: errors.New("err7"), - pageFaultsError: errors.New("err-paging"), - numThreadsError: errors.New("err8"), + name: "Context Switches Error", + numCtxSwitchesError: errors.New("err9"), + expectedError: `error reading context switch counts for process "test" (pid 1): err9`, + }, + { + name: "File Descriptors Error", + numFDsError: errors.New("err10"), + expectedError: `error reading open file descriptor count for process "test" (pid 1): err10`, + }, + { + name: "Multiple Errors", + cmdlineError: errors.New("err2"), + usernameError: errors.New("err3"), + createTimeError: errors.New("err4"), + timesError: errors.New("err5"), + memoryInfoError: errors.New("err6"), + ioCountersError: errors.New("err7"), + pageFaultsError: errors.New("err-paging"), + numThreadsError: errors.New("err8"), + numCtxSwitchesError: errors.New("err9"), + numFDsError: errors.New("err10"), expectedError: `error reading command for process "test" (pid 1): err2; ` + `error reading username for process "test" (pid 1): err3; ` + `error reading create time for process "test" (pid 1): err4; ` + @@ -578,7 +655,9 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { `error reading memory info for process "test" (pid 1): err6; ` + `error reading disk usage for process "test" (pid 1): err7; ` + `error reading memory paging info for process "test" (pid 1): err-paging; ` + - `error reading thread info for process "test" (pid 1): err8`, + `error reading thread info for process "test" (pid 1): err8; ` + + `error reading context switch counts for process "test" (pid 1): err9; ` + + `error reading open file descriptor count for process "test" (pid 1): err10`, }, } @@ -589,8 +668,8 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { } metricsSettings := metadata.DefaultMetricsSettings() - metricsSettings.ProcessPagingFaults.Enabled = true - metricsSettings.ProcessThreads.Enabled = true + enableOptionalMetrics(&metricsSettings) + scraper, err := newProcessScraper(componenttest.NewNopReceiverCreateSettings(), &Config{Metrics: metricsSettings}) require.NoError(t, err, "Failed to create process scraper: %v", err) err = scraper.start(context.Background(), componenttest.NewNopHost()) @@ -614,6 +693,8 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { handleMock.On("Parent").Return(&process.Process{Pid: 2}, test.parentPidError) handleMock.On("NumThreads").Return(int32(0), test.numThreadsError) handleMock.On("PageFaults").Return(&process.PageFaultsStat{}, test.pageFaultsError) + handleMock.On("NumCtxSwitches").Return(&process.NumCtxSwitchesStat{}, test.numCtxSwitchesError) + handleMock.On("NumFDs").Return(int32(0), test.numFDsError) scraper.getProcessHandles = func() (processHandles, error) { return &processHandlesMock{handles: []*processHandleMock{handleMock}}, nil @@ -621,7 +702,7 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { md, err := scraper.scrape(context.Background()) - expectedResourceMetricsLen, expectedMetricsLen := getExpectedLengthOfReturnedMetrics(test.nameError, test.exeError, test.timesError, test.memoryInfoError, test.ioCountersError, test.pageFaultsError, test.numThreadsError) + expectedResourceMetricsLen, expectedMetricsLen := getExpectedLengthOfReturnedMetrics(test.nameError, test.exeError, test.timesError, test.memoryInfoError, test.ioCountersError, test.pageFaultsError, test.numThreadsError, test.numCtxSwitchesError, test.numFDsError) assert.Equal(t, expectedResourceMetricsLen, md.ResourceMetrics().Len()) assert.Equal(t, expectedMetricsLen, md.MetricCount()) @@ -629,7 +710,7 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { isPartial := scrapererror.IsPartialScrapeError(err) assert.True(t, isPartial) if isPartial { - expectedFailures := getExpectedScrapeFailures(test.nameError, test.exeError, test.timesError, test.memoryInfoError, test.ioCountersError, test.pageFaultsError, test.numThreadsError) + expectedFailures := getExpectedScrapeFailures(test.nameError, test.exeError, test.timesError, test.memoryInfoError, test.ioCountersError, test.pageFaultsError, test.numThreadsError, test.numCtxSwitchesError, test.numFDsError) var scraperErr scrapererror.PartialScrapeError require.ErrorAs(t, err, &scraperErr) assert.Equal(t, expectedFailures, scraperErr.Failed) @@ -638,7 +719,7 @@ func TestScrapeMetrics_ProcessErrors(t *testing.T) { } } -func getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError error) (int, int) { +func getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError, contextSwitchError, fileDescriptorError error) (int, int) { if nameError != nil || exeError != nil { return 0, 0 } @@ -659,6 +740,12 @@ func getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError if threadError == nil { expectedLen += threadMetricsLen } + if contextSwitchError == nil { + expectedLen += contextSwitchMetricsLen + } + if fileDescriptorError == nil { + expectedLen += fileDescriptorMetricsLen + } if expectedLen == 0 { return 0, 0 @@ -666,11 +753,11 @@ func getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError return 1, expectedLen } -func getExpectedScrapeFailures(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError error) int { +func getExpectedScrapeFailures(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError, contextSwitchError, fileDescriptorError error) int { if nameError != nil || exeError != nil { return 1 } - _, expectedMetricsLen := getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError) + _, expectedMetricsLen := getExpectedLengthOfReturnedMetrics(nameError, exeError, timeError, memError, diskError, pageFaultsError, threadError, contextSwitchError, fileDescriptorError) return metricsLen - expectedMetricsLen } @@ -732,3 +819,57 @@ func TestScrapeMetrics_MuteProcessNameError(t *testing.T) { }) } } + +type ProcessReadError struct{} + +func (m *ProcessReadError) Error() string { + return "unable to read data" +} + +func newErroringHandleMock() *processHandleMock { + handleMock := &processHandleMock{} + handleMock.On("Username").Return("username", nil) + handleMock.On("Cmdline").Return("cmdline", nil) + handleMock.On("CmdlineSlice").Return([]string{"cmdline"}, nil) + handleMock.On("Times").Return(&cpu.TimesStat{}, &ProcessReadError{}) + handleMock.On("MemoryInfo").Return(&process.MemoryInfoStat{}, &ProcessReadError{}) + handleMock.On("IOCounters").Return(&process.IOCountersStat{}, &ProcessReadError{}) + handleMock.On("NumThreads").Return(int32(0), &ProcessReadError{}) + handleMock.On("NumCtxSwitches").Return(&process.NumCtxSwitchesStat{}, &ProcessReadError{}) + handleMock.On("NumFDs").Return(int32(0), &ProcessReadError{}) + return handleMock +} + +func TestScrapeMetrics_DontCheckDisabledMetrics(t *testing.T) { + skipTestOnUnsupportedOS(t) + + metricSettings := metadata.DefaultMetricsSettings() + + metricSettings.ProcessCPUTime.Enabled = false + metricSettings.ProcessDiskIo.Enabled = false + metricSettings.ProcessMemoryPhysicalUsage.Enabled = false + metricSettings.ProcessMemoryVirtualUsage.Enabled = false + + t.Run("Metrics don't log errors when disabled", func(t *testing.T) { + config := &Config{Metrics: metricSettings} + + scraper, err := newProcessScraper(componenttest.NewNopReceiverCreateSettings(), config) + require.NoError(t, err, "Failed to create process scraper: %v", err) + err = scraper.start(context.Background(), componenttest.NewNopHost()) + require.NoError(t, err, "Failed to initialize process scraper: %v", err) + + handleMock := newErroringHandleMock() + handleMock.On("Name").Return("test", nil) + handleMock.On("Exe").Return("test", nil) + handleMock.On("CreateTime").Return(time.Now().UnixMilli(), nil) + handleMock.On("Parent").Return(&process.Process{Pid: 2}, nil) + + scraper.getProcessHandles = func() (processHandles, error) { + return &processHandlesMock{handles: []*processHandleMock{handleMock}}, nil + } + md, err := scraper.scrape(context.Background()) + + assert.Zero(t, md.MetricCount()) + assert.Nil(t, err) + }) +}