diff --git a/src/vmm/src/devices/mod.rs b/src/vmm/src/devices/mod.rs index 7a865ce6ad1..e2738677b81 100644 --- a/src/vmm/src/devices/mod.rs +++ b/src/vmm/src/devices/mod.rs @@ -17,17 +17,17 @@ pub mod virtio; pub use bus::{Bus, BusDevice, BusError}; use log::error; +use crate::devices::virtio::metrics::NetDeviceMetrics; use crate::devices::virtio::{QueueError, VsockError}; use crate::logger::{IncMetric, METRICS}; -use crate::net_metrics; // Function used for reporting error in terms of logging // but also in terms of metrics of net event fails. -// network metrics is reported per device so we need `net_iface_id` -// to identify which device the metrics and `err` should be reported for. -pub(crate) fn report_net_event_fail(net_iface_id: &String, err: DeviceError) { - error!("{:?}:{:?}", net_iface_id, err); - net_metrics!(net_iface_id, event_fails.inc()); +// network metrics is reported per device so we need a handle to each net device's +// metrics `net_iface_metrics` to report metrics for that device. +pub(crate) fn report_net_event_fail(net_iface_metrics: &NetDeviceMetrics, err: DeviceError) { + error!("{:?}", err); + net_iface_metrics.event_fails.inc(); } pub(crate) fn report_balloon_event_fail(err: virtio::balloon::BalloonError) { diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index 1fb41655c74..9525e2f972d 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -37,7 +37,7 @@ use crate::vstate::memory::{ByteValued, Bytes, GuestMemoryMmap}; const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN; use crate::devices::virtio::iovec::IoVecBuffer; -use crate::devices::virtio::metrics::NetMetricsPerDevice; +use crate::devices::virtio::metrics::{NetDeviceMetrics, NetMetricsPerDevice}; use crate::devices::virtio::net::tap::Tap; use crate::devices::virtio::net::{ gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX, @@ -46,7 +46,6 @@ use crate::devices::virtio::{ ActivateError, DescriptorChain, DeviceState, IrqTrigger, IrqType, Queue, VirtioDevice, TYPE_NET, }; use crate::devices::{report_net_event_fail, DeviceError}; -use crate::net_metrics; #[derive(Debug)] enum FrontendError { @@ -138,6 +137,7 @@ pub struct Net { /// The MMDS stack corresponding to this interface. /// Only if MMDS transport has been associated with it. pub mmds_ns: Option, + pub(crate) metrics: Arc, } impl Net { @@ -173,13 +173,8 @@ impl Net { queues.push(Queue::new(size)); } - // allocate NetDeviceMetrics to log metrics for this device. - // we don't need a pointer to NetDeviceMetrics since we use - // net_metrics! macro with device's id to access the metrics - // and so alloc doesn't need to return anything. - NetMetricsPerDevice::alloc(id.clone()); Ok(Net { - id, + id: id.clone(), tap, avail_features, acked_features: 0u64, @@ -197,6 +192,7 @@ impl Net { device_state: DeviceState::Inactive, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?, mmds_ns: None, + metrics: NetMetricsPerDevice::alloc(id), }) } @@ -279,7 +275,7 @@ impl Net { self.irq_trigger .trigger_irq(IrqType::Vring) .map_err(|err| { - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); DeviceError::FailedSignalingIrq(err) })?; } @@ -312,7 +308,7 @@ impl Net { // Returns true on successful frame delivery. fn rate_limited_rx_single_frame(&mut self) -> bool { if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, self.rx_bytes_read as u64) { - net_metrics!(&self.id, rx_rate_limiter_throttled.inc()); + self.metrics.rx_rate_limiter_throttled.inc(); return false; } @@ -338,7 +334,7 @@ impl Net { mem: &GuestMemoryMmap, data: &[u8], head: DescriptorChain, - net_iface_id: &String, + net_metrics: &NetDeviceMetrics, ) -> Result<(), FrontendError> { let mut chunk = data; let mut next_descriptor = Some(head); @@ -351,13 +347,13 @@ impl Net { let len = std::cmp::min(chunk.len(), descriptor.len as usize); match mem.write_slice(&chunk[..len], descriptor.addr) { Ok(()) => { - net_metrics!(net_iface_id, rx_count.inc()); + net_metrics.rx_count.inc(); chunk = &chunk[len..]; } Err(err) => { error!("Failed to write slice: {:?}", err); if let GuestMemoryError::PartialBuffer { .. } = err { - net_metrics!(net_iface_id, rx_partial_writes.inc()); + net_metrics.rx_partial_writes.inc(); } return Err(FrontendError::GuestMemory(err)); } @@ -366,8 +362,8 @@ impl Net { // If chunk is empty we are done here. if chunk.is_empty() { let len = data.len() as u64; - net_metrics!(net_iface_id, rx_bytes_count.add(len)); - net_metrics!(net_iface_id, rx_packets_count.inc()); + net_metrics.rx_bytes_count.add(len); + net_metrics.rx_packets_count.inc(); return Ok(()); } @@ -385,7 +381,7 @@ impl Net { let queue = &mut self.queues[RX_INDEX]; let head_descriptor = queue.pop_or_enable_notification(mem).ok_or_else(|| { - net_metrics!(&self.id, no_rx_avail_buffer.inc()); + self.metrics.no_rx_avail_buffer.inc(); FrontendError::EmptyQueue })?; let head_index = head_descriptor.index; @@ -394,11 +390,11 @@ impl Net { mem, &self.rx_frame_buf[..self.rx_bytes_read], head_descriptor, - &self.id, + &self.metrics, ); // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain. let used_len = if result.is_err() { - net_metrics!(&self.id, rx_fails.inc()); + self.metrics.rx_fails.inc(); 0 } else { // Safe to unwrap because a frame must be smaller than 2^16 bytes. @@ -442,19 +438,19 @@ impl Net { frame_iovec: &IoVecBuffer, tap: &mut Tap, guest_mac: Option, - net_iface_id: &String, + net_metrics: &NetDeviceMetrics, ) -> Result { // Read the frame headers from the IoVecBuffer. This will return None // if the frame_iovec is empty. let header_len = frame_iovec.read_at(headers, 0).ok_or_else(|| { error!("Received empty TX buffer"); - net_metrics!(net_iface_id, tx_malformed_frames.inc()); + net_metrics.tx_malformed_frames.inc(); NetError::VnetHeaderMissing })?; let headers = frame_bytes_from_buf(&headers[..header_len]).map_err(|e| { error!("VNET headers missing in TX frame"); - net_metrics!(net_iface_id, tx_malformed_frames.inc()); + net_metrics.tx_malformed_frames.inc(); e })?; @@ -481,7 +477,7 @@ impl Net { if let Some(guest_mac) = guest_mac { let _ = EthernetFrame::from_bytes(headers).map(|eth_frame| { if guest_mac != eth_frame.src_mac() { - net_metrics!(net_iface_id, tx_spoofed_mac_count.inc()); + net_metrics.tx_spoofed_mac_count.inc(); } }); } @@ -489,13 +485,13 @@ impl Net { match Self::write_tap(tap, frame_iovec) { Ok(_) => { let len = frame_iovec.len() as u64; - net_metrics!(net_iface_id, tx_bytes_count.add(len)); - net_metrics!(net_iface_id, tx_packets_count.inc()); - net_metrics!(net_iface_id, tx_count.inc()); + net_metrics.tx_bytes_count.add(len); + net_metrics.tx_packets_count.inc(); + net_metrics.tx_count.inc(); } Err(err) => { error!("Failed to write to tap: {:?}", err); - net_metrics!(net_iface_id, tap_write_fails.inc()); + net_metrics.tap_write_fails.inc(); } }; Ok(false) @@ -524,7 +520,7 @@ impl Net { match self.read_from_mmds_or_tap() { Ok(count) => { self.rx_bytes_read = count; - net_metrics!(&self.id, rx_count.inc()); + self.metrics.rx_count.inc(); if !self.rate_limited_rx_single_frame() { self.rx_deferred_frame = true; break; @@ -537,7 +533,7 @@ impl Net { Some(err) if err == EAGAIN => (), _ => { error!("Failed to read tap: {:?}", err); - net_metrics!(&self.id, tap_read_fails.inc()); + self.metrics.tap_read_fails.inc(); return Err(DeviceError::FailedReadTap); } }; @@ -592,7 +588,7 @@ impl Net { let buffer = match IoVecBuffer::from_descriptor_chain(mem, head) { Ok(buffer) => buffer, Err(_) => { - net_metrics!(&self.id, tx_fails.inc()); + self.metrics.tx_fails.inc(); tx_queue .add_used(mem, head_index, 0) .map_err(DeviceError::QueueError)?; @@ -601,7 +597,7 @@ impl Net { }; if !Self::rate_limiter_consume_op(&mut self.tx_rate_limiter, buffer.len() as u64) { tx_queue.undo_pop(); - net_metrics!(&self.id, tx_rate_limiter_throttled.inc()); + self.metrics.tx_rate_limiter_throttled.inc(); break; } @@ -612,7 +608,7 @@ impl Net { &buffer, &mut self.tap, self.guest_mac, - &self.id, + &self.metrics, ) .unwrap_or(false); if frame_consumed_by_mmds && !self.rx_deferred_frame { @@ -627,7 +623,7 @@ impl Net { } if !used_any { - net_metrics!(&self.id, no_tx_avail_buffer.inc()); + self.metrics.no_tx_avail_buffer.inc(); } self.signal_used_queue(NetQueue::Tx)?; @@ -667,38 +663,38 @@ impl Net { /// This is called by the event manager responding to the guest adding a new /// buffer in the RX queue. pub fn process_rx_queue_event(&mut self) { - net_metrics!(&self.id, rx_queue_event_count.inc()); + self.metrics.rx_queue_event_count.inc(); if let Err(err) = self.queue_evts[RX_INDEX].read() { // rate limiters present but with _very high_ allowed rate error!("Failed to get rx queue event: {:?}", err); - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); } else if self.rx_rate_limiter.is_blocked() { - net_metrics!(&self.id, rx_rate_limiter_throttled.inc()); + self.metrics.rx_rate_limiter_throttled.inc(); } else { // If the limiter is not blocked, resume the receiving of bytes. self.resume_rx() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } } pub fn process_tap_rx_event(&mut self) { // This is safe since we checked in the event handler that the device is activated. let mem = self.device_state.mem().unwrap(); - net_metrics!(&self.id, rx_tap_event_count.inc()); + self.metrics.rx_tap_event_count.inc(); // While there are no available RX queue buffers and there's a deferred_frame // don't process any more incoming. Otherwise start processing a frame. In the // process the deferred_frame flag will be set in order to avoid freezing the // RX queue. if self.queues[RX_INDEX].is_empty(mem) && self.rx_deferred_frame { - net_metrics!(&self.id, no_rx_avail_buffer.inc()); + self.metrics.no_rx_avail_buffer.inc(); return; } // While limiter is blocked, don't process any more incoming. if self.rx_rate_limiter.is_blocked() { - net_metrics!(&self.id, rx_rate_limiter_throttled.inc()); + self.metrics.rx_rate_limiter_throttled.inc(); return; } @@ -707,10 +703,10 @@ impl Net { // until we manage to receive this deferred frame. { self.handle_deferred_frame() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } else { self.process_rx() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } } @@ -719,22 +715,22 @@ impl Net { /// This is called by the event manager responding to the guest adding a new /// buffer in the TX queue. pub fn process_tx_queue_event(&mut self) { - net_metrics!(&self.id, tx_queue_event_count.inc()); + self.metrics.tx_queue_event_count.inc(); if let Err(err) = self.queue_evts[TX_INDEX].read() { error!("Failed to get tx queue event: {:?}", err); - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); } else if !self.tx_rate_limiter.is_blocked() // If the limiter is not blocked, continue transmitting bytes. { self.process_tx() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } else { - net_metrics!(&self.id, tx_rate_limiter_throttled.inc()); + self.metrics.tx_rate_limiter_throttled.inc(); } } pub fn process_rx_rate_limiter_event(&mut self) { - net_metrics!(&self.id, rx_event_rate_limiter_count.inc()); + self.metrics.rx_event_rate_limiter_count.inc(); // Upon rate limiter event, call the rate limiter handler // and restart processing the queue. @@ -742,28 +738,28 @@ impl Net { Ok(_) => { // There might be enough budget now to receive the frame. self.resume_rx() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } Err(err) => { error!("Failed to get rx rate-limiter event: {:?}", err); - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); } } } pub fn process_tx_rate_limiter_event(&mut self) { - net_metrics!(&self.id, tx_rate_limiter_event_count.inc()); + self.metrics.tx_rate_limiter_event_count.inc(); // Upon rate limiter event, call the rate limiter handler // and restart processing the queue. match self.tx_rate_limiter.event_handler() { Ok(_) => { // There might be enough budget now to send the frame. self.process_tx() - .unwrap_or_else(|err| report_net_event_fail(&self.id, err)); + .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err)); } Err(err) => { error!("Failed to get tx rate-limiter event: {:?}", err); - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); } } } @@ -817,7 +813,7 @@ impl VirtioDevice for Net { let config_len = config_space_bytes.len() as u64; if offset >= config_len { error!("Failed to read config space"); - net_metrics!(&self.id, cfg_fails.inc()); + self.metrics.cfg_fails.inc(); return; } if let Some(end) = offset.checked_add(data.len() as u64) { @@ -838,13 +834,13 @@ impl VirtioDevice for Net { .and_then(|(start, end)| config_space_bytes.get_mut(start..end)) else { error!("Failed to write config space"); - net_metrics!(&self.id, cfg_fails.inc()); + self.metrics.cfg_fails.inc(); return; }; dst.copy_from_slice(data); self.guest_mac = Some(self.config_space.guest_mac); - net_metrics!(&self.id, mac_address_updates.inc()); + self.metrics.mac_address_updates.inc(); } fn activate(&mut self, mem: GuestMemoryMmap) -> Result<(), ActivateError> { @@ -880,6 +876,7 @@ pub mod tests { use utils::net::mac::{MacAddr, MAC_ADDR_LEN}; use super::*; + use crate::check_metric_after_block; use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::iovec::IoVecBuffer; use crate::devices::virtio::net::device::{ @@ -901,7 +898,6 @@ pub mod tests { use crate::logger::IncMetric; use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenBucket, TokenType}; use crate::vstate::memory::{Address, GuestMemory}; - use crate::{check_metric_after_block, check_net_metric_after_block}; impl Net { pub(crate) fn read_tap(&mut self) -> io::Result { @@ -1026,7 +1022,7 @@ pub mod tests { // Check that the guest MAC was updated. let expected_guest_mac = MacAddr::from_bytes_unchecked(&new_config); assert_eq!(expected_guest_mac, net.guest_mac.unwrap()); - assert_eq!(net_metrics!(&net.id, mac_address_updates.count()), 1); + assert_eq!(net.metrics.mac_address_updates.count(), 1); // Partial write (this is how the kernel sets a new mac address) - byte by byte. let new_config = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]; @@ -1058,8 +1054,8 @@ pub mod tests { th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); th.net().queue_evts[RX_INDEX].read().unwrap(); - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxQueue) ); @@ -1153,8 +1149,8 @@ pub mod tests { // Inject frame to tap and run epoll. let frame = inject_tap_tx_frame(&th.net(), 1000); - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.rx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1194,8 +1190,8 @@ pub mod tests { ); // Inject frame to tap and run epoll. let frame = inject_tap_tx_frame(&th.net(), 1000); - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.rx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1234,8 +1230,8 @@ pub mod tests { // Inject 2 frames to tap and run epoll. let frame_1 = inject_tap_tx_frame(&th.net(), 200); let frame_2 = inject_tap_tx_frame(&th.net(), 300); - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.rx_packets_count, 2, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1265,8 +1261,8 @@ pub mod tests { th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]); th.net().queue_evts[TX_INDEX].read().unwrap(); - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxQueue) ); @@ -1304,8 +1300,8 @@ pub mod tests { // Send an invalid frame (too small, VNET header missing). th.add_desc_chain(NetQueue::Tx, 0, &[(0, 1, 0)]); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_malformed_frames.count()), + check_metric_after_block!( + th.net().metrics.tx_malformed_frames, 1, th.event_manager.run_with_timeout(100) ); @@ -1326,8 +1322,8 @@ pub mod tests { // Send an invalid frame (too small, VNET header missing). th.add_desc_chain(NetQueue::Tx, 0, &[(0, 0, 0)]); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_malformed_frames.count()), + check_metric_after_block!( + th.net().metrics.tx_malformed_frames, 1, th.event_manager.run_with_timeout(100) ); @@ -1364,8 +1360,8 @@ pub mod tests { // One frame is valid, one will not be handled because it includes write-only memory // so that leaves us with 2 malformed (no vnet header) frames. - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_malformed_frames.count()), + check_metric_after_block!( + th.net().metrics.tx_malformed_frames, 2, th.event_manager.run_with_timeout(100) ); @@ -1394,8 +1390,8 @@ pub mod tests { th.add_desc_chain(NetQueue::Tx, 0, &desc_list); let frame = th.write_tx_frame(&desc_list, 1000); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.tx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1420,8 +1416,8 @@ pub mod tests { th.add_desc_chain(NetQueue::Tx, 0, &desc_list); let _ = th.write_tx_frame(&desc_list, 1000); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tap_write_fails.count()), + check_metric_after_block!( + th.net().metrics.tap_write_fails, 1, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1447,8 +1443,8 @@ pub mod tests { th.add_desc_chain(NetQueue::Tx, 500, &desc_list); let frame_2 = th.write_tx_frame(&desc_list, 600); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.tx_packets_count, 2, th.event_manager.run_with_timeout(100).unwrap() ); @@ -1527,7 +1523,7 @@ pub mod tests { &buffer, &mut net.tap, Some(src_mac), - &net.id, + &net.metrics, ) .unwrap()) ); @@ -1555,8 +1551,8 @@ pub mod tests { let mut headers = vec![0; frame_hdr_len()]; // Check that a legit MAC doesn't affect the spoofed MAC metric. - check_net_metric_after_block!( - net_metrics!(&net.id, tx_spoofed_mac_count.count()), + check_metric_after_block!( + net.metrics.tx_spoofed_mac_count, 0, Net::write_to_mmds_or_tap( net.mmds_ns.as_mut(), @@ -1565,13 +1561,13 @@ pub mod tests { &buffer, &mut net.tap, Some(guest_mac), - &net.id, + &net.metrics, ) ); // Check that a spoofed MAC increases our spoofed MAC metric. - check_net_metric_after_block!( - net_metrics!(&net.id, tx_spoofed_mac_count.count()), + check_metric_after_block!( + net.metrics.tx_spoofed_mac_count, 1, Net::write_to_mmds_or_tap( net.mmds_ns.as_mut(), @@ -1580,7 +1576,7 @@ pub mod tests { &buffer, &mut net.tap, Some(not_guest_mac), - &net.id, + &net.metrics, ) ); } @@ -1592,16 +1588,16 @@ pub mod tests { // RX rate limiter events should error since the limiter is not blocked. // Validate that the event failed and failure was properly accounted for. - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxRateLimiter) ); // TX rate limiter events should error since the limiter is not blocked. // Validate that the event failed and failure was properly accounted for. - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxRateLimiter) ); @@ -1618,8 +1614,8 @@ pub mod tests { // The RX queue is empty and rx_deffered_frame is set. th.net().rx_deferred_frame = true; - check_net_metric_after_block!( - net_metrics!(&th.net().id, no_rx_avail_buffer.count()), + check_metric_after_block!( + th.net().metrics.no_rx_avail_buffer, 1, th.simulate_event(NetEvent::Tap) ); @@ -1631,8 +1627,8 @@ pub mod tests { // Fake an avail buffer; this time, tap reading should error out. th.rxq.avail.idx.set(1); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tap_read_fails.count()), + check_metric_after_block!( + th.net().metrics.tap_read_fails, 1, th.simulate_event(NetEvent::Tap) ); @@ -1644,22 +1640,19 @@ pub mod tests { th.activate_net(); th.net().tap.mocks.set_read_tap(ReadTapMock::TapFrame); - let rx_packets_count = net_metrics!(&th.net().id, rx_packets_count.count()); + let rx_packets_count = th.net().metrics.rx_packets_count.count(); let _ = inject_tap_tx_frame(&th.net(), 1000); // Trigger a Tap event that. This should fail since there // are not any available descriptors in the queue - check_net_metric_after_block!( - net_metrics!(&th.net().id, no_rx_avail_buffer.count()), + check_metric_after_block!( + th.net().metrics.no_rx_avail_buffer, 1, th.simulate_event(NetEvent::Tap) ); // The frame we read from the tap should be deferred now and // no frames should have been transmitted assert!(th.net().rx_deferred_frame); - assert_eq!( - net_metrics!(&th.net().id, rx_packets_count.count()), - rx_packets_count - ); + assert_eq!(th.net().metrics.rx_packets_count.count(), rx_packets_count); // Let's add a second frame, which should really have the same // fate. @@ -1669,8 +1662,8 @@ pub mod tests { // frame. However, this should try to handle the second tap as well and fail // since there's only one Descriptor Chain in the queue. th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); - check_net_metric_after_block!( - net_metrics!(&th.net().id, no_rx_avail_buffer.count()), + check_metric_after_block!( + th.net().metrics.no_rx_avail_buffer, 1, th.simulate_event(NetEvent::Tap) ); @@ -1678,14 +1671,14 @@ pub mod tests { assert!(th.net().rx_deferred_frame); // However, we should have delivered the first frame assert_eq!( - net_metrics!(&th.net().id, rx_packets_count.count()), + th.net().metrics.rx_packets_count.count(), rx_packets_count + 1 ); // Let's add one more descriptor and try to handle the last frame as well. th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_packets_count.count()), + check_metric_after_block!( + th.net().metrics.rx_packets_count, 1, th.simulate_event(NetEvent::RxQueue) ); @@ -1701,8 +1694,8 @@ pub mod tests { th.net().rx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap(); // There is no actual event on the rate limiter's timerfd. - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxRateLimiter) ); @@ -1716,8 +1709,8 @@ pub mod tests { th.net().tx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap(); th.simulate_event(NetEvent::TxRateLimiter); // There is no actual event on the rate limiter's timerfd. - check_net_metric_after_block!( - net_metrics!(&th.net().id, event_fails.count()), + check_metric_after_block!( + th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxRateLimiter) ); @@ -1747,10 +1740,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().tx_rate_limiter.is_blocked()); - assert_eq!( - net_metrics!(&th.net().id, tx_rate_limiter_throttled.count()), - 1 - ); + assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 1); // make sure the data is still queued for processing assert_eq!(th.txq.used.idx.get(), 0); } @@ -1761,10 +1751,7 @@ pub mod tests { // trigger the RX queue event handler th.simulate_event(NetEvent::TxQueue); - assert_eq!( - net_metrics!(&th.net().id, tx_rate_limiter_throttled.count()), - 2 - ); + assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 2); } // wait for 100ms to give the rate-limiter timer a chance to replenish @@ -1774,8 +1761,8 @@ pub mod tests { // following TX procedure should succeed because bandwidth should now be available { // tx_count increments 1 from write_to_mmds_or_tap() - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_count.count()), + check_metric_after_block!( + th.net().metrics.tx_count, 1, th.simulate_event(NetEvent::TxRateLimiter) ); @@ -1791,8 +1778,8 @@ pub mod tests { // following TX procedure should succeed to handle the second frame as well { // tx_count increments 1 from write_to_mmds_or_tap() - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_count.count()), + check_metric_after_block!( + th.net().metrics.tx_count, 1, th.simulate_event(NetEvent::TxRateLimiter) ); @@ -1824,10 +1811,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); - assert_eq!( - net_metrics!(&th.net().id, rx_rate_limiter_throttled.count()), - 1 - ); + assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 1); assert!(th.net().rx_deferred_frame); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); @@ -1840,10 +1824,7 @@ pub mod tests { // trigger the RX queue event handler th.simulate_event(NetEvent::RxQueue); - assert_eq!( - net_metrics!(&th.net().id, rx_rate_limiter_throttled.count()), - 2 - ); + assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 2); } // wait for 100ms to give the rate-limiter timer a chance to replenish @@ -1854,8 +1835,8 @@ pub mod tests { { let frame = &th.net().tap.mocks.read_tap.mock_frame(); // no longer throttled - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_rate_limiter_throttled.count()), + check_metric_after_block!( + th.net().metrics.rx_rate_limiter_throttled, 0, th.simulate_event(NetEvent::RxRateLimiter) ); @@ -1892,8 +1873,8 @@ pub mod tests { { // trigger the TX handler th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]); - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_rate_limiter_throttled.count()), + check_metric_after_block!( + th.net().metrics.tx_rate_limiter_throttled, 1, th.simulate_event(NetEvent::TxQueue) ); @@ -1911,8 +1892,8 @@ pub mod tests { // following TX procedure should succeed because ops should now be available { // no longer throttled - check_net_metric_after_block!( - net_metrics!(&th.net().id, tx_rate_limiter_throttled.count()), + check_metric_after_block!( + th.net().metrics.tx_rate_limiter_throttled, 0, th.simulate_event(NetEvent::TxRateLimiter) ); @@ -1940,15 +1921,15 @@ pub mod tests { // following RX procedure should fail because of ops rate limiting { // trigger the RX handler - check_net_metric_after_block!( - net_metrics!(&th.net().id, rx_rate_limiter_throttled.count()), + check_metric_after_block!( + th.net().metrics.rx_rate_limiter_throttled, 1, th.simulate_event(NetEvent::Tap) ); // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); - assert!(net_metrics!(&th.net().id, rx_rate_limiter_throttled.count()) >= 1); + assert!(th.net().metrics.rx_rate_limiter_throttled.count() >= 1); assert!(th.net().rx_deferred_frame); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); diff --git a/src/vmm/src/devices/virtio/net/event_handler.rs b/src/vmm/src/devices/virtio/net/event_handler.rs index b58e6a8b682..a3bd0499f18 100644 --- a/src/vmm/src/devices/virtio/net/event_handler.rs +++ b/src/vmm/src/devices/virtio/net/event_handler.rs @@ -9,7 +9,6 @@ use utils::epoll::EventSet; use crate::devices::virtio::net::device::Net; use crate::devices::virtio::{VirtioDevice, RX_INDEX, TX_INDEX}; use crate::logger::{debug, error, warn, IncMetric}; -use crate::net_metrics; impl Net { fn register_runtime_events(&self, ops: &mut EventOps) { @@ -85,7 +84,7 @@ impl MutEventSubscriber for Net { _ if activate_fd == source => self.process_activate_event(ops), _ => { warn!("Net: Spurious event received: {:?}", source); - net_metrics!(&self.id, event_fails.inc()); + self.metrics.event_fails.inc(); } } } else { diff --git a/src/vmm/src/devices/virtio/net/metrics.rs b/src/vmm/src/devices/virtio/net/metrics.rs index 40e9632fc4a..084b37bbbe3 100644 --- a/src/vmm/src/devices/virtio/net/metrics.rs +++ b/src/vmm/src/devices/virtio/net/metrics.rs @@ -76,31 +76,23 @@ //! The system implements 1 types of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. -//! We use NET_DEV_METRICS_PVT instead of adding an entry of NetDeviceMetrics +//! We use NET_METRICS instead of adding an entry of NetDeviceMetrics //! in Net so that metrics are accessible to be flushed even from signal handlers. use std::collections::BTreeMap; -use std::sync::{RwLock, RwLockReadGuard}; +use std::sync::{Arc, RwLock}; use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::{IncMetric, SharedIncMetric}; -/// returns lock-unwrapped NET_DEV_METRICS_PVT.metrics, -/// we have this function so that we don't have to make -/// NET_DEV_METRICS_PVT public -pub fn get_net_metrics() -> RwLockReadGuard<'static, NetMetricsPerDevice> { - // lock is always initialized so it is safe the unwrap the lock without a check. - NET_DEV_METRICS_PVT.read().unwrap() -} - /// map of network interface id and metrics /// this should be protected by a lock before accessing. #[derive(Debug)] pub struct NetMetricsPerDevice { /// used to access per net device metrics - pub metrics: BTreeMap, + pub metrics: BTreeMap>, } impl NetMetricsPerDevice { @@ -109,55 +101,35 @@ impl NetMetricsPerDevice { /// exist to avoid overwriting previously allocated data. /// lock is always initialized so it is safe the unwrap /// the lock without a check. - pub fn alloc(iface_id: String) { - if NET_DEV_METRICS_PVT - .read() - .unwrap() - .metrics - .get(&iface_id) - .is_none() - { - NET_DEV_METRICS_PVT + pub fn alloc(iface_id: String) -> Arc { + if NET_METRICS.read().unwrap().metrics.get(&iface_id).is_none() { + NET_METRICS .write() .unwrap() .metrics - .insert(iface_id, NetDeviceMetrics::default()); + .insert(iface_id.clone(), Arc::new(NetDeviceMetrics::default())); } + NET_METRICS + .read() + .unwrap() + .metrics + .get(&iface_id) + .unwrap() + .clone() } } -#[macro_export] -// Per device metrics are behind a ref lock and in a map which -// makes it difficult to return the metrics pointer via a function -// This macro is an attempt to make the code more readable and to -// centralize the error handling. -// The macro can be called in below ways where iface_id is String and -// activate_fails is a metric from struct `NetDeviceMetrics`: -// net_metrics!(&iface_id, activate_fails.inc()); -// net_metrics!(&iface_id, activate_fails.add(5)); -// net_metrics!(&iface_id, activate_fails.count()); -macro_rules! net_metrics { - ($iface_id:expr,$metric:ident.$action:ident($($value:tt)?)) => {{ - if $crate::devices::virtio::net::metrics::get_net_metrics().metrics.get($iface_id).is_some() { - $crate::devices::virtio::net::metrics::get_net_metrics().metrics.get($iface_id).unwrap().$metric.$action($($value)?) - }else{ - $crate::devices::virtio::net::metrics::NetMetricsPerDevice::alloc($iface_id.to_string()); - $crate::devices::virtio::net::metrics::get_net_metrics().metrics.get($iface_id).unwrap().$metric.$action($($value)?) - } - }} -} - /// Pool of Network-related metrics per device behind a lock to /// keep things thread safe. Since the lock is initialized here /// it is safe to unwrap it without any check. -static NET_DEV_METRICS_PVT: RwLock = RwLock::new(NetMetricsPerDevice { +static NET_METRICS: RwLock = RwLock::new(NetMetricsPerDevice { metrics: BTreeMap::new(), }); /// This function facilitates aggregation and serialization of /// per net device metrics. pub fn flush_metrics(serializer: S) -> Result { - let net_metrics = get_net_metrics(); + let net_metrics = NET_METRICS.read().unwrap(); let metrics_len = net_metrics.metrics.len(); // +1 to accomodate aggregate net metrics let mut seq = serializer.serialize_map(Some(1 + metrics_len))?; @@ -167,8 +139,9 @@ pub fn flush_metrics(serializer: S) -> Result { for (name, metrics) in net_metrics.metrics.iter() { let devn = format!("net_{}", name); // serialization will flush the metrics so aggregate before it. - net_aggregated.aggregate(metrics); - seq.serialize_entry(&devn, metrics)?; + let m: &NetDeviceMetrics = metrics; + net_aggregated.aggregate(m); + seq.serialize_entry(&devn, m)?; } seq.serialize_entry("net", &net_aggregated)?; seq.end() @@ -298,21 +271,73 @@ pub mod tests { // we have 5-23 irq for net devices so max 19 net devices. const MAX_NET_DEVICES: usize = 19; - assert!(NET_DEV_METRICS_PVT.read().is_ok()); - assert!(NET_DEV_METRICS_PVT.write().is_ok()); + assert!(NET_METRICS.read().is_ok()); + assert!(NET_METRICS.write().is_ok()); for i in 0..MAX_NET_DEVICES { let devn: String = format!("eth{}", i); NetMetricsPerDevice::alloc(devn.clone()); - net_metrics!(&devn, activate_fails.inc()); - net_metrics!(&devn, rx_bytes_count.add(10)); - net_metrics!(&devn, tx_bytes_count.add(5)); + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .activate_fails + .inc(); + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .rx_bytes_count + .add(10); + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .tx_bytes_count + .add(5); } + for i in 0..MAX_NET_DEVICES { let devn: String = format!("eth{}", i); - assert!(net_metrics!(&devn, activate_fails.count()) >= 1); - assert!(net_metrics!(&devn, rx_bytes_count.count()) >= 10); - assert_eq!(net_metrics!(&devn, tx_bytes_count.count()), 5); + assert!( + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .activate_fails + .count() + >= 1 + ); + assert!( + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .rx_bytes_count + .count() + >= 10 + ); + assert_eq!( + NET_METRICS + .read() + .unwrap() + .metrics + .get(&devn) + .unwrap() + .tx_bytes_count + .count(), + 5 + ); } } #[test] @@ -321,29 +346,90 @@ pub mod tests { // `test_net_dev_metrics` which also uses the same name. let devn = "eth0"; - assert!(NET_DEV_METRICS_PVT.read().is_ok()); - assert!(NET_DEV_METRICS_PVT.write().is_ok()); + assert!(NET_METRICS.read().is_ok()); + assert!(NET_METRICS.write().is_ok()); NetMetricsPerDevice::alloc(String::from(devn)); - assert!(NET_DEV_METRICS_PVT.read().is_ok()); - assert!(get_net_metrics().metrics.get(devn).is_some()); + assert!(NET_METRICS.read().is_ok()); + assert!(NET_METRICS.read().unwrap().metrics.get(devn).is_some()); - net_metrics!(devn, activate_fails.inc()); + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .inc(); assert!( - net_metrics!(devn, activate_fails.count()) > 0, + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .count() + > 0, "{}", - net_metrics!(devn, activate_fails.count()) + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .count() ); // we expect only 2 tests (this and test_max_net_dev_metrics) // to update activate_fails count for eth0. assert!( - net_metrics!(devn, activate_fails.count()) <= 2, + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .count() + <= 2, "{}", - net_metrics!(devn, activate_fails.count()) + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .count() ); - net_metrics!(&String::from(devn), activate_fails.inc()); - net_metrics!(&String::from(devn), rx_bytes_count.add(5)); - assert!(net_metrics!(&String::from(devn), rx_bytes_count.count()) >= 5); + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .activate_fails + .inc(); + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .rx_bytes_count + .add(5); + assert!( + NET_METRICS + .read() + .unwrap() + .metrics + .get(devn) + .unwrap() + .rx_bytes_count + .count() + >= 5 + ); } } diff --git a/src/vmm/src/devices/virtio/net/test_utils.rs b/src/vmm/src/devices/virtio/net/test_utils.rs index 3726362226c..376920f56b6 100644 --- a/src/vmm/src/devices/virtio/net/test_utils.rs +++ b/src/vmm/src/devices/virtio/net/test_utils.rs @@ -352,6 +352,7 @@ pub mod test { use event_manager::{EventManager, SubscriberId, SubscriberOps}; + use crate::check_metric_after_block; use crate::devices::virtio::net::device::vnet_hdr_len; use crate::devices::virtio::net::gen::ETH_HLEN; use crate::devices::virtio::net::test_utils::{ @@ -366,7 +367,6 @@ pub mod test { use crate::vstate::memory::{ Address, Bytes, GuestAddress, GuestMemoryExtension, GuestMemoryMmap, }; - use crate::{check_net_metric_after_block, NET_METRICS}; pub struct TestHelper<'a> { pub event_manager: EventManager>>, @@ -496,8 +496,8 @@ pub mod test { // Inject frame to tap and run epoll. let frame = inject_tap_tx_frame(&self.net(), frame_len); - check_net_metric_after_block!( - net_metrics!(&self.net().id, rx_packets_count.count()), + check_metric_after_block!( + self.net().metrics.rx_packets_count, 0, self.event_manager.run_with_timeout(100).unwrap() ); @@ -524,8 +524,8 @@ pub mod test { VIRTQ_DESC_F_WRITE, )], ); - check_net_metric_after_block!( - net_metrics!(&self.net().id, rx_packets_count.count()), + check_metric_after_block!( + self.net().metrics.rx_packets_count, 1, self.event_manager.run_with_timeout(100).unwrap() ); diff --git a/src/vmm/src/devices/virtio/test_utils.rs b/src/vmm/src/devices/virtio/test_utils.rs index a5199f5683a..564b59f7779 100644 --- a/src/vmm/src/devices/virtio/test_utils.rs +++ b/src/vmm/src/devices/virtio/test_utils.rs @@ -22,20 +22,6 @@ macro_rules! check_metric_after_block { }}; } -#[macro_export] -// macro created from check_metric_after_block because `metric` will be -// emitted by NET_METRICS macro which does not fit with the $metric.count() -// that check_metric_after_block had. -// TODO: After all devices have per device metrics we won't need the -// check_metric_after_block macro. -macro_rules! check_net_metric_after_block { - ($metric:expr, $delta:expr, $block:expr) => {{ - let before = $metric; - let _ = $block; - assert_eq!($metric, before + $delta, "unexpected metric value"); - }}; -} - /// Creates a [`GuestMemoryMmap`] with a single region of the given size starting at guest physical /// address 0 pub fn single_region_mem(region_size: usize) -> GuestMemoryMmap {