From 402639bd23709916521655acd9b94f6dc62e11a5 Mon Sep 17 00:00:00 2001 From: Hang Yan Date: Fri, 8 Nov 2024 12:35:16 +0800 Subject: [PATCH 1/3] Add packetcapture feature (#5443) Introduced a new CRD called PacketCapture, which allow users to configure capture options on target traffic. If a file server is specified, the result pcap file will be uploaded. The pcap file can be inspected by tools like wireshark or tcpdump. Signed-off-by: Hang Yan Co-authored-by: Quan Tian Co-authored-by: Lan Co-authored-by: Antonin Bas --- build/charts/antrea/conf/antrea-agent.conf | 5 +- build/charts/antrea/crds/packetcapture.yaml | 2 +- .../antrea/templates/agent/clusterrole.yaml | 15 + build/yamls/antrea-aks.yml | 26 +- build/yamls/antrea-crds.yml | 2 +- build/yamls/antrea-eks.yml | 26 +- build/yamls/antrea-gke.yml | 26 +- build/yamls/antrea-ipsec.yml | 26 +- build/yamls/antrea.yml | 26 +- cmd/antrea-agent/agent.go | 19 + docs/feature-gates.md | 10 + docs/packetcapture-guide.md | 87 ++ go.mod | 1 + go.sum | 2 + hack/.notableofcontents | 1 + pkg/agent/packetcapture/capture/bpf.go | 220 +++++ pkg/agent/packetcapture/capture/bpf_test.go | 184 +++++ pkg/agent/packetcapture/capture/pcap_linux.go | 66 ++ .../packetcapture/capture/pcap_windows.go | 36 + pkg/agent/packetcapture/capture_interface.go | 28 + .../packetcapture/packetcapture_controller.go | 780 ++++++++++++++++++ .../packetcapture_controller_test.go | 556 +++++++++++++ .../support_bundle_controller_test.go | 2 +- .../handlers/featuregates/handler_test.go | 1 + pkg/features/antrea_features.go | 7 + test/e2e/packetcapture_test.go | 728 ++++++++++++++++ 26 files changed, 2858 insertions(+), 24 deletions(-) create mode 100644 docs/packetcapture-guide.md create mode 100644 pkg/agent/packetcapture/capture/bpf.go create mode 100644 pkg/agent/packetcapture/capture/bpf_test.go create mode 100644 pkg/agent/packetcapture/capture/pcap_linux.go create mode 100644 pkg/agent/packetcapture/capture/pcap_windows.go create mode 100644 
pkg/agent/packetcapture/capture_interface.go create mode 100644 pkg/agent/packetcapture/packetcapture_controller.go create mode 100644 pkg/agent/packetcapture/packetcapture_controller_test.go create mode 100644 test/e2e/packetcapture_test.go diff --git a/build/charts/antrea/conf/antrea-agent.conf b/build/charts/antrea/conf/antrea-agent.conf index 3d6dee19dbe..6bbe0824a23 100644 --- a/build/charts/antrea/conf/antrea-agent.conf +++ b/build/charts/antrea/conf/antrea-agent.conf @@ -24,9 +24,12 @@ featureGates: # be enabled, otherwise this flag will not take effect. {{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "CleanupStaleUDPSvcConntrack" "default" true) }} -# Enable traceflow which provides packet tracing feature to diagnose network issue. +# Enable Traceflow which provides packet tracing feature to diagnose network issue. {{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "Traceflow" "default" true) }} +# Enable PacketCapture feature which supports capturing packets to diagnose network issues. 
+{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "PacketCapture" "default" false) }} + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort {{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "NodePortLocal" "default" true) }} diff --git a/build/charts/antrea/crds/packetcapture.yaml b/build/charts/antrea/crds/packetcapture.yaml index b9a1d9ed9df..fec130517ed 100644 --- a/build/charts/antrea/crds/packetcapture.yaml +++ b/build/charts/antrea/crds/packetcapture.yaml @@ -160,7 +160,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/charts/antrea/templates/agent/clusterrole.yaml b/build/charts/antrea/templates/agent/clusterrole.yaml index a2a74e45beb..876016719ad 100644 --- a/build/charts/antrea/templates/agent/clusterrole.yaml +++ b/build/charts/antrea/templates/agent/clusterrole.yaml @@ -160,6 +160,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -241,6 +255,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index 2ea0d6d1641..082fa50066d 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: @@ -3967,9 +3967,12 @@ data: # be enabled, otherwise this flag will not take effect. # CleanupStaleUDPSvcConntrack: true - # Enable traceflow which provides packet tracing feature to diagnose network issue. 
+ # Enable Traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: true + # Enable PacketCapture feature which supports capturing packets to diagnose network issues. + # PacketCapture: false + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort # NodePortLocal: true @@ -4672,6 +4675,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -4753,6 +4770,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list @@ -5365,7 +5383,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 4325a243ab510df539883b6384a30cf8b04ff862796444a6c5c10999159479c5 + checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 labels: app: antrea component: antrea-agent @@ -5603,7 +5621,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 4325a243ab510df539883b6384a30cf8b04ff862796444a6c5c10999159479c5 + checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-crds.yml b/build/yamls/antrea-crds.yml index 57a4ca00448..6dd3719121a 100644 --- a/build/yamls/antrea-crds.yml +++ b/build/yamls/antrea-crds.yml @@ -3033,7 +3033,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git 
a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index 69179538eb6..2403b02cba3 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: @@ -3967,9 +3967,12 @@ data: # be enabled, otherwise this flag will not take effect. # CleanupStaleUDPSvcConntrack: true - # Enable traceflow which provides packet tracing feature to diagnose network issue. + # Enable Traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: true + # Enable PacketCapture feature which supports capturing packets to diagnose network issues. + # PacketCapture: false + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort # NodePortLocal: true @@ -4672,6 +4675,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -4753,6 +4770,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list @@ -5365,7 +5383,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 4325a243ab510df539883b6384a30cf8b04ff862796444a6c5c10999159479c5 + checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 labels: app: antrea component: antrea-agent @@ -5604,7 +5622,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 
4325a243ab510df539883b6384a30cf8b04ff862796444a6c5c10999159479c5 + checksum/config: e2d1d8af083c88667ac4c22c87dea63e595b2f4f770190c32afb00c480440fe3 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index 78a46fdd32f..b7e77155fed 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: @@ -3967,9 +3967,12 @@ data: # be enabled, otherwise this flag will not take effect. # CleanupStaleUDPSvcConntrack: true - # Enable traceflow which provides packet tracing feature to diagnose network issue. + # Enable Traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: true + # Enable PacketCapture feature which supports capturing packets to diagnose network issues. + # PacketCapture: false + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort # NodePortLocal: true @@ -4672,6 +4675,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -4753,6 +4770,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list @@ -5365,7 +5383,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: f5cf00de39a27790a7e158a3eca79123de415b3b09d389ac984b74027bbfaade + checksum/config: 7e42a403d388e2ed556d9b41f4af83917eadd0863d4e2bef67353f5adb2ef6c3 labels: app: antrea component: antrea-agent @@ -5601,7 +5619,7 @@ 
spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: f5cf00de39a27790a7e158a3eca79123de415b3b09d389ac984b74027bbfaade + checksum/config: 7e42a403d388e2ed556d9b41f4af83917eadd0863d4e2bef67353f5adb2ef6c3 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 853279551a4..34ea345f0d4 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: @@ -3980,9 +3980,12 @@ data: # be enabled, otherwise this flag will not take effect. # CleanupStaleUDPSvcConntrack: true - # Enable traceflow which provides packet tracing feature to diagnose network issue. + # Enable Traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: true + # Enable PacketCapture feature which supports capturing packets to diagnose network issues. 
+ # PacketCapture: false + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort # NodePortLocal: true @@ -4685,6 +4688,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -4766,6 +4783,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list @@ -5378,7 +5396,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 9e94f199d125877d889ba73e053c95b342e89323d0423cde074ae074df379494 + checksum/config: 7d8b0a065c3db85e34e127fdf38b820b32712657900e3f8fe2703d4310c40632 checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4 labels: app: antrea @@ -5660,7 +5678,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 9e94f199d125877d889ba73e053c95b342e89323d0423cde074ae074df379494 + checksum/config: 7d8b0a065c3db85e34e127fdf38b820b32712657900e3f8fe2703d4310c40632 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 284d225fb95..30f54afb0f1 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 's{0,1}ftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' status: type: object properties: @@ -3967,9 +3967,12 @@ data: # be enabled, otherwise this flag will not take effect. 
# CleanupStaleUDPSvcConntrack: true - # Enable traceflow which provides packet tracing feature to diagnose network issue. + # Enable Traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: true + # Enable PacketCapture feature which supports capturing packets to diagnose network issues. + # PacketCapture: false + # Enable NodePortLocal feature to make the Pods reachable externally through NodePort # NodePortLocal: true @@ -4672,6 +4675,20 @@ rules: - patch - create - delete + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures + verbs: + - get + - watch + - list + - apiGroups: + - crd.antrea.io + resources: + - packetcaptures/status + verbs: + - update - apiGroups: - crd.antrea.io resources: @@ -4753,6 +4770,7 @@ rules: - secrets resourceNames: - antrea-bgp-passwords + - antrea-packetcapture-fileserver-auth verbs: - get - list @@ -5365,7 +5383,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 8256bc0d365d60f16d0bdef14cf674be49d525ee1cd921e531f8bf7e521e1421 + checksum/config: 2b4d82bcb825d50926115bad2125097f85aed424bfc49147444314cad8b7826a labels: app: antrea component: antrea-agent @@ -5601,7 +5619,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 8256bc0d365d60f16d0bdef14cf674be49d525ee1cd921e531f8bf7e521e1421 + checksum/config: 2b4d82bcb825d50926115bad2125097f85aed424bfc49147444314cad8b7826a labels: app: antrea component: antrea-controller diff --git a/cmd/antrea-agent/agent.go b/cmd/antrea-agent/agent.go index 60bd61503aa..867445e8cf9 100644 --- a/cmd/antrea-agent/agent.go +++ b/cmd/antrea-agent/agent.go @@ -60,6 +60,7 @@ import ( "antrea.io/antrea/pkg/agent/nodeip" npl 
"antrea.io/antrea/pkg/agent/nodeportlocal" "antrea.io/antrea/pkg/agent/openflow" + "antrea.io/antrea/pkg/agent/packetcapture" "antrea.io/antrea/pkg/agent/proxy" proxytypes "antrea.io/antrea/pkg/agent/proxy/types" "antrea.io/antrea/pkg/agent/querier" @@ -117,6 +118,7 @@ func run(o *Options) error { informerFactory := informers.NewSharedInformerFactoryWithOptions(k8sClient, informerDefaultResync, informers.WithTransform(k8s.NewTrimmer(k8s.TrimNode))) crdInformerFactory := crdinformers.NewSharedInformerFactoryWithOptions(crdClient, informerDefaultResync, crdinformers.WithTransform(k8s.NewTrimmer())) traceflowInformer := crdInformerFactory.Crd().V1beta1().Traceflows() + packetCaptureInformer := crdInformerFactory.Crd().V1alpha1().PacketCaptures() egressInformer := crdInformerFactory.Crd().V1beta1().Egresses() externalIPPoolInformer := crdInformerFactory.Crd().V1beta1().ExternalIPPools() trafficControlInformer := crdInformerFactory.Crd().V1alpha2().TrafficControls() @@ -650,6 +652,19 @@ func run(o *Options) error { o.enableAntreaProxy) } + var packetCaptureController *packetcapture.Controller + if features.DefaultFeatureGate.Enabled(features.PacketCapture) { + packetCaptureController, err = packetcapture.NewPacketCaptureController( + k8sClient, + crdClient, + packetCaptureInformer, + ifaceStore, + ) + if err != nil { + return fmt.Errorf("error when creating PacketCapture controller: %v", err) + } + } + if err := antreaClientProvider.RunOnce(); err != nil { return err } @@ -808,6 +823,10 @@ func run(o *Options) error { go traceflowController.Run(stopCh) } + if features.DefaultFeatureGate.Enabled(features.PacketCapture) { + go packetCaptureController.Run(stopCh) + } + if o.enableAntreaProxy { go proxier.GetProxyProvider().Run(stopCh) diff --git a/docs/feature-gates.md b/docs/feature-gates.md index 41da9eaac1a..5e3a6b9eb42 100644 --- a/docs/feature-gates.md +++ b/docs/feature-gates.md @@ -62,6 +62,7 @@ edit the Agent configuration in the | `L7FlowExporter` | Agent | 
`false` | Alpha | v1.15 | N/A | N/A | Yes | | | `BGPPolicy` | Agent | `false` | Alpha | v2.1 | N/A | N/A | No | | | `NodeLatencyMonitor` | Agent | `false` | Alpha | v2.1 | N/A | N/A | No | | +| `PacketCapture` | Agent | `false` | Alpha | v2.2 | N/A | N/A | No | | ## Description and Requirements of Features @@ -531,3 +532,12 @@ experienced by Pod traffic. #### Requirements for this Feature - Linux Nodes only - the feature has not been tested on Windows Nodes yet. + +### PacketCapture + +`PacketCapture` allows users to capture live traffic packets from specified flows for further analysis. +Refer to this [document](packetcapture-guide.md) for more information. + +#### Requirements for this Feature + +This feature is only supported on Linux for now. diff --git a/docs/packetcapture-guide.md b/docs/packetcapture-guide.md new file mode 100644 index 00000000000..044f791ff1e --- /dev/null +++ b/docs/packetcapture-guide.md @@ -0,0 +1,87 @@ +# Packet Capture User Guide + +Starting with Antrea v2.2, Antrea supports PacketCapture for network diagnosis. +It can capture a specified number of packets from real traffic and upload them to a +supported storage location. Users can create a `PacketCapture` CR to trigger +packet capture on the target traffic flow. + +## Prerequisites + +PacketCapture is disabled by default. If you +want to enable this feature, you need to set the feature gate `PacketCapture` to `true` in +the `antrea-config` ConfigMap for `antrea-agent`. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: antrea-config + namespace: kube-system +data: + antrea-agent.conf: | + featureGates: + PacketCapture: true +``` + +## Start a new PacketCapture + +When starting a new packet capture, you can provide the following information to identify +the target traffic flow: + +* Source Pod, or IP address +* Destination Pod, or IP address +* Transport protocol (TCP/UDP/ICMP) +* Transport ports + +You can start a new packet capture by creating a `PacketCapture` CR. 
An optional `fileServer` +field can be specified to store the generated packets file. Before that, +a Secret named `antrea-packetcapture-fileserver-auth` located in the same Namespace where +Antrea is deployed must exist and carry the authentication information for the target file server. +You can also create the Secret using the following `kubectl` command: + +```bash +kubectl create secret generic antrea-packetcapture-fileserver-auth -n kube-system --from-literal=username='<username>' --from-literal=password='<password>' +``` + +If no `fileServer` field is present in the CR, the captured packets file will be saved in the +antrea-agent Pod (the one on the same Node as the source or destination Pod in the CR). The result +path information will be available in `.status.FilePath`. + +Here is an example of a `PacketCapture` CR: + +```yaml +apiVersion: crd.antrea.io/v1alpha1 +kind: PacketCapture +metadata: + name: pc-test +spec: + fileServer: + url: sftp://127.0.0.1:22/upload # Define your own sftp url here. + timeout: 60 + captureConfig: + firstN: + number: 5 + source: + pod: + namespace: default + name: frontend + destination: + # Available options for source/destination could be `pod` (a Pod), `ip` (a specific IP address). These 2 options are mutually exclusive. + pod: + namespace: default + name: backend + packet: + ipFamily: IPv4 + protocol: TCP # support arbitrary number values and string values in [TCP,UDP,ICMP] (case insensitive) + transportHeader: + tcp: + dstPort: 8080 # Destination port needs to be set when the protocol is TCP/UDP. +``` + +The CR above starts a new packet capture of TCP flows from a Pod named `frontend` +to port 8080 of a Pod named `backend`. It will capture the first 5 packets +that meet this criterion and upload them to the specified sftp server. Users can download the +packet file from the sftp server (or from the local antrea-agent Pod) and analyze its content +with network diagnostic tools like Wireshark or tcpdump. 
+ +Note: This feature is not supported on Windows for now. diff --git a/go.mod b/go.mod index 639898c0693..78f0a6c058d 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/google/btree v1.1.3 github.com/google/uuid v1.6.0 + github.com/gopacket/gopacket v1.2.0 github.com/hashicorp/memberlist v0.5.1 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0 github.com/k8snetworkplumbingwg/sriov-cni v2.1.0+incompatible diff --git a/go.sum b/go.sum index ebe91ef77e4..00462204281 100644 --- a/go.sum +++ b/go.sum @@ -381,6 +381,8 @@ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+ github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/googleapis/gnostic v0.1.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/googleapis/gnostic v0.3.1/go.mod h1:on+2t9HRStVgn95RSsFWFz+6Q0Snyqv1awfrALZdbtU= +github.com/gopacket/gopacket v1.2.0 h1:eXbzFad7f73P1n2EJHQlsKuvIMJjVXK5tXoSca78I3A= +github.com/gopacket/gopacket v1.2.0/go.mod h1:BrAKEy5EOGQ76LSqh7DMAr7z0NNPdczWm2GxCG7+I8M= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= diff --git a/hack/.notableofcontents b/hack/.notableofcontents index 476abf7da99..5b29b5dea1b 100644 --- a/hack/.notableofcontents +++ b/hack/.notableofcontents @@ -38,6 +38,7 @@ docs/noencap-hybrid-modes.md docs/octant-plugin-installation.md docs/os-issues.md docs/ovs-offload.md +docs/packetcapture-guide.md docs/prometheus-integration.md docs/secondary-network.md docs/security.md diff --git a/pkg/agent/packetcapture/capture/bpf.go b/pkg/agent/packetcapture/capture/bpf.go new file mode 100644 index 00000000000..65da6b1646b 
--- /dev/null +++ b/pkg/agent/packetcapture/capture/bpf.go @@ -0,0 +1,220 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capture + +import ( + "encoding/binary" + "net" + + "golang.org/x/net/bpf" + "k8s.io/apimachinery/pkg/util/intstr" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" +) + +const ( + lengthByte int = 1 + lengthHalf int = 2 + lengthWord int = 4 + bitsPerWord int = 32 + etherTypeIPv4 uint32 = 0x0800 + + jumpMask uint32 = 0x1fff + ip4SourcePort uint32 = 14 + ip4DestinationPort uint32 = 16 + ip4HeaderSize uint32 = 14 + ip4HeaderFlags uint32 = 20 +) + +var ( + returnDrop = bpf.RetConstant{Val: 0} + returnKeep = bpf.RetConstant{Val: 0x40000} + loadIPv4SourcePort = bpf.LoadIndirect{Off: ip4SourcePort, Size: lengthHalf} + loadIPv4DestinationPort = bpf.LoadIndirect{Off: ip4DestinationPort, Size: lengthHalf} + loadEtherKind = bpf.LoadAbsolute{Off: 12, Size: lengthHalf} + loadIPv4SourceAddress = bpf.LoadAbsolute{Off: 26, Size: lengthWord} + loadIPv4DestinationAddress = bpf.LoadAbsolute{Off: 30, Size: lengthWord} + loadIPv4Protocol = bpf.LoadAbsolute{Off: 23, Size: lengthByte} +) + +var ProtocolMap = map[string]uint32{ + "UDP": 17, + "TCP": 6, + "ICMP": 1, +} + +func loadIPv4HeaderOffset(skipTrue uint8) []bpf.Instruction { + return []bpf.Instruction{ + bpf.LoadAbsolute{Off: ip4HeaderFlags, Size: lengthHalf}, // flags+fragment offset, since we need to calc where the src/dst port is + 
bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: jumpMask, SkipTrue: skipTrue}, // check if there is a L4 header + bpf.LoadMemShift{Off: ip4HeaderSize}, // calculate the size of IP header + } +} + +func compareProtocolIP4(skipTrue, skipFalse uint8) bpf.Instruction { + return bpf.JumpIf{Cond: bpf.JumpEqual, Val: etherTypeIPv4, SkipTrue: skipTrue, SkipFalse: skipFalse} +} + +func compareProtocol(protocol uint32, skipTrue, skipFalse uint8) bpf.Instruction { + return bpf.JumpIf{Cond: bpf.JumpEqual, Val: protocol, SkipTrue: skipTrue, SkipFalse: skipFalse} +} + +// compilePacketFilter compiles the CRD spec to bpf instructions. For now, we only focus on +// ipv4 traffic. Compared to the raw BPF filter supported by libpcap, we only need to support +// limited use cases, so an expression parser is not needed. +// packetSpec may be nil, in which case no protocol or port checks are emitted. +// NOTE: all jump offsets are derived from calculateInstructionsSize, which always counts the +// source and destination address checks; if srcIP or dstIP is nil, the computed offsets would +// point past the end of the program. +func compilePacketFilter(packetSpec *crdv1alpha1.Packet, srcIP, dstIP net.IP) []bpf.Instruction { + size := uint8(calculateInstructionsSize(packetSpec)) + + // ipv4 check + inst := []bpf.Instruction{loadEtherKind} + // skip means how many instructions we need to skip if the compare fails. + // for example, for now we have 2 instructions, and the total size is 17, if ipv4 + // check failed, we need to jump to the end (ret #0), skip 17-3=14 instructions. + // if check succeed, skipTrue means we jump to the next instruction. Here 3 means we + // have 3 instructions so far. + inst = append(inst, compareProtocolIP4(0, size-3)) + + if packetSpec != nil { + if packetSpec.Protocol != nil { + var proto uint32 + if packetSpec.Protocol.Type == intstr.Int { + proto = uint32(packetSpec.Protocol.IntVal) + } else { + proto = ProtocolMap[packetSpec.Protocol.StrVal] + } + + inst = append(inst, loadIPv4Protocol) + inst = append(inst, compareProtocol(proto, 0, size-5)) + } + } + + // source ip + if srcIP != nil { + inst = append(inst, loadIPv4SourceAddress) + addrVal := binary.BigEndian.Uint32(srcIP[len(srcIP)-4:]) + // from here we need to check the inst length to calculate skipFalse. 
If no protocol is set, there will be no related bpf instructions. + inst = append(inst, bpf.JumpIf{Cond: bpf.JumpEqual, Val: addrVal, SkipTrue: 0, SkipFalse: size - uint8(len(inst)) - 2}) + + } + // dst ip + if dstIP != nil { + inst = append(inst, loadIPv4DestinationAddress) + addrVal := binary.BigEndian.Uint32(dstIP[len(dstIP)-4:]) + inst = append(inst, bpf.JumpIf{Cond: bpf.JumpEqual, Val: addrVal, SkipTrue: 0, SkipFalse: size - uint8(len(inst)) - 2}) + } + + // ports + var srcPort, dstPort uint16 + // packetSpec is optional (see the nil check above); without this guard a nil spec + // would cause a nil pointer dereference when reading TransportHeader. + if packetSpec != nil { + if tcp := packetSpec.TransportHeader.TCP; tcp != nil { + if tcp.SrcPort != nil { + srcPort = uint16(*tcp.SrcPort) + } + if tcp.DstPort != nil { + dstPort = uint16(*tcp.DstPort) + } + } else if udp := packetSpec.TransportHeader.UDP; udp != nil { + if udp.SrcPort != nil { + srcPort = uint16(*udp.SrcPort) + } + if udp.DstPort != nil { + dstPort = uint16(*udp.DstPort) + } + } + } + + if srcPort > 0 || dstPort > 0 { + skipTrue := size - uint8(len(inst)) - 3 + inst = append(inst, loadIPv4HeaderOffset(skipTrue)...) + if srcPort > 0 { + inst = append(inst, loadIPv4SourcePort) + inst = append(inst, bpf.JumpIf{Cond: bpf.JumpEqual, Val: uint32(srcPort), SkipTrue: 0, SkipFalse: size - uint8(len(inst)) - 2}) + } + if dstPort > 0 { + inst = append(inst, loadIPv4DestinationPort) + inst = append(inst, bpf.JumpIf{Cond: bpf.JumpEqual, Val: uint32(dstPort), SkipTrue: 0, SkipFalse: size - uint8(len(inst)) - 2}) + } + + } + + // return + inst = append(inst, returnKeep) + inst = append(inst, returnDrop) + + return inst + +} + +// We need to figure out how long the instruction list will be first. It will be used in the instructions' jump case. 
+// For example, if you provide all the filters supported by `PacketCapture`, it will end with the following BPF filter string: +// 'ip proto 6 and src host 127.0.0.1 and dst host 127.0.0.1 and src port 123 and dst port 124' +// And using `tcpdump -i <device> '<filter>' -d` will generate the following BPF instructions: +// (000) ldh [12] # Load 2B at 12 (Ethertype) +// (001) jeq #0x800 jt 2 jf 16 # Ethertype: If IPv4, goto #2, else #16 +// (002) ldb [23] # Load 1B at 23 (IPv4 Protocol) +// (003) jeq #0x6 jt 4 jf 16 # IPv4 Protocol: If TCP, goto #4, else #16 +// (004) ld [26] # Load 4B at 26 (source address) +// (005) jeq #0x7f000001 jt 6 jf 16 # If bytes match(127.0.0.1), goto #6, else #16 +// (006) ld [30] # Load 4B at 30 (dest address) +// (007) jeq #0x7f000001 jt 8 jf 16 # If bytes match(127.0.0.1), goto #8, else #16 +// (008) ldh [20] # Load 2B at 20 (13b Fragment Offset) +// (009) jset #0x1fff jt 16 jf 10 # Use 0x1fff as a mask for fragment offset; If fragment offset != 0, goto #16, else #10 +// (010) ldxb 4*([14]&0xf) # x = IP header length +// (011) ldh [x + 14] # Load 2B at x+14 (TCP Source Port) +// (012) jeq #0x7b jt 13 jf 16 # TCP Source Port: If 123, goto #13, else #16 +// (013) ldh [x + 16] # Load 2B at x+16 (TCP dst port) +// (014) jeq #0x7c jt 15 jf 16 # TCP dst port: If 124, goto #15, else #16 +// (015) ret #262144 # MATCH +// (016) ret #0 # NOMATCH + +func calculateInstructionsSize(packet *crdv1alpha1.Packet) int { + count := 0 + // load ethertype + count++ + // ip check + count++ + + if packet != nil { + // protocol check + if packet.Protocol != nil { + count += 2 + } + transPort := packet.TransportHeader + if transPort.TCP != nil { + // load Fragment Offset + count += 3 + if transPort.TCP.SrcPort != nil { + count += 2 + } + if transPort.TCP.DstPort != nil { + count += 2 + } + + } else if transPort.UDP != nil { + count += 3 + if transPort.UDP.SrcPort != nil { + count += 2 + } + if transPort.UDP.DstPort != nil { + count += 2 + } + } + } + // src and dst ip + count += 4 + + 
// ret command + count += 2 + return count + +} diff --git a/pkg/agent/packetcapture/capture/bpf_test.go b/pkg/agent/packetcapture/capture/bpf_test.go new file mode 100644 index 00000000000..1f911135a52 --- /dev/null +++ b/pkg/agent/packetcapture/capture/bpf_test.go @@ -0,0 +1,184 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capture + +import ( + "net" + "testing" + + "github.com/stretchr/testify/assert" + "golang.org/x/net/bpf" + "k8s.io/apimachinery/pkg/util/intstr" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" +) + +var ( + testTCPProtocol = intstr.FromString("TCP") + testUDPProtocol = intstr.FromString("UDP") + testSrcPort int32 = 80 + testDstPort int32 = 80 +) + +func TestCalculateInstructionsSize(t *testing.T) { + tt := []struct { + name string + packet *crdv1alpha1.Packet + count int + }{ + { + name: "proto and host and port", + packet: &crdv1alpha1.Packet{ + Protocol: &testTCPProtocol, + TransportHeader: crdv1alpha1.TransportHeader{ + TCP: &crdv1alpha1.TCPHeader{ + SrcPort: &testSrcPort, + DstPort: &testDstPort, + }, + }, + }, + count: 17, + }, + { + name: "proto with host", + packet: &crdv1alpha1.Packet{ + Protocol: &testTCPProtocol, + }, + count: 10, + }, + { + name: "proto with src port", + packet: &crdv1alpha1.Packet{ + Protocol: &testTCPProtocol, + TransportHeader: crdv1alpha1.TransportHeader{ + TCP: &crdv1alpha1.TCPHeader{ + SrcPort: &testSrcPort, + }, + }, + }, + count: 15, + }, + 
{ + name: "proto with dst port", + packet: &crdv1alpha1.Packet{ + Protocol: &testUDPProtocol, + TransportHeader: crdv1alpha1.TransportHeader{ + UDP: &crdv1alpha1.UDPHeader{ + DstPort: &testDstPort, + }, + }, + }, + count: 15, + }, + + { + name: "any proto", + packet: &crdv1alpha1.Packet{}, + count: 8, + }, + } + + for _, item := range tt { + t.Run(item.name, func(t *testing.T) { + assert.Equal(t, item.count, calculateInstructionsSize(item.packet)) + }) + } +} + +func TestPacketCaptureCompileBPF(t *testing.T) { + tt := []struct { + name string + srcIP net.IP + dstIP net.IP + spec *crdv1alpha1.PacketCaptureSpec + inst []bpf.Instruction + }{ + { + name: "with-proto-and-port", + srcIP: net.ParseIP("127.0.0.1"), + dstIP: net.ParseIP("127.0.0.2"), + spec: &crdv1alpha1.PacketCaptureSpec{ + Packet: &crdv1alpha1.Packet{ + Protocol: &testTCPProtocol, + TransportHeader: crdv1alpha1.TransportHeader{ + TCP: &crdv1alpha1.TCPHeader{ + SrcPort: &testSrcPort, + DstPort: &testDstPort, + }}, + }, + }, + inst: []bpf.Instruction{ + bpf.LoadAbsolute{Off: 12, Size: 2}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x800, SkipFalse: 14}, + bpf.LoadAbsolute{Off: 23, Size: 1}, // ip protocol + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x6, SkipFalse: 12}, // tcp + bpf.LoadAbsolute{Off: 26, Size: 4}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x7f000001, SkipTrue: 0, SkipFalse: 10}, + bpf.LoadAbsolute{Off: 30, Size: 4}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x7f000002, SkipTrue: 0, SkipFalse: 8}, + bpf.LoadAbsolute{Off: 20, Size: 2}, // flags+fragment offset, since we need to calc where the src/dst port is + bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6}, // do we have an L4 header? 
+ bpf.LoadMemShift{Off: 14}, // calculate size of IP header + bpf.LoadIndirect{Off: 14, Size: 2}, // src port + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x50, SkipFalse: 3}, // port 80 + bpf.LoadIndirect{Off: 16, Size: 2}, // dst port + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x50, SkipFalse: 1}, // port 80 + bpf.RetConstant{Val: 262144}, + bpf.RetConstant{Val: 0}, + }, + }, + { + name: "udp-proto-str", + srcIP: net.ParseIP("127.0.0.1"), + dstIP: net.ParseIP("127.0.0.2"), + spec: &crdv1alpha1.PacketCaptureSpec{ + Packet: &crdv1alpha1.Packet{ + Protocol: &testUDPProtocol, + TransportHeader: crdv1alpha1.TransportHeader{ + UDP: &crdv1alpha1.UDPHeader{ + SrcPort: &testSrcPort, + DstPort: &testDstPort, + }}, + }, + }, + inst: []bpf.Instruction{ + bpf.LoadAbsolute{Off: 12, Size: 2}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x800, SkipFalse: 14}, + bpf.LoadAbsolute{Off: 23, Size: 1}, // ip protocol + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x11, SkipFalse: 12}, // udp + bpf.LoadAbsolute{Off: 26, Size: 4}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x7f000001, SkipTrue: 0, SkipFalse: 10}, + bpf.LoadAbsolute{Off: 30, Size: 4}, + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x7f000002, SkipTrue: 0, SkipFalse: 8}, + bpf.LoadAbsolute{Off: 20, Size: 2}, // flags+fragment offset, since we need to calc where the src/dst port is + bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 0x1fff, SkipTrue: 6}, // do we have an L4 header? 
+ bpf.LoadMemShift{Off: 14}, // calculate size of IP header + bpf.LoadIndirect{Off: 14, Size: 2}, // src port + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x50, SkipFalse: 3}, // port 80 + bpf.LoadIndirect{Off: 16, Size: 2}, // dst port + bpf.JumpIf{Cond: bpf.JumpEqual, Val: 0x50, SkipFalse: 1}, // port 80 + bpf.RetConstant{Val: 262144}, + bpf.RetConstant{Val: 0}, + }, + }, + } + + for _, item := range tt { + t.Run(item.name, func(t *testing.T) { + result := compilePacketFilter(item.spec.Packet, item.srcIP, item.dstIP) + assert.Equal(t, item.inst, result) + }) + } +} diff --git a/pkg/agent/packetcapture/capture/pcap_linux.go b/pkg/agent/packetcapture/capture/pcap_linux.go new file mode 100644 index 00000000000..29858b8cf8a --- /dev/null +++ b/pkg/agent/packetcapture/capture/pcap_linux.go @@ -0,0 +1,66 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package capture + +import ( + "context" + "net" + + "github.com/gopacket/gopacket" + "github.com/gopacket/gopacket/layers" + "github.com/gopacket/gopacket/pcapgo" + "golang.org/x/net/bpf" + "k8s.io/klog/v2" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" +) + +const ( + // Max packet size for pcap capture. 
+ maxSnapshotBytes = 65536 +) + +type pcapCapture struct { +} + +func NewPcapCapture() (*pcapCapture, error) { + return &pcapCapture{}, nil +} + +func (p *pcapCapture) Capture(ctx context.Context, device string, srcIP, dstIP net.IP, packet *crdv1alpha1.Packet) (chan gopacket.Packet, error) { + eth, err := pcapgo.NewEthernetHandle(device) + if err != nil { + return nil, err + } + + eth.SetPromiscuous(false) + eth.SetCaptureLength(maxSnapshotBytes) + + inst := compilePacketFilter(packet, srcIP, dstIP) + klog.V(5).InfoS("Generated bpf instructions for Packetcapture", "device", device, "srcIP", srcIP, "dstIP", dstIP, "packetSpec", packet, "bpf", inst) + rawInst, err := bpf.Assemble(inst) + if err != nil { + return nil, err + } + err = eth.SetBPF(rawInst) + if err != nil { + return nil, err + } + + packetSource := gopacket.NewPacketSource(eth, layers.LinkTypeEthernet) + packetSource.NoCopy = true + return packetSource.PacketsCtx(ctx), nil + +} diff --git a/pkg/agent/packetcapture/capture/pcap_windows.go b/pkg/agent/packetcapture/capture/pcap_windows.go new file mode 100644 index 00000000000..323e027abfd --- /dev/null +++ b/pkg/agent/packetcapture/capture/pcap_windows.go @@ -0,0 +1,36 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package capture + +import ( + "context" + "errors" + "net" + + "github.com/gopacket/gopacket" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" +) + +type pcapCapture struct { +} + +func NewPcapCapture() (*pcapCapture, error) { + return nil, errors.New("PacketCapture is not implemented on Windows") +} + +func (p *pcapCapture) Capture(ctx context.Context, device string, srcIP, dstIP net.IP, packet *crdv1alpha1.Packet) (chan gopacket.Packet, error) { + return nil, errors.New("PacketCapture is not implemented on Windows") +} diff --git a/pkg/agent/packetcapture/capture_interface.go b/pkg/agent/packetcapture/capture_interface.go new file mode 100644 index 00000000000..b7b77d9fcd8 --- /dev/null +++ b/pkg/agent/packetcapture/capture_interface.go @@ -0,0 +1,28 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packetcapture + +import ( + "context" + "net" + + "github.com/gopacket/gopacket" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" +) + +type PacketCapturer interface { + Capture(ctx context.Context, device string, srcIP, dstIP net.IP, packet *crdv1alpha1.Packet) (chan gopacket.Packet, error) +} diff --git a/pkg/agent/packetcapture/packetcapture_controller.go b/pkg/agent/packetcapture/packetcapture_controller.go new file mode 100644 index 00000000000..a24fbda6531 --- /dev/null +++ b/pkg/agent/packetcapture/packetcapture_controller.go @@ -0,0 +1,780 @@ +// Copyright 2024 Antrea Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packetcapture + +import ( + "context" + "errors" + "fmt" + "maps" + "net" + "os" + "path/filepath" + "slices" + "strings" + "sync" + "time" + + "github.com/gopacket/gopacket" + "github.com/gopacket/gopacket/layers" + "github.com/gopacket/gopacket/pcapgo" + "github.com/spf13/afero" + "golang.org/x/crypto/ssh" + "golang.org/x/time/rate" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/conversion" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/retry" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + "antrea.io/antrea/pkg/agent/interfacestore" + "antrea.io/antrea/pkg/agent/packetcapture/capture" + "antrea.io/antrea/pkg/agent/util" + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" + clientsetversioned "antrea.io/antrea/pkg/client/clientset/versioned" + crdinformers "antrea.io/antrea/pkg/client/informers/externalversions/crd/v1alpha1" + crdlisters "antrea.io/antrea/pkg/client/listers/crd/v1alpha1" + "antrea.io/antrea/pkg/util/auth" + "antrea.io/antrea/pkg/util/env" + "antrea.io/antrea/pkg/util/sftp" +) + +type storageProtocolType string + +const ( + sftpProtocol storageProtocolType = "sftp" +) + +const ( + controllerName = 
"PacketCaptureController" + resyncPeriod time.Duration = 0 + + minRetryDelay = 5 * time.Second + maxRetryDelay = 60 * time.Second + + defaultWorkers = 2 + + // maxConcurrentCaptures defines how many capture requests we can handle concurrently. Waiting captures will be + // marked as Pending until they can be processed. + maxConcurrentCaptures = 16 + + contextTimeoutErrMsg = "context deadline exceeded" + // defaultTimeoutDuration is the capture timeout used when the PacketCapture spec does not set one. + defaultTimeoutDuration = 60 * time.Second + + captureStatusUpdatePeriod = 10 * time.Second + + // PacketCapture uses a dedicated Secret object to store authentication information for a file server. + // #nosec G101 + fileServerAuthSecretName = "antrea-packetcapture-fileserver-auth" +) + +type packetCapturePhase string + +const ( + packetCapturePhasePending packetCapturePhase = "" + packetCapturePhaseRunning packetCapturePhase = "Running" + packetCapturePhaseCompleted packetCapturePhase = "Completed" +) + +var ( + packetDirectory = filepath.Join(os.TempDir(), "antrea", "packetcapture", "packets") + defaultFS = afero.NewOsFs() +) + +type packetCaptureState struct { + // name is the PacketCapture name. + name string + // capturedPacketsNum records how many packets have been captured. Due to the RateLimiter, + // this may not be the real-time data. + capturedPacketsNum int32 + // targetCapturedPacketsNum is the target number limit for a PacketCapture. When capturedPacketsNum == targetCapturedPacketsNum, it means + // the PacketCapture is done successfully. + targetCapturedPacketsNum int32 + // updateRateLimiter controls the frequency of the updates to PacketCapture status. + updateRateLimiter *rate.Limiter + // pcapngFile is the file object for the packet file. + pcapngFile afero.File + // pcapngWriter is the writer for the packet file. + pcapngWriter *pcapgo.NgWriter + // phase is the phase of the PacketCapture. 
+ phase packetCapturePhase + // filePath is the final path shown in PacketCapture's status. + filePath string + // err is the latest error observed in the capture. + err error + // cancel is the cancel function for capture context. + cancel context.CancelFunc +} + +func (pcs *packetCaptureState) isCaptureSuccessful() bool { + return pcs.capturedPacketsNum == pcs.targetCapturedPacketsNum && pcs.targetCapturedPacketsNum > 0 +} + +type Controller struct { + kubeClient clientset.Interface + crdClient clientsetversioned.Interface + packetCaptureInformer crdinformers.PacketCaptureInformer + packetCaptureLister crdlisters.PacketCaptureLister + packetCaptureSynced cache.InformerSynced + interfaceStore interfacestore.InterfaceStore + queue workqueue.TypedRateLimitingInterface[string] + sftpUploader sftp.Uploader + captureInterface PacketCapturer + mutex sync.Mutex + // A name-phase mapping for all PacketCapture CRs. + captures map[string]*packetCaptureState + numRunningCaptures int +} + +func NewPacketCaptureController( + kubeClient clientset.Interface, + crdClient clientsetversioned.Interface, + packetCaptureInformer crdinformers.PacketCaptureInformer, + interfaceStore interfacestore.InterfaceStore, +) (*Controller, error) { + c := &Controller{ + kubeClient: kubeClient, + crdClient: crdClient, + packetCaptureInformer: packetCaptureInformer, + packetCaptureLister: packetCaptureInformer.Lister(), + packetCaptureSynced: packetCaptureInformer.Informer().HasSynced, + interfaceStore: interfaceStore, + queue: workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemExponentialFailureRateLimiter[string](minRetryDelay, maxRetryDelay), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "packetcapture"}, + ), + sftpUploader: sftp.NewUploader(), + captures: make(map[string]*packetCaptureState), + } + + packetCaptureInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ + AddFunc: c.addPacketCapture, + UpdateFunc: c.updatePacketCapture, + 
DeleteFunc: c.deletePacketCapture, + }, resyncPeriod) + + capture, err := capture.NewPcapCapture() + if err != nil { + return nil, err + } + c.captureInterface = capture + return c, nil +} + +func (c *Controller) enqueuePacketCapture(pc *crdv1alpha1.PacketCapture) { + c.queue.Add(pc.Name) +} + +// Run will create defaultWorkers workers (go routines) which will process the PacketCapture events from the +// workqueue. +func (c *Controller) Run(stopCh <-chan struct{}) { + defer c.queue.ShutDown() + + klog.InfoS("Starting packetcapture controller", "name", controllerName) + defer klog.InfoS("Shutting down packetcapture controller", "name", controllerName) + + cacheSynced := []cache.InformerSynced{c.packetCaptureSynced} + if !cache.WaitForNamedCacheSync(controllerName, stopCh, cacheSynced...) { + return + } + + err := defaultFS.MkdirAll(packetDirectory, 0755) + if err != nil { + klog.ErrorS(err, "Couldn't create the directory for storing captured packets", "directory", packetDirectory) + return + } + + for i := 0; i < defaultWorkers; i++ { + go wait.Until(c.worker, time.Second, stopCh) + } + <-stopCh +} + +func (c *Controller) addPacketCapture(obj interface{}) { + pc := obj.(*crdv1alpha1.PacketCapture) + klog.V(2).InfoS("Processing PacketCapture ADD event", "name", pc.Name) + c.enqueuePacketCapture(pc) +} + +func (c *Controller) updatePacketCapture(oldObj, newObj interface{}) { + newPc := newObj.(*crdv1alpha1.PacketCapture) + oldPc := oldObj.(*crdv1alpha1.PacketCapture) + if newPc.Generation != oldPc.Generation { + klog.V(2).InfoS("Processing PacketCapture UPDATE event", "name", newPc.Name) + c.enqueuePacketCapture(newPc) + } +} + +func (c *Controller) deletePacketCapture(obj interface{}) { + pc := obj.(*crdv1alpha1.PacketCapture) + klog.V(2).InfoS("Processing PacketCapture DELETE event", "name", pc.Name) + c.enqueuePacketCapture(pc) +} + +func nameToPath(name string) string { + return filepath.Join(packetDirectory, name+".pcapng") +} + +func (c *Controller) worker() { 
+ for c.processPacketCaptureItem() { + } +} + +// processPacketCaptureItem takes one key from the workqueue and syncs the corresponding PacketCapture. +// The key is forgotten when the sync succeeds and re-added with rate limiting when it fails. +// It returns false when the queue reports quit, true otherwise. +func (c *Controller) processPacketCaptureItem() bool { + key, quit := c.queue.Get() + if quit { + return false + } + defer c.queue.Done(key) + if err := c.syncPacketCapture(key); err == nil { + c.queue.Forget(key) + } else { + c.queue.AddRateLimited(key) + klog.ErrorS(err, "Error syncing PacketCapture, requeueing", "key", key) + } + return true +} + +// syncPacketCapture reconciles the PacketCapture with the given name. It removes the capture file and +// cached state when the object no longer exists, skips objects whose target Pod has no interface on +// this Node, reports invalid specs in the status, and otherwise starts a pending capture and patches +// the status with the resulting state. A non-nil return value makes the caller requeue the key. +func (c *Controller) syncPacketCapture(pcName string) error { + // cleanupStatus cancels the capture context (if any) and drops the cached state for pcName. + cleanupStatus := func() { + c.mutex.Lock() + defer c.mutex.Unlock() + state := c.captures[pcName] + if state != nil { + if state.cancel != nil { + state.cancel() + } + delete(c.captures, pcName) + } + } + + pc, err := c.packetCaptureLister.Get(pcName) + if apierrors.IsNotFound(err) { + c.cleanupPacketCapture(pcName) + cleanupStatus() + return nil + } + // Any other lister error leaves pc nil; return it so that the key is retried instead of + // dereferencing a nil object below. + if err != nil { + return err + } + + // Capture will not occur on this Node if a corresponding Pod interface is not found. 
+ device := c.getTargetCaptureDevice(pc) + if device == "" { + klog.V(4).InfoS("Skipping process PacketCapture", "name", pcName) + return nil + } + + if err := c.validatePacketCapture(&pc.Spec); err != nil { + klog.ErrorS(err, "Invalid PacketCapture", "name", pc.Name) + if updateErr := c.updateStatus(context.Background(), pcName, &packetCaptureState{err: err}); updateErr != nil { + klog.ErrorS(updateErr, "Failed to update PacketCapture status", "name", pc.Name) + } + cleanupStatus() + return nil + } + + state := func() *packetCaptureState { + c.mutex.Lock() + defer c.mutex.Unlock() + state := c.captures[pcName] + if state == nil { + state = &packetCaptureState{} + c.captures[pcName] = state + } + phase := state.phase + klog.InfoS("Syncing PacketCapture", "name", pcName, "phase", phase) + if phase != packetCapturePhasePending { + return state + } + + if c.numRunningCaptures >= maxConcurrentCaptures { + err = fmt.Errorf("PacketCapture running count reach limit") + } else { + timeout := defaultTimeoutDuration + if pc.Spec.Timeout != nil { + timeout = time.Duration(*pc.Spec.Timeout) * 
time.Second + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + state.cancel = cancel + if err = c.startPacketCapture(ctx, pc, device); err != nil { + phase = packetCapturePhaseCompleted + } else { + phase = packetCapturePhaseRunning + c.numRunningCaptures += 1 + } + } + state.phase = phase + state.err = err + c.captures[pcName] = state + return state + }() + + if updateErr := c.updateStatus(context.Background(), pcName, state); updateErr != nil { + return fmt.Errorf("error when patching status: %w", updateErr) + } + return err +} + +func (c *Controller) validatePacketCapture(spec *crdv1alpha1.PacketCaptureSpec) error { + if spec.Packet != nil { + protocol := spec.Packet.Protocol + if protocol != nil { + if protocol.Type == intstr.String { + if _, ok := capture.ProtocolMap[strings.ToUpper(protocol.StrVal)]; !ok { + return fmt.Errorf("invalid protocol string, supported values are: %v (case insensitive)", slices.Collect(maps.Keys(capture.ProtocolMap))) + } + } + } + } + return nil +} + +func (c *Controller) cleanupPacketCapture(pcName string) { + path := nameToPath(pcName) + if err := defaultFS.RemoveAll(path); err == nil { + klog.V(2).InfoS("Deleted the captured pcap file successfully", "name", pcName, "path", path) + } else { + klog.ErrorS(err, "Failed to delete the captured pcap file", "name", pcName, "path", path) + } +} + +func getPacketFileAndWriter(name string) (afero.File, *pcapgo.NgWriter, error) { + filePath := nameToPath(name) + var file afero.File + if _, err := os.Stat(filePath); err == nil { + klog.InfoS("Packet file already exists. 
This may be caused by an unexpected termination, will delete it", "path", filePath) + if err := defaultFS.Remove(filePath); err != nil { + return nil, nil, err + } + } + file, err := defaultFS.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("failed to create pcapng file: %w", err) + } + writer, err := pcapgo.NewNgWriter(file, layers.LinkTypeEthernet) + if err != nil { + // Do not leak the file handle when the writer cannot be created. + _ = file.Close() + return nil, nil, fmt.Errorf("couldn't initialize a pcap writer: %w", err) + } + return file, writer, nil +} + +// getTargetCaptureDevice is trying to locate the target device for packet capture. If the target +// Pod does not exist on the current Node, the agent on this Node will not perform the capture. +// In the PacketCapture spec, at least one of `.Spec.Source.Pod` or `.Spec.Destination.Pod` +// should be set. The source Pod takes precedence when both are set. An empty string is returned +// when neither is set or when the Pod has no interface in the local interface store. +func (c *Controller) getTargetCaptureDevice(pc *crdv1alpha1.PacketCapture) string { + var pod, ns string + switch { + case pc.Spec.Source.Pod != nil: + pod = pc.Spec.Source.Pod.Name + ns = pc.Spec.Source.Pod.Namespace + case pc.Spec.Destination.Pod != nil: + pod = pc.Spec.Destination.Pod.Name + ns = pc.Spec.Destination.Pod.Namespace + default: + // Neither endpoint is a Pod, so there is no local interface to capture on. + return "" + } + + podInterfaces := c.interfaceStore.GetContainerInterfacesByPod(pod, ns) + if len(podInterfaces) == 0 { + return "" + } + return podInterfaces[0].InterfaceName +} + + + +// startPacketCapture starts the capture on the target device. The actual capture process will be started +// in a separated go routine. 
+func (c *Controller) startPacketCapture(ctx context.Context, pc *crdv1alpha1.PacketCapture, device string) error { + klog.V(4).InfoS("Started processing PacketCapture", "name", pc.Name) + pcState := c.captures[pc.Name] + pcState.name = pc.Name + srcIP, dstIp, err := c.parseIPs(ctx, pc) + if err != nil { + return err + } + klog.V(2).InfoS("Prepare capture on the current Node", "name", pc.Name, "device", device) + pcState.targetCapturedPacketsNum = pc.Spec.CaptureConfig.FirstN.Number + file, writer, err := getPacketFileAndWriter(pc.Name) + if err != nil { + return err + } + pcState.pcapngFile = file + pcState.pcapngWriter = writer + pcState.updateRateLimiter = rate.NewLimiter(rate.Every(captureStatusUpdatePeriod), 1) + go func() { + captureErr := c.performCapture(ctx, pc, pcState, device, srcIP, dstIp) + func() { + c.mutex.Lock() + defer c.mutex.Unlock() + c.numRunningCaptures -= 1 + state := c.captures[pc.Name] + if state != nil { + state.phase = packetCapturePhaseCompleted + state.err = captureErr + } + + }() + c.enqueuePacketCapture(pc) + }() + return nil +} + +func (c *Controller) performCapture( + ctx context.Context, + pc *crdv1alpha1.PacketCapture, + captureState *packetCaptureState, + device string, + srcIP, dstIP net.IP, +) error { + packets, err := c.captureInterface.Capture(ctx, device, srcIP, dstIP, pc.Spec.Packet) + if err != nil { + klog.ErrorS(err, "Failed to start capture") + return err + } + klog.InfoS("Start capture packets", "name", pc.Name, "device", device) + for { + select { + case packet := <-packets: + c.mutex.Lock() + if captureState.isCaptureSuccessful() { + c.mutex.Unlock() + return nil + } + captureState.capturedPacketsNum++ + c.mutex.Unlock() + ci := gopacket.CaptureInfo{ + Timestamp: time.Now(), + CaptureLength: len(packet.Data()), + Length: len(packet.Data()), + } + err = captureState.pcapngWriter.WritePacket(ci, packet.Data()) + if err != nil { + return fmt.Errorf("couldn't write packets: %w", err) + } + klog.V(5).InfoS("Capture 
packets", "name", captureState.name, "count", + captureState.capturedPacketsNum, "len", ci.Length) + + c.mutex.Lock() + reachTarget := captureState.isCaptureSuccessful() + c.mutex.Unlock() + // use rate limiter to reduce the times we need to update status. + if reachTarget || captureState.updateRateLimiter.Allow() { + // if reach the target. flush the file and upload it. + if reachTarget { + path := env.GetPodName() + ":" + nameToPath(pc.Name) + statusPath := path + if err = captureState.pcapngWriter.Flush(); err != nil { + return err + } + if pc.Spec.FileServer != nil { + err = c.uploadPackets(ctx, pc, captureState.pcapngFile) + klog.V(4).InfoS("Upload captured packets", "name", pc.Name, "path", path) + statusPath = fmt.Sprintf("%s/%s.pcapng", pc.Spec.FileServer.URL, pc.Name) + } + c.mutex.Lock() + captureState.filePath = statusPath + c.mutex.Unlock() + if err != nil { + return err + } + if err := captureState.pcapngFile.Close(); err != nil { + klog.ErrorS(err, "Close pcapng file error", "name", pc.Name, "path", path) + } + } + // report capture status. 
+ c.enqueuePacketCapture(pc) + } + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// getPodIP returns the IPv4 address of the referenced Pod. The local interface store is consulted +// first; when the Pod has no local interface, the Pod is fetched from the API server using ctx. +// An error is returned when the Pod cannot be fetched or has no IPv4 address. +func (c *Controller) getPodIP(ctx context.Context, podRef *crdv1alpha1.PodReference) (net.IP, error) { + podInterfaces := c.interfaceStore.GetContainerInterfacesByPod(podRef.Name, podRef.Namespace) + var podIP net.IP + if len(podInterfaces) > 0 { + podIP = podInterfaces[0].GetIPv4Addr() + } else { + // Use the caller's context so that the request is bounded by the capture's lifetime. + pod, err := c.kubeClient.CoreV1().Pods(podRef.Namespace).Get(ctx, podRef.Name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get Pod %s/%s: %w", podRef.Namespace, podRef.Name, err) + } + podIPs := make([]net.IP, len(pod.Status.PodIPs)) + for i, ip := range pod.Status.PodIPs { + podIPs[i] = net.ParseIP(ip.IP) + } + podIP = util.GetIPv4Addr(podIPs) + } + if podIP == nil { + return nil, fmt.Errorf("cannot find IP with IPv4 address family for Pod %s/%s", podRef.Namespace, podRef.Name) + } + return podIP, nil +} + +// parseIPs resolves the source and destination IPs of the PacketCapture, either from the referenced +// Pod or from the literal IP in the spec. An endpoint that sets neither yields a nil IP. The first +// resolution error is returned, together with nil IPs. +func (c *Controller) parseIPs(ctx context.Context, pc *crdv1alpha1.PacketCapture) (srcIP, dstIP net.IP, err error) { + if pc.Spec.Source.Pod != nil { + srcIP, err = c.getPodIP(ctx, pc.Spec.Source.Pod) + } else if pc.Spec.Source.IP != nil { + srcIP = net.ParseIP(*pc.Spec.Source.IP) + if srcIP == nil { + err = fmt.Errorf("invalid source IP address: %s", *pc.Spec.Source.IP) + } + } + // Stop here so that a successful destination lookup cannot overwrite the source error. 
+ if err != nil { + return nil, nil, err + } + + if pc.Spec.Destination.Pod != nil { + dstIP, err = c.getPodIP(ctx, pc.Spec.Destination.Pod) + } else if pc.Spec.Destination.IP != nil { + dstIP = net.ParseIP(*pc.Spec.Destination.IP) + if dstIP == nil { + err = fmt.Errorf("invalid destination IP address: %s", *pc.Spec.Destination.IP) + } + } + if err != nil { + return nil, nil, err + } + return srcIP, dstIP, nil +} + +func (c *Controller) getUploaderByProtocol(protocol storageProtocolType) (sftp.Uploader, error) { + if protocol == sftpProtocol { + return c.sftpUploader, nil + } + return nil, fmt.Errorf("unsupported protocol %s", protocol) +} + +func (c *Controller) generatePacketsPathForServer(name string) string { + return name + ".pcapng" +} + +func (c *Controller) 
uploadPackets(ctx context.Context, pc *crdv1alpha1.PacketCapture, outputFile afero.File) error { + klog.V(2).InfoS("Uploading captured packets for PacketCapture", "name", pc.Name) + uploader, err := c.getUploaderByProtocol(sftpProtocol) + if err != nil { + return fmt.Errorf("failed to upload packets while getting uploader: %w", err) + } + if _, err := outputFile.Seek(0, 0); err != nil { + return fmt.Errorf("failed to upload to the file server while setting offset: %v", err) + } + authSecret := v1.SecretReference{ + Name: fileServerAuthSecretName, + Namespace: env.GetAntreaNamespace(), + } + serverAuth, err := auth.GetAuthConfigurationFromSecret(ctx, auth.BasicAuthenticationType, &authSecret, c.kubeClient) + if err != nil { + klog.ErrorS(err, "Failed to get authentication for the file server", "name", pc.Name, "authSecret", authSecret) + return err + } + if serverAuth.BasicAuthentication == nil { + return fmt.Errorf("failed to get basic authentication info for the file server") + } + cfg := &ssh.ClientConfig{ + User: serverAuth.BasicAuthentication.Username, + Auth: []ssh.AuthMethod{ssh.Password(serverAuth.BasicAuthentication.Password)}, + // #nosec G106: skip host key check here and users can specify their own checks if needed + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + Timeout: time.Second, + } + return uploader.Upload(pc.Spec.FileServer.URL, c.generatePacketsPathForServer(pc.Name), cfg, outputFile) +} + +func (c *Controller) updateStatus(ctx context.Context, name string, state *packetCaptureState) error { + toUpdate, getErr := c.packetCaptureLister.Get(name) + if getErr != nil { + klog.InfoS("Didn't find the original PacketCapture, skip updating status", "name", name) + return nil + } + conditions := []crdv1alpha1.PacketCaptureCondition{} + t := metav1.Now() + c.mutex.Lock() + updatedStatus := crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: state.capturedPacketsNum, + FilePath: state.filePath, + } + + if state.err != nil { + updatedStatus.FilePath = "" + 
conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionFalse), + LastTransitionTime: metav1.Now(), + Reason: "CaptureFailed", + Message: state.err.Error(), + }) + + if errors.Is(state.err, context.DeadlineExceeded) { + conditions = []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Timeout", + Message: state.err.Error(), + }, + } + } else if state.isCaptureSuccessful() { + conditions = []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Succeed", + }, + } + } + if toUpdate.Spec.FileServer != nil && state.filePath != "" { + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionFalse), + LastTransitionTime: t, + Reason: "UploadFailed", + Message: state.err.Error(), + }) + } + } else { + if state.isCaptureSuccessful() { + conditions = []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Succeed", + }, + } + if toUpdate.Spec.FileServer != nil { + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Succeed", + }) + } + } else if state.phase == packetCapturePhaseRunning { + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureRunning, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + }) + } else { + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: 
crdv1alpha1.PacketCaptureRunning, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + }) + } + + } + c.mutex.Unlock() + updatedStatus.Conditions = conditions + + if retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if toUpdate.Status.FilePath != "" { + updatedStatus.FilePath = toUpdate.Status.FilePath + } + if updatedStatus.NumberCaptured == 0 && toUpdate.Status.NumberCaptured > 0 { + updatedStatus.NumberCaptured = toUpdate.Status.NumberCaptured + } + + updatedStatus.Conditions = mergeConditions(toUpdate.Status.Conditions, updatedStatus.Conditions) + if packetCaptureStatusEqual(toUpdate.Status, updatedStatus) { + return nil + } + toUpdate.Status = updatedStatus + klog.V(2).InfoS("Updating PacketCapture", "name", name, "status", toUpdate.Status) + _, updateErr := c.crdClient.CrdV1alpha1().PacketCaptures().UpdateStatus(context.TODO(), toUpdate, metav1.UpdateOptions{}) + if updateErr != nil && apierrors.IsConflict(updateErr) { + var getErr error + if toUpdate, getErr = c.crdClient.CrdV1alpha1().PacketCaptures().Get(context.TODO(), name, metav1.GetOptions{}); getErr != nil { + return getErr + } + } + // Return the error from UPDATE. 
+ return updateErr + }); retryErr != nil { + return retryErr + } + klog.V(2).InfoS("Updated PacketCapture", "name", name) + return nil +} + +func conditionEqualsIgnoreLastTransitionTime(a, b crdv1alpha1.PacketCaptureCondition) bool { + a1 := a + a1.LastTransitionTime = metav1.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC) + b1 := b + b1.LastTransitionTime = metav1.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC) + return a1 == b1 +} + +var semanticIgnoreLastTransitionTime = conversion.EqualitiesOrDie( + conditionSliceEqualsIgnoreLastTransitionTime, +) + +func packetCaptureStatusEqual(oldStatus, newStatus crdv1alpha1.PacketCaptureStatus) bool { + return semanticIgnoreLastTransitionTime.DeepEqual(oldStatus, newStatus) +} + +func conditionSliceEqualsIgnoreLastTransitionTime(as, bs []crdv1alpha1.PacketCaptureCondition) bool { + if len(as) != len(bs) { + return false + } + for i := range as { + a := as[i] + b := bs[i] + if !conditionEqualsIgnoreLastTransitionTime(a, b) { + return false + } + } + return true +} + +func mergeConditions(oldConditions, newConditions []crdv1alpha1.PacketCaptureCondition) []crdv1alpha1.PacketCaptureCondition { + finalConditions := make([]crdv1alpha1.PacketCaptureCondition, 0) + newConditionMap := make(map[crdv1alpha1.PacketCaptureConditionType]crdv1alpha1.PacketCaptureCondition) + addedConditions := sets.New[string]() + for _, condition := range newConditions { + newConditionMap[condition.Type] = condition + } + for _, oldCondition := range oldConditions { + newCondition, exists := newConditionMap[oldCondition.Type] + if !exists { + finalConditions = append(finalConditions, oldCondition) + continue + } + // Use the original Condition if the only change is about lastTransition time + if conditionEqualsIgnoreLastTransitionTime(newCondition, oldCondition) { + finalConditions = append(finalConditions, oldCondition) + } else { + // Use the latest Condition. 
+ finalConditions = append(finalConditions, newCondition) + } + addedConditions.Insert(string(newCondition.Type)) + } + for key, newCondition := range newConditionMap { + if !addedConditions.Has(string(key)) { + finalConditions = append(finalConditions, newCondition) + } + } + return finalConditions +} diff --git a/pkg/agent/packetcapture/packetcapture_controller_test.go b/pkg/agent/packetcapture/packetcapture_controller_test.go new file mode 100644 index 00000000000..338b3255aa2 --- /dev/null +++ b/pkg/agent/packetcapture/packetcapture_controller_test.go @@ -0,0 +1,556 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package packetcapture + +import ( + "context" + "errors" + "fmt" + "io" + "net" + "testing" + "time" + + "github.com/gopacket/gopacket" + "github.com/gopacket/gopacket/layers" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + "golang.org/x/crypto/ssh" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/fake" + + "antrea.io/antrea/pkg/agent/interfacestore" + "antrea.io/antrea/pkg/agent/util" + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" + fakeversioned "antrea.io/antrea/pkg/client/clientset/versioned/fake" + crdinformers "antrea.io/antrea/pkg/client/informers/externalversions" + "antrea.io/antrea/pkg/util/k8s" +) + +var ( + pod1IPv4 = "192.168.10.10" + pod2IPv4 = "192.168.11.10" + + ipv6 = "2001:db8::68" + pod1MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:0f") + pod2MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:00") + ofPortPod1 = uint32(1) + ofPortPod2 = uint32(2) + + icmpProto = intstr.FromString("ICMP") + invalidProto = intstr.FromString("INVALID") + + pod1 = v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + Namespace: "default", + }, + Status: v1.PodStatus{ + PodIP: pod1IPv4, + }, + } + pod2 = v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-2", + Namespace: "default", + }, + Status: v1.PodStatus{ + PodIP: pod2IPv4, + }, + } + pod3 = v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-3", + Namespace: "default", + }, + } + + secret1 = v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fileServerAuthSecretName, + Namespace: "kube-system", + }, + Data: map[string][]byte{ + "username": []byte("username"), + "password": []byte("password"), + }, + } +) + +func generateTestSecret() *v1.Secret { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ 
+ Name: "AAA", + Namespace: "default", + }, + Data: map[string][]byte{ + "username": []byte("AAA"), + "password": []byte("BBBCCC"), + }, + } +} + +func genTestCR(name string, num int32) *crdv1alpha1.PacketCapture { + result := &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{Name: name, UID: types.UID(fmt.Sprintf("uid-%s", name))}, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod1.Namespace, + Name: pod1.Name, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod2.Namespace, + Name: pod2.Name, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: num, + }, + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &icmpProto, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: "sftp://127.0.0.1:22/aaa", + }, + }, + } + return result +} + +type testUploader struct { + url string + fileName string +} + +func (uploader *testUploader) Upload(url string, fileName string, config *ssh.ClientConfig, outputFile io.Reader) error { + if url != uploader.url { + return fmt.Errorf("expected url: %s for uploader, got: %s", uploader.url, url) + } + if fileName != uploader.fileName { + return fmt.Errorf("expected filename: %s for uploader, got: %s", uploader.fileName, fileName) + } + return nil +} + +func craftTestPacket() gopacket.Packet { + buffer := gopacket.NewSerializeBuffer() + options := gopacket.SerializeOptions{} + rawBytes := []byte{10, 20, 30} + gopacket.SerializeLayers(buffer, options, + &layers.Ethernet{ + SrcMAC: net.HardwareAddr{0xFF, 0xAA, 0xFA, 0xAA, 0xFF, 0xAA}, + DstMAC: net.HardwareAddr{0xBD, 0xBD, 0xBD, 0xBD, 0xBD, 0xBD}, + }, + &layers.IPv4{ + SrcIP: net.IP{127, 0, 0, 1}, + DstIP: net.IP{8, 8, 8, 8}, + }, + &layers.TCP{ + SrcPort: layers.TCPPort(4321), + DstPort: layers.TCPPort(80), + }, + gopacket.Payload(rawBytes), + ) + return gopacket.NewPacket(buffer.Bytes(), 
layers.LayerTypeEthernet, gopacket.NoCopy) +} + +type testCapture struct { +} + +func (p *testCapture) Capture(ctx context.Context, device string, srcIP, dstIP net.IP, packet *crdv1alpha1.Packet) (chan gopacket.Packet, error) { + ch := make(chan gopacket.Packet, 15) + for i := 0; i < 15; i++ { + ch <- craftTestPacket() + } + return ch, nil +} + +type fakePacketCaptureController struct { + *Controller + kubeClient kubernetes.Interface + mockController *gomock.Controller + crdClient *fakeversioned.Clientset + crdInformerFactory crdinformers.SharedInformerFactory + informerFactory informers.SharedInformerFactory +} + +func newFakePacketCaptureController(t *testing.T, runtimeObjects []runtime.Object, initObjects []runtime.Object) *fakePacketCaptureController { + controller := gomock.NewController(t) + objs := append(runtimeObjects, &pod1, &pod2, &pod3, &secret1, generateTestSecret()) + kubeClient := fake.NewSimpleClientset(objs...) + crdClient := fakeversioned.NewSimpleClientset(initObjects...) 
+ crdInformerFactory := crdinformers.NewSharedInformerFactory(crdClient, 0) + packetCaptureInformer := crdInformerFactory.Crd().V1alpha1().PacketCaptures() + informerFactory := informers.NewSharedInformerFactory(kubeClient, 0) + + ifaceStore := interfacestore.NewInterfaceStore() + addPodInterface(ifaceStore, pod1.Namespace, pod1.Name, []string{pod1IPv4, ipv6}, pod1MAC.String(), int32(ofPortPod1)) + addPodInterface(ifaceStore, pod2.Namespace, pod2.Name, []string{pod2IPv4}, pod2MAC.String(), int32(ofPortPod2)) + + pcController, _ := NewPacketCaptureController( + kubeClient, + crdClient, + packetCaptureInformer, + ifaceStore, + ) + pcController.sftpUploader = &testUploader{} + pcController.captureInterface = &testCapture{} + t.Setenv("POD_NAME", "antrea-agent") + return &fakePacketCaptureController{ + Controller: pcController, + kubeClient: kubeClient, + mockController: controller, + crdClient: crdClient, + crdInformerFactory: crdInformerFactory, + informerFactory: informerFactory, + } +} + +func addPodInterface(ifaceStore interfacestore.InterfaceStore, podNamespace, podName string, podIPs []string, podMac string, ofPort int32) { + containerName := k8s.NamespacedName(podNamespace, podName) + var ifIPs []net.IP + for _, ip := range podIPs { + ifIPs = append(ifIPs, net.ParseIP(ip)) + } + mac, _ := net.ParseMAC(podMac) + ifaceStore.AddInterface(&interfacestore.InterfaceConfig{ + IPs: ifIPs, + MAC: mac, + InterfaceName: util.GenerateContainerInterfaceName(podName, podNamespace, containerName), + ContainerInterfaceConfig: &interfacestore.ContainerInterfaceConfig{PodName: podName, PodNamespace: podNamespace, ContainerID: containerName}, + OVSPortConfig: &interfacestore.OVSPortConfig{OFPort: ofPort}, + }) +} + +// TestPacketCaptureControllerRun was used to validate the whole run process is working. It doesn't wait for +// the testing pc to finish. on sandbox env, no good solution to open raw socket. 
+func TestPacketCaptureControllerRun(t *testing.T) { + // create test os + defaultFS = afero.NewMemMapFs() + defaultFS.MkdirAll("/tmp/antrea/packetcapture/packets", 0755) + pcs := []struct { + name string + pc *crdv1alpha1.PacketCapture + expectConditionStatus metav1.ConditionStatus + }{ + { + name: "start packetcapture", + expectConditionStatus: metav1.ConditionTrue, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{Name: "pc1", UID: "uid1"}, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod1.Namespace, + Name: pod1.Name, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod2.Namespace, + Name: pod2.Name, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 10, + }, + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &icmpProto, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: "sftp://127.0.0.1:22/aaa", + }, + }, + }, + }, + { + name: "invalid proto", + expectConditionStatus: metav1.ConditionFalse, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{Name: "pc2", UID: "uid2"}, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod1.Namespace, + Name: pod1.Name, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod2.Namespace, + Name: pod2.Name, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 10, + }, + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &invalidProto, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: "sftp://127.0.0.1:22/aaa", + }, + }, + }, + }, + { + name: "timeout-case", + expectConditionStatus: metav1.ConditionFalse, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{Name: "pc3", UID: "uid3"}, + Spec: crdv1alpha1.PacketCaptureSpec{ 
+ Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod1.Namespace, + Name: pod1.Name, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod2.Namespace, + Name: pod2.Name, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 100, + }, + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &icmpProto, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: "sftp://127.0.0.1:22/aaa", + }, + }, + }, + }, + } + + objs := []runtime.Object{} + for _, pc := range pcs { + objs = append(objs, pc.pc) + } + pcc := newFakePacketCaptureController(t, nil, objs) + stopCh := make(chan struct{}) + defer close(stopCh) + pcc.crdInformerFactory.Start(stopCh) + pcc.crdInformerFactory.WaitForCacheSync(stopCh) + pcc.informerFactory.Start(stopCh) + pcc.informerFactory.WaitForCacheSync(stopCh) + for _, item := range pcs { + t.Run(item.name, func(t *testing.T) { + fileName := item.pc.Name + ".pcapng" + pcc.sftpUploader = &testUploader{fileName: fileName, url: "sftp://127.0.0.1:22/aaa"} + }) + + go pcc.Run(stopCh) + time.Sleep(500 * time.Millisecond) + result, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Get(context.Background(), item.pc.Name, metav1.GetOptions{}) + assert.Nil(t, err) + for _, cond := range result.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCaptureCompleted { + assert.Equal(t, item.expectConditionStatus, cond.Status) + } + if cond.Type == crdv1alpha1.PacketCaptureFileUploaded { + assert.Equal(t, item.expectConditionStatus, cond.Status) + } + } + + if item.expectConditionStatus == metav1.ConditionTrue { + assert.Equal(t, int32(10), result.Status.NumberCaptured) + assert.Equal(t, "sftp://127.0.0.1:22/aaa/pc1.pcapng", result.Status.FilePath) + } + + // delete cr + err = pcc.crdClient.CrdV1alpha1().PacketCaptures().Delete(context.TODO(), item.pc.Name, metav1.DeleteOptions{}) + require.NoError(t, err) + + stopCh <- struct{}{} + } 
+ +} + +func TestMergeConditions(t *testing.T) { + tt := []struct { + name string + new []crdv1alpha1.PacketCaptureCondition + old []crdv1alpha1.PacketCaptureCondition + expected []crdv1alpha1.PacketCaptureCondition + }{ + + { + name: "use-old", + new: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + LastTransitionTime: metav1.Now(), + }, + }, + old: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + }, + }, + expected: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + LastTransitionTime: metav1.Now(), + }, + }, + }, + { + name: "use-new", + new: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + Status: metav1.ConditionTrue, + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + LastTransitionTime: metav1.Now(), + }, + }, + old: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + Status: metav1.ConditionFalse, + }, + }, + expected: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + LastTransitionTime: metav1.Now(), + Status: metav1.ConditionTrue, + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + for _, item := range tt { + t.Run(item.name, func(t *testing.T) { + result := mergeConditions(item.old, item.new) + assert.True(t, conditionSliceEqualsIgnoreLastTransitionTime(item.expected, result)) + }) + } +} + +func TestUpdatePacketCaptureStatus(t *testing.T) { + tt := []struct { + name string + state *packetCaptureState + expectedStatus *crdv1alpha1.PacketCaptureStatus + }{ + { + name: 
"upload-error", + state: &packetCaptureState{ + capturedPacketsNum: 15, + targetCapturedPacketsNum: 15, + filePath: "/tmp/a.pcapng", + err: errors.New("failed to upload"), + }, + expectedStatus: &crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: 15, + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + Reason: "Succeed", + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionFalse), + Reason: "UploadFailed", + Message: "failed to upload", + }, + }, + }, + }, + { + name: "running", + state: &packetCaptureState{ + capturedPacketsNum: 1, + targetCapturedPacketsNum: 15, + }, + expectedStatus: &crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: 1, + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureRunning, + Status: metav1.ConditionStatus(v1.ConditionTrue), + }, + }, + }, + }, + } + + objs := []runtime.Object{} + for _, item := range tt { + objs = append(objs, genTestCR(item.name, item.state.targetCapturedPacketsNum)) + } + + pcc := newFakePacketCaptureController(t, nil, objs) + stopCh := make(chan struct{}) + defer close(stopCh) + pcc.crdInformerFactory.Start(stopCh) + pcc.crdInformerFactory.WaitForCacheSync(stopCh) + pcc.informerFactory.Start(stopCh) + pcc.informerFactory.WaitForCacheSync(stopCh) + + for _, item := range tt { + t.Run(item.name, func(t *testing.T) { + err := pcc.updateStatus(context.Background(), item.name, item.state) + require.NoError(t, err) + result, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Get(context.TODO(), item.name, metav1.GetOptions{}) + require.NoError(t, err) + if !packetCaptureStatusEqual(*item.expectedStatus, result.Status) { + t.Errorf("updated status don't match: %+v %+v", *item.expectedStatus, result.Status) + } + }) + } + +} diff --git a/pkg/agent/supportbundlecollection/support_bundle_controller_test.go 
b/pkg/agent/supportbundlecollection/support_bundle_controller_test.go index c9ce97a0d61..5589d189fbc 100644 --- a/pkg/agent/supportbundlecollection/support_bundle_controller_test.go +++ b/pkg/agent/supportbundlecollection/support_bundle_controller_test.go @@ -91,7 +91,7 @@ func TestSupportBundleCollectionAdd(t *testing.T) { supportBundleCollection: generateSupportbundleCollection("supportBundle3", "https://10.220.175.92:22/root/supportbundle"), expectedCompleted: false, agentDumper: &mockAgentDumper{}, - uploader: &testUploader{}, + uploader: &testFailedUploader{}, }, { name: "Add SupportBundleCollection with retry logics", diff --git a/pkg/apiserver/handlers/featuregates/handler_test.go b/pkg/apiserver/handlers/featuregates/handler_test.go index 401a14d9562..54ec43c13a4 100644 --- a/pkg/apiserver/handlers/featuregates/handler_test.go +++ b/pkg/apiserver/handlers/featuregates/handler_test.go @@ -73,6 +73,7 @@ func Test_getGatesResponse(t *testing.T) { {Component: "agent", Name: "NodeLatencyMonitor", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "NodeNetworkPolicy", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "NodePortLocal", Status: "Enabled", Version: "GA"}, + {Component: "agent", Name: "PacketCapture", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "SecondaryNetwork", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "ServiceExternalIP", Status: "Disabled", Version: "ALPHA"}, {Component: "agent", Name: "ServiceTrafficDistribution", Status: "Enabled", Version: "BETA"}, diff --git a/pkg/features/antrea_features.go b/pkg/features/antrea_features.go index 8dc612a9340..0c8086f4c8b 100644 --- a/pkg/features/antrea_features.go +++ b/pkg/features/antrea_features.go @@ -73,6 +73,10 @@ const ( // Allows to trace path from a generated packet. Traceflow featuregate.Feature = "Traceflow" + // alpha: v2.2 + // Allows to capture packets for a flow. 
+ PacketCapture featuregate.Feature = "PacketCapture" + // alpha: v0.9 // Flow exporter exports IPFIX flow records of Antrea flows seen in conntrack module. FlowExporter featuregate.Feature = "FlowExporter" @@ -196,6 +200,7 @@ var ( ServiceTrafficDistribution: {Default: true, PreRelease: featuregate.Beta}, CleanupStaleUDPSvcConntrack: {Default: true, PreRelease: featuregate.Beta}, Traceflow: {Default: true, PreRelease: featuregate.Beta}, + PacketCapture: {Default: false, PreRelease: featuregate.Alpha}, AntreaIPAM: {Default: false, PreRelease: featuregate.Alpha}, FlowExporter: {Default: false, PreRelease: featuregate.Alpha}, NetworkPolicyStats: {Default: true, PreRelease: featuregate.Beta}, @@ -244,6 +249,7 @@ var ( SupportBundleCollection, TopologyAwareHints, Traceflow, + PacketCapture, TrafficControl, EgressTrafficShaping, EgressSeparateSubnet, @@ -301,6 +307,7 @@ var ( NodeNetworkPolicy: {}, L7FlowExporter: {}, NodeLatencyMonitor: {}, + PacketCapture: {}, } // supportedFeaturesOnExternalNode records the features supported on an external // Node. Antrea Agent checks the enabled features if it is running on an diff --git a/test/e2e/packetcapture_test.go b/test/e2e/packetcapture_test.go new file mode 100644 index 00000000000..84b1c44af27 --- /dev/null +++ b/test/e2e/packetcapture_test.go @@ -0,0 +1,728 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package e2e + +import ( + "context" + "fmt" + "net" + "sort" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/conversion" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + + crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" + agentconfig "antrea.io/antrea/pkg/config/agent" + "antrea.io/antrea/pkg/features" +) + +var ( + pcSecretNamespace = "kube-system" + // #nosec G101 + pcSecretName = "antrea-packetcapture-fileserver-auth" + tcpServerPodName = "tcp-server" + pcToolboxPodName = "toolbox" + udpServerPodName = "udp-server" + nonExistPodName = "non-existing-pod" + + tcpProto = intstr.FromString("TCP") + icmpProto = intstr.FromString("ICMP") + udpProto = intstr.FromString("UDP") + + testServerPort int32 = 80 + testNonExistPort int32 = 8085 + + pcTimeoutReason = "PacketCapture timeout" + pcShortTimeout = uint16(5) +) + +type pcTestCase struct { + name string + pc *crdv1alpha1.PacketCapture + expectedStatus crdv1alpha1.PacketCaptureStatus + + // required IP version, skip if not match. + ipVersion int + // Source Pod to run ping for live-traffic PacketCapture. 
+ srcPod string +} + +func genSFTPService() *v1.Service { + selector := map[string]string{"app": "sftp"} + return &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sftp", + Labels: selector, + }, + Spec: v1.ServiceSpec{ + Type: v1.ServiceTypeNodePort, + Selector: selector, + Ports: []v1.ServicePort{ + { + Port: 22, + TargetPort: intstr.FromInt32(22), + NodePort: 30010, + }, + }, + }, + } +} + +func genSFTPDeployment() *appsv1.Deployment { + replicas := int32(1) + selector := map[string]string{"app": "sftp"} + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sftp", + Labels: selector, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: selector, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sftp", + Labels: selector, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "sftp", + Image: "antrea/sftp", + ImagePullPolicy: v1.PullIfNotPresent, + Args: []string{"foo:pass:::upload"}, + }, + }, + }, + }, + }, + } +} + +func createUDPServerPod(name string, ns string, portNum int32, serverNode string) error { + port := v1.ContainerPort{Name: fmt.Sprintf("port-%d", portNum), ContainerPort: portNum} + return NewPodBuilder(name, ns, agnhostImage). + OnNode(serverNode). + WithContainerName("agnhost"). + WithArgs([]string{"serve-hostname", "--udp", "--http=false", "--port", fmt.Sprint(portNum)}). + WithPorts([]v1.ContainerPort{port}). + Create(testData) +} + +// TestPacketCapture is the top-level test which contains all subtests for +// PacketCapture related test cases, so they can share setup, teardown. 
+func TestPacketCapture(t *testing.T) { + data, err := setupTest(t) + if err != nil { + t.Fatalf("Error when setting up test: %v", err) + } + defer teardownTest(t, data) + + var previousAgentPacketCaptureEnableState bool + ac := func(config *agentconfig.AgentConfig) { + previousAgentPacketCaptureEnableState = config.FeatureGates[string(features.PacketCapture)] + config.FeatureGates[string(features.PacketCapture)] = true + } + if err := data.mutateAntreaConfigMap(nil, ac, false, true); err != nil { + t.Fatalf("Failed to enable PacketCapture flag: %v", err) + } + defer func() { + ac := func(config *agentconfig.AgentConfig) { + config.FeatureGates[string(features.PacketCapture)] = previousAgentPacketCaptureEnableState + } + if err := data.mutateAntreaConfigMap(nil, ac, false, true); err != nil { + t.Errorf("Failed to disable PacketCapture flag: %v", err) + } + }() + + // setup sftp server for test. + secretUserName := "foo" + secretPassword := "pass" + _, err = data.clientset.AppsV1().Deployments(data.testNamespace).Create(context.TODO(), genSFTPDeployment(), metav1.CreateOptions{}) + require.NoError(t, err) + _, err = data.clientset.CoreV1().Services(data.testNamespace).Create(context.TODO(), genSFTPService(), metav1.CreateOptions{}) + require.NoError(t, err) + failOnError(data.waitForDeploymentReady(t, data.testNamespace, "sftp", defaultTimeout), t) + + sec := &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: pcSecretName, + Namespace: pcSecretNamespace, + }, + Data: map[string][]byte{ + "username": []byte(secretUserName), + "password": []byte(secretPassword), + }, + } + _, err = data.clientset.CoreV1().Secrets(pcSecretNamespace).Create(context.TODO(), sec, metav1.CreateOptions{}) + require.NoError(t, err) + defer data.clientset.CoreV1().Secrets(pcSecretNamespace).Delete(context.TODO(), pcSecretName, metav1.DeleteOptions{}) + + t.Run("testPacketCaptureBasic", func(t *testing.T) { + testPacketCaptureBasic(t, data) + }) + t.Run("testPacketCapture", func(t 
*testing.T) { + testPacketCapture(t, data) + }) + +} + +func testPacketCapture(t *testing.T, data *TestData) { + nodeIdx := 0 + if len(clusterInfo.windowsNodes) != 0 { + nodeIdx = clusterInfo.windowsNodes[0] + } + node1 := nodeName(nodeIdx) + + err := data.createServerPodWithLabels(tcpServerPodName, data.testNamespace, serverPodPort, nil) + require.NoError(t, err) + err = data.createToolboxPodOnNode(pcToolboxPodName, data.testNamespace, node1, false) + require.NoError(t, err) + + podIPs := waitForPodIPs(t, data, []PodInfo{ + {tcpServerPodName, getOSString(), "", data.testNamespace}, + {pcToolboxPodName, getOSString(), "", data.testNamespace}, + }) + + // Give a little time for Windows containerd Nodes to set up OVS. + // Containerd configures port asynchronously, which could cause execution time of installing flow longer than docker. + time.Sleep(time.Second * 1) + + tcpServerPodIP := podIPs[tcpServerPodName].IPv4.String() + + testcases := []pcTestCase{ + { + name: "timeout-case", + ipVersion: 4, + srcPod: pcToolboxPodName, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{ + Name: randName(fmt.Sprintf("%s-timeout-case-", data.testNamespace)), + }, + Spec: crdv1alpha1.PacketCaptureSpec{ + Timeout: &pcShortTimeout, + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: pcToolboxPodName, + }, + }, + Destination: crdv1alpha1.Destination{ + IP: &tcpServerPodIP, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 500, + }, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: fmt.Sprintf("sftp://%s:30010/upload", controlPlaneNodeIPv4()), + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &tcpProto, + IPFamily: v1.IPv4Protocol, + TransportHeader: crdv1alpha1.TransportHeader{ + TCP: &crdv1alpha1.TCPHeader{ + DstPort: &testNonExistPort, + }, + }, + }, + }, + }, + expectedStatus: crdv1alpha1.PacketCaptureStatus{ + Conditions: 
[]crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Timeout", + Message: "context deadline exceeded", + }, + }, + }, + }, + { + + name: nonExistPodName, + ipVersion: 4, + srcPod: pcToolboxPodName, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{ + Name: randName(fmt.Sprintf("%s-%s-", data.testNamespace, nonExistPodName)), + }, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: pcToolboxPodName, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: nonExistPodName, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 5, + }, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: fmt.Sprintf("sftp://%s:30010/upload", controlPlaneNodeIPv4()), + }, + }, + }, + + expectedStatus: crdv1alpha1.PacketCaptureStatus{ + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionFalse), + LastTransitionTime: metav1.Now(), + Reason: "CaptureFailed", + Message: fmt.Sprintf("failed to get Pod %s/%s: pods \"%s\" not found", data.testNamespace, nonExistPodName, nonExistPodName), + }, + }, + }, + }, + } + t.Run("testPacketCapture", func(t *testing.T) { + for _, tc := range testcases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runPacketCaptureTest(t, data, tc) + }) + } + }) +} + +// testPacketCaptureTCP verifies if PacketCapture can capture tcp packets. this function only contains basic +// cases with pod-to-pod. 
+func testPacketCaptureBasic(t *testing.T, data *TestData) { + nodeIdx := 0 + if len(clusterInfo.windowsNodes) != 0 { + nodeIdx = clusterInfo.windowsNodes[0] + } + node1 := nodeName(nodeIdx) + + err := createUDPServerPod(udpServerPodName, data.testNamespace, serverPodPort, node1) + defer data.DeletePodAndWait(defaultTimeout, udpServerPodName, data.testNamespace) + require.NoError(t, err) + // test tcp server pod + err = data.createServerPodWithLabels(tcpServerPodName, data.testNamespace, serverPodPort, nil) + defer data.DeletePodAndWait(defaultTimeout, tcpServerPodName, data.testNamespace) + require.NoError(t, err) + err = data.createToolboxPodOnNode(pcToolboxPodName, data.testNamespace, node1, false) + defer data.DeletePodAndWait(defaultTimeout, pcToolboxPodName, data.testNamespace) + require.NoError(t, err) + + testcases := []pcTestCase{ + { + name: "ipv4-tcp", + ipVersion: 4, + srcPod: pcToolboxPodName, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{ + Name: randName(fmt.Sprintf("%s-ipv4-tcp-", data.testNamespace)), + }, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: pcToolboxPodName, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: tcpServerPodName, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 5, + }, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: fmt.Sprintf("sftp://%s:30010/upload", controlPlaneNodeIPv4()), + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &tcpProto, + IPFamily: v1.IPv4Protocol, + TransportHeader: crdv1alpha1.TransportHeader{ + TCP: &crdv1alpha1.TCPHeader{ + DstPort: &testServerPort, + }, + }, + }, + }, + }, + expectedStatus: crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: 5, + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: 
crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + }, + }, + }, + { + name: "ipv4-udp", + ipVersion: 4, + srcPod: pcToolboxPodName, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{ + Name: randName(fmt.Sprintf("%s-ipv4-udp-", data.testNamespace)), + }, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: pcToolboxPodName, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: udpServerPodName, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 5, + }, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: fmt.Sprintf("sftp://%s:30010/upload", controlPlaneNodeIPv4()), + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &udpProto, + IPFamily: v1.IPv4Protocol, + TransportHeader: crdv1alpha1.TransportHeader{ + UDP: &crdv1alpha1.UDPHeader{ + DstPort: &testServerPort, + }, + }, + }, + }, + }, + expectedStatus: crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: 5, + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + }, + }, + }, + { + name: "ipv4-icmp", + ipVersion: 4, + srcPod: pcToolboxPodName, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{ + Name: randName(fmt.Sprintf("%s-ipv4-icmp-", data.testNamespace)), + }, + Spec: 
crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: pcToolboxPodName, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: data.testNamespace, + Name: tcpServerPodName, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 5, + }, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: fmt.Sprintf("sftp://%s:30010/upload", controlPlaneNodeIPv4()), + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &icmpProto, + IPFamily: v1.IPv4Protocol, + }, + }, + }, + expectedStatus: crdv1alpha1.PacketCaptureStatus{ + NumberCaptured: 5, + Conditions: []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureCompleted, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + { + Type: crdv1alpha1.PacketCaptureFileUploaded, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: metav1.Now(), + Reason: "Succeed", + }, + }, + }, + }, + } + t.Run("testPacketCaptureBasic", func(t *testing.T) { + for _, tc := range testcases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + runPacketCaptureTest(t, data, tc) + }) + } + }) +} + +func getOSString() string { + if len(clusterInfo.windowsNodes) != 0 { + return "windows" + } else { + return "linux" + } +} + +func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { + switch tc.ipVersion { + case 4: + skipIfNotIPv4Cluster(t) + case 6: + skipIfNotIPv6Cluster(t) + } + // wait for toolbox + waitForPodIPs(t, data, []PodInfo{{pcToolboxPodName, getOSString(), "", data.testNamespace}}) + + dstPodName := "" + if tc.pc.Spec.Destination.Pod != nil { + dstPodName = tc.pc.Spec.Destination.Pod.Name + } + var dstPodIPs *PodIPs + if dstPodName != nonExistPodName && dstPodName != "" { + // wait for pods to be ready first + podIPs := 
waitForPodIPs(t, data, []PodInfo{{dstPodName, getOSString(), "", data.testNamespace}}) + dstPodIPs = podIPs[dstPodName] + } + + if _, err := data.crdClient.CrdV1alpha1().PacketCaptures().Create(context.TODO(), tc.pc, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error when creating PacketCapture: %v", err) + } + defer func() { + if err := data.crdClient.CrdV1alpha1().PacketCaptures().Delete(context.TODO(), tc.pc.Name, metav1.DeleteOptions{}); err != nil { + t.Errorf("Error when deleting PacketCapture: %v", err) + } + }() + + if dstPodName != nonExistPodName && tc.expectedStatus.Conditions[0].Message != pcTimeoutReason { + srcPod := tc.srcPod + if dstIP := tc.pc.Spec.Destination.IP; dstIP != nil { + ip := net.ParseIP(*dstIP) + if ip.To4() != nil { + dstPodIPs = &PodIPs{IPv4: &ip} + } else { + dstPodIPs = &PodIPs{IPv6: &ip} + } + } + time.Sleep(time.Second * 2) + protocol := *tc.pc.Spec.Packet.Protocol + server := dstPodIPs.IPv4.String() + if tc.ipVersion == 6 { + server = dstPodIPs.IPv6.String() + } + // wait for CR running. + + _, err := data.waitForPacketCapture(t, tc.pc.Name, 0, isPacketCaptureRunning) + if err != nil { + t.Fatalf("Error: Waiting PacketCapture to Running failed: %v", err) + } + // Send an ICMP echo packet from the source Pod to the destination. 
+ if protocol == icmpProto { + if err := data.RunPingCommandFromTestPod(PodInfo{srcPod, getOSString(), "", data.testNamespace}, + data.testNamespace, dstPodIPs, toolboxContainerName, 10, 0, false); err != nil { + t.Logf("Ping(%s) '%s' -> '%v' failed: ERROR (%v)", protocol.StrVal, srcPod, *dstPodIPs, err) + } + } else if protocol == tcpProto { + for i := 1; i <= 10; i++ { + if err := data.runNetcatCommandFromTestPodWithProtocol(tc.srcPod, data.testNamespace, toolboxContainerName, server, serverPodPort, "tcp"); err != nil { + t.Logf("Netcat(TCP) '%s' -> '%v' failed: ERROR (%v)", srcPod, server, err) + } + } + } else if protocol == udpProto { + for i := 1; i <= 10; i++ { + if err := data.runNetcatCommandFromTestPodWithProtocol(tc.srcPod, data.testNamespace, toolboxContainerName, server, serverPodPort, "udp"); err != nil { + t.Logf("Netcat(UDP) '%s' -> '%v' failed: ERROR (%v)", srcPod, server, err) + } + } + } + } + + timeout := tc.pc.Spec.Timeout + if timeout == nil { + tv := uint16(15) + timeout = &tv + } + + if strings.Contains(tc.name, "timeout") { + // wait more for status update. 
+ tv := *timeout + uint16(10) + timeout = &tv + } + + pc, err := data.waitForPacketCapture(t, tc.pc.Name, int(*timeout), isPacketCaptureReady) + if err != nil { + t.Fatalf("Error: Get PacketCapture failed: %v", err) + } + tc.expectedStatus.FilePath = pc.Status.FilePath + + // remove pending condition as it's random + newCond := []crdv1alpha1.PacketCaptureCondition{} + for _, cond := range pc.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCapturePending || cond.Type == crdv1alpha1.PacketCaptureRunning { + continue + } + newCond = append(newCond, cond) + } + pc.Status.Conditions = newCond + if !packetCaptureStatusEqual(pc.Status, tc.expectedStatus) { + t.Errorf("CR status not match, actual: %+v, expected: %+v", pc.Status, tc.expectedStatus) + } +} + +func (data *TestData) waitForPacketCapture(t *testing.T, name string, specTimeout int, fn func(*crdv1alpha1.PacketCapture) bool) (*crdv1alpha1.PacketCapture, error) { + var pc *crdv1alpha1.PacketCapture + var err error + var timeout = time.Duration(60) * time.Second + if specTimeout > 0 { + timeout = time.Duration(specTimeout) * time.Second + } + if err = wait.PollUntilContextTimeout(context.Background(), defaultInterval, timeout, true, func(ctx context.Context) (bool, error) { + pc, err = data.crdClient.CrdV1alpha1().PacketCaptures().Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + if fn(pc) { + return true, nil + } + return false, nil + + }); err != nil { + if pc != nil { + t.Errorf("Latest PacketCapture status: %s %+v", pc.Name, pc.Status) + } + return nil, err + } + return pc, nil +} + +func isPacketCaptureReady(pc *crdv1alpha1.PacketCapture) bool { + if len(pc.Status.Conditions) == 0 { + return false + } + + for _, cond := range pc.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCaptureCompleted { + return true + } + } + return false + +} + +func isPacketCaptureRunning(pc *crdv1alpha1.PacketCapture) bool { + if len(pc.Status.Conditions) == 0 { + return false + } + + 
for _, cond := range pc.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCaptureRunning && cond.Status == metav1.ConditionTrue { + return true + } + } + return false + +} + +func conditionEqualsIgnoreLastTransitionTime(a, b crdv1alpha1.PacketCaptureCondition) bool { + a1 := a + a1.LastTransitionTime = metav1.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC) + b1 := b + b1.LastTransitionTime = metav1.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC) + return a1 == b1 +} + +var semanticIgnoreLastTransitionTime = conversion.EqualitiesOrDie( + conditionSliceEqualsIgnoreLastTransitionTime, +) + +func packetCaptureStatusEqual(oldStatus, newStatus crdv1alpha1.PacketCaptureStatus) bool { + return semanticIgnoreLastTransitionTime.DeepEqual(oldStatus, newStatus) +} + +func conditionSliceEqualsIgnoreLastTransitionTime(as, bs []crdv1alpha1.PacketCaptureCondition) bool { + + sort.Slice(as, func(i, j int) bool { + return as[i].Type < as[j].Type + }) + sort.Slice(bs, func(i, j int) bool { + return bs[i].Type < bs[j].Type + }) + + if len(as) != len(bs) { + return false + } + for i := range as { + a := as[i] + b := bs[i] + if !conditionEqualsIgnoreLastTransitionTime(a, b) { + return false + } + } + return true +} From f674f00a8c987f82fb8a8aa245cbd805ea801398 Mon Sep 17 00:00:00 2001 From: Hang Yan Date: Sat, 9 Nov 2024 09:59:46 +0800 Subject: [PATCH 2/3] update Signed-off-by: Hang Yan --- build/charts/antrea/crds/packetcapture.yaml | 2 +- build/yamls/antrea-aks.yml | 2 +- build/yamls/antrea-crds.yml | 2 +- build/yamls/antrea-eks.yml | 2 +- build/yamls/antrea-gke.yml | 2 +- build/yamls/antrea-ipsec.yml | 2 +- build/yamls/antrea.yml | 2 +- pkg/agent/packetcapture/capture/bpf.go | 3 +- .../packetcapture/packetcapture_controller.go | 199 +++++----- .../packetcapture_controller_test.go | 353 ++++++++++-------- pkg/apis/crd/v1alpha1/types.go | 12 +- .../crd/v1alpha1/zz_generated.deepcopy.go | 2 +- test/e2e/packetcapture_test.go | 156 +++----- 13 files changed, 346 insertions(+), 393 deletions(-) diff 
--git a/build/charts/antrea/crds/packetcapture.yaml b/build/charts/antrea/crds/packetcapture.yaml index fec130517ed..013a878b241 100644 --- a/build/charts/antrea/crds/packetcapture.yaml +++ b/build/charts/antrea/crds/packetcapture.yaml @@ -160,7 +160,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index 082fa50066d..e30c8c27f9b 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea-crds.yml b/build/yamls/antrea-crds.yml index 6dd3719121a..436f15f0ddf 100644 --- a/build/yamls/antrea-crds.yml +++ b/build/yamls/antrea-crds.yml @@ -3033,7 +3033,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index 2403b02cba3..32e4cd34bb7 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index b7e77155fed..a1733ffd279 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index 34ea345f0d4..92d831c2263 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -3060,7 +3060,7 @@ spec: properties: 
url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 30f54afb0f1..deea6342fdf 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -3060,7 +3060,7 @@ spec: properties: url: type: string - pattern: 'sftps{0,1}:\/\/[\w-_./]+:\d+' + pattern: 'sftp:\/\/[\w-_./]+:\d+' status: type: object properties: diff --git a/pkg/agent/packetcapture/capture/bpf.go b/pkg/agent/packetcapture/capture/bpf.go index 65da6b1646b..841b721a25c 100644 --- a/pkg/agent/packetcapture/capture/bpf.go +++ b/pkg/agent/packetcapture/capture/bpf.go @@ -17,6 +17,7 @@ package capture import ( "encoding/binary" "net" + "strings" "golang.org/x/net/bpf" "k8s.io/apimachinery/pkg/util/intstr" @@ -92,7 +93,7 @@ func compilePacketFilter(packetSpec *crdv1alpha1.Packet, srcIP, dstIP net.IP) [] if packetSpec.Protocol.Type == intstr.Int { proto = uint32(packetSpec.Protocol.IntVal) } else { - proto = ProtocolMap[packetSpec.Protocol.StrVal] + proto = ProtocolMap[strings.ToUpper(packetSpec.Protocol.StrVal)] } inst = append(inst, loadIPv4Protocol) diff --git a/pkg/agent/packetcapture/packetcapture_controller.go b/pkg/agent/packetcapture/packetcapture_controller.go index a24fbda6531..bf9efa7a948 100644 --- a/pkg/agent/packetcapture/packetcapture_controller.go +++ b/pkg/agent/packetcapture/packetcapture_controller.go @@ -71,22 +71,12 @@ const ( minRetryDelay = 5 * time.Second maxRetryDelay = 60 * time.Second - defaultWorkers = 2 + defaultWorkers = 4 // defines how many capture request we can handle concurrently. waiting captures will be // marked as Pending until they can be processed. 
-<<<<<<< HEAD maxConcurrentCaptures = 16 - defaultTimeoutDuration = 60 * time.Second -======= - maxConcurrentCaptures = 16 - - contextTimeoutErrMsg = "context deadline exceeded" - defaultTimeoutDuration = 60 * time.Second - ->>>>>>> cc9ff24e1 (update) captureStatusUpdatePeriod = 10 * time.Second - // PacketCapture uses a dedicated Secret object to store authentication information for a file server. // #nosec G101 fileServerAuthSecretName = "antrea-packetcapture-fileserver-auth" @@ -95,9 +85,9 @@ const ( type packetCapturePhase string const ( - packetCapturePhasePending packetCapturePhase = "" - packetCapturePhaseRunning packetCapturePhase = "Running" - packetCapturePhaseCompleted packetCapturePhase = "Completed" + packetCapturePhasePending packetCapturePhase = "" + packetCapturePhaseStarted packetCapturePhase = "Started" + packetCapturePhaseComplete packetCapturePhase = "Complete" ) var ( @@ -106,20 +96,12 @@ var ( ) type packetCaptureState struct { - // name is the PacketCapture name. - name string // capturedPacketsNum records how many packets have been captured. Due to the RateLimiter, // this may not be the real-time data. capturedPacketsNum int32 // targetCapturedPacketsNum is the target number limit for a PacketCapture. When numCapturedPackets == targetCapturedPacketsNum, it means // the PacketCapture is done successfully. targetCapturedPacketsNum int32 - // updateRateLimiter controls the frequency of the updates to PacketCapture status. - updateRateLimiter *rate.Limiter - // pcapngFile is the file object for the packet file. - pcapngFile afero.File - // pcapngWriter is the writer for the packet file. - pcapngWriter *pcapgo.NgWriter // phase is the phase of the PacketCapture. phase packetCapturePhase // filePath is the final path shown in PacketCapture's status. 
@@ -295,43 +277,41 @@ func (c *Controller) syncPacketCapture(pcName string) error { return nil } - state := func() *packetCaptureState { + state := func() packetCaptureState { c.mutex.Lock() defer c.mutex.Unlock() state := c.captures[pcName] if state == nil { - state = &packetCaptureState{} + state = &packetCaptureState{targetCapturedPacketsNum: pc.Spec.CaptureConfig.FirstN.Number} c.captures[pcName] = state } phase := state.phase klog.InfoS("Syncing PacketCapture", "name", pcName, "phase", phase) if phase != packetCapturePhasePending { - return state + return *state } if c.numRunningCaptures >= maxConcurrentCaptures { err = fmt.Errorf("PacketCapture running count reach limit") } else { - timeout := defaultTimeoutDuration - if pc.Spec.Timeout != nil { - timeout = time.Duration(*pc.Spec.Timeout) * time.Second - } + // crd spec make sure it's not nil + timeout := time.Duration(*pc.Spec.Timeout) * time.Second ctx, cancel := context.WithTimeout(context.Background(), timeout) state.cancel = cancel - if err = c.startPacketCapture(ctx, pc, device); err != nil { - phase = packetCapturePhaseCompleted + if err = c.startPacketCapture(ctx, pc, state, device); err != nil { + phase = packetCapturePhaseComplete } else { - phase = packetCapturePhaseRunning + phase = packetCapturePhaseStarted c.numRunningCaptures += 1 } } state.phase = phase state.err = err c.captures[pcName] = state - return state + return *state }() - if updateErr := c.updateStatus(context.Background(), pcName, state); updateErr != nil { + if updateErr := c.updateStatus(context.Background(), pcName, &state); updateErr != nil { return fmt.Errorf("error when patching status: %w", updateErr) } return err @@ -398,43 +378,22 @@ func (c *Controller) getTargetCaptureDevice(pc *crdv1alpha1.PacketCapture) strin if len(podInterfaces) == 0 { return "" } -<<<<<<< HEAD -======= - ->>>>>>> 59f80d372 (1) return podInterfaces[0].InterfaceName } - - // startPacketCapture starts the capture on the target device. 
The actual capture process will be started // in a separated go routine. -func (c *Controller) startPacketCapture(ctx context.Context, pc *crdv1alpha1.PacketCapture, device string) error { - klog.V(4).InfoS("Started processing PacketCapture", "name", pc.Name) - pcState := c.captures[pc.Name] - pcState.name = pc.Name - srcIP, dstIp, err := c.parseIPs(ctx, pc) - if err != nil { - return err - } - klog.V(2).InfoS("Prepare capture on the current Node", "name", pc.Name, "device", device) - pcState.targetCapturedPacketsNum = pc.Spec.CaptureConfig.FirstN.Number - file, writer, err := getPacketFileAndWriter(pc.Name) - if err != nil { - return err - } - pcState.pcapngFile = file - pcState.pcapngWriter = writer - pcState.updateRateLimiter = rate.NewLimiter(rate.Every(captureStatusUpdatePeriod), 1) +func (c *Controller) startPacketCapture(ctx context.Context, pc *crdv1alpha1.PacketCapture, pcState *packetCaptureState, device string) error { + klog.V(2).InfoS("Started processing PacketCapture on the current Node", "name", pc.Name, "device", device) go func() { - captureErr := c.performCapture(ctx, pc, pcState, device, srcIP, dstIp) + captureErr := c.performCapture(ctx, pc, pcState, device) func() { c.mutex.Lock() defer c.mutex.Unlock() c.numRunningCaptures -= 1 state := c.captures[pc.Name] if state != nil { - state.phase = packetCapturePhaseCompleted + state.phase = packetCapturePhaseComplete state.err = captureErr } @@ -449,66 +408,68 @@ func (c *Controller) performCapture( pc *crdv1alpha1.PacketCapture, captureState *packetCaptureState, device string, - srcIP, dstIP net.IP, ) error { + srcIP, dstIP, err := c.parseIPs(ctx, pc) + if err != nil { + return err + } + pcapngFile, pcapngWriter, err := getPacketFileAndWriter(pc.Name) + if err != nil { + return err + } + updateRateLimiter := rate.NewLimiter(rate.Every(captureStatusUpdatePeriod), 1) packets, err := c.captureInterface.Capture(ctx, device, srcIP, dstIP, pc.Spec.Packet) if err != nil { klog.ErrorS(err, "Failed to start 
capture") return err } - klog.InfoS("Start capture packets", "name", pc.Name, "device", device) + klog.InfoS("Starting capture packets", "name", pc.Name, "device", device) for { select { case packet := <-packets: c.mutex.Lock() - if captureState.isCaptureSuccessful() { - c.mutex.Unlock() - return nil - } captureState.capturedPacketsNum++ + reachTarget := captureState.isCaptureSuccessful() + klog.V(5).InfoS("Captured packets count", "name", pc.Name, "count", captureState.capturedPacketsNum) c.mutex.Unlock() ci := gopacket.CaptureInfo{ Timestamp: time.Now(), CaptureLength: len(packet.Data()), Length: len(packet.Data()), } - err = captureState.pcapngWriter.WritePacket(ci, packet.Data()) + err = pcapngWriter.WritePacket(ci, packet.Data()) if err != nil { return fmt.Errorf("couldn't write packets: %w", err) } - klog.V(5).InfoS("Capture packets", "name", captureState.name, "count", - captureState.capturedPacketsNum, "len", ci.Length) - - c.mutex.Lock() - reachTarget := captureState.isCaptureSuccessful() - c.mutex.Unlock() - // use rate limiter to reduce the times we need to update status. - if reachTarget || captureState.updateRateLimiter.Allow() { - // if reach the target. flush the file and upload it. - if reachTarget { - path := env.GetPodName() + ":" + nameToPath(pc.Name) - statusPath := path - if err = captureState.pcapngWriter.Flush(); err != nil { - return err - } - if pc.Spec.FileServer != nil { - err = c.uploadPackets(ctx, pc, captureState.pcapngFile) - klog.V(4).InfoS("Upload captured packets", "name", pc.Name, "path", path) - statusPath = fmt.Sprintf("%s/%s.pcapng", pc.Spec.FileServer.URL, pc.Name) - } - c.mutex.Lock() - captureState.filePath = statusPath - c.mutex.Unlock() - if err != nil { - return err - } - if err := captureState.pcapngFile.Close(); err != nil { - klog.ErrorS(err, "Close pcapng file error", "name", pc.Name, "path", path) - } + klog.V(5).InfoS("Captured packet length", "name", pc.Name, "len", ci.Length) + + // if reach the target. 
flush the file and upload it. + if reachTarget { + path := env.GetPodName() + ":" + nameToPath(pc.Name) + statusPath := path + if err = pcapngWriter.Flush(); err != nil { + return err + } + if pc.Spec.FileServer != nil { + err = c.uploadPackets(ctx, pc, pcapngFile) + klog.V(4).InfoS("Upload captured packets", "name", pc.Name, "path", path) + statusPath = fmt.Sprintf("%s/%s.pcapng", pc.Spec.FileServer.URL, pc.Name) + } + c.mutex.Lock() + captureState.filePath = statusPath + c.mutex.Unlock() + if err != nil { + return err + } + if err := pcapngFile.Close(); err != nil { + klog.ErrorS(err, "Close pcapng file error", "name", pc.Name, "path", path) } - // report capture status. + return nil + } else if updateRateLimiter.Allow() { + // use rate limiter to reduce the times we need to update status. c.enqueuePacketCapture(pc) } + case <-ctx.Done(): return ctx.Err() } @@ -521,7 +482,7 @@ func (c *Controller) getPodIP(ctx context.Context, podRef *crdv1alpha1.PodRefere if len(podInterfaces) > 0 { podIP = podInterfaces[0].GetIPv4Addr() } else { - pod, err := c.kubeClient.CoreV1().Pods(podRef.Namespace).Get(context.TODO(), podRef.Name, metav1.GetOptions{}) + pod, err := c.kubeClient.CoreV1().Pods(podRef.Namespace).Get(ctx, podRef.Name, metav1.GetOptions{}) if err != nil { return nil, fmt.Errorf("failed to get Pod %s/%s: %w", podRef.Namespace, podRef.Name, err) } @@ -540,15 +501,21 @@ func (c *Controller) getPodIP(ctx context.Context, podRef *crdv1alpha1.PodRefere func (c *Controller) parseIPs(ctx context.Context, pc *crdv1alpha1.PacketCapture) (srcIP, dstIP net.IP, err error) { if pc.Spec.Source.Pod != nil { srcIP, err = c.getPodIP(ctx, pc.Spec.Source.Pod) + if err != nil { + return + } } else if pc.Spec.Source.IP != nil { srcIP = net.ParseIP(*pc.Spec.Source.IP) if srcIP == nil { err = fmt.Errorf("invalid source IP address: %s", *pc.Spec.Source.IP) + return } } - if pc.Spec.Destination.Pod != nil { dstIP, err = c.getPodIP(ctx, pc.Spec.Destination.Pod) + if err != nil { + return 
+ } } else if pc.Spec.Destination.IP != nil { dstIP = net.ParseIP(*pc.Spec.Destination.IP) if dstIP == nil { @@ -608,7 +575,6 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe } conditions := []crdv1alpha1.PacketCaptureCondition{} t := metav1.Now() - c.mutex.Lock() updatedStatus := crdv1alpha1.PacketCaptureStatus{ NumberCaptured: state.capturedPacketsNum, FilePath: state.filePath, @@ -617,7 +583,7 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe if state.err != nil { updatedStatus.FilePath = "" conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionFalse), LastTransitionTime: metav1.Now(), Reason: "CaptureFailed", @@ -627,17 +593,16 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe if errors.Is(state.err, context.DeadlineExceeded) { conditions = []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: t, Reason: "Timeout", - Message: state.err.Error(), }, } } else if state.isCaptureSuccessful() { conditions = []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: t, Reason: "Succeed", @@ -653,11 +618,22 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe Message: state.err.Error(), }) } + if state.phase == packetCapturePhasePending { + conditions = []crdv1alpha1.PacketCaptureCondition{ + { + Type: crdv1alpha1.PacketCaptureStarted, + Status: metav1.ConditionStatus(v1.ConditionFalse), + LastTransitionTime: t, + Reason: "StartFailed", + Message: state.err.Error(), + }, + } + } } else { if state.isCaptureSuccessful() { 
conditions = []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: t, Reason: "Succeed", @@ -671,22 +647,21 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe Reason: "Succeed", }) } - } else if state.phase == packetCapturePhaseRunning { + } else if state.phase == packetCapturePhaseStarted { conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ - Type: crdv1alpha1.PacketCaptureRunning, + Type: crdv1alpha1.PacketCaptureStarted, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: t, }) - } else { + } else if state.phase == packetCapturePhasePending { conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ - Type: crdv1alpha1.PacketCaptureRunning, - Status: metav1.ConditionStatus(v1.ConditionTrue), + Type: crdv1alpha1.PacketCaptureStarted, + Status: metav1.ConditionStatus(v1.ConditionFalse), LastTransitionTime: t, }) } } - c.mutex.Unlock() updatedStatus.Conditions = conditions if retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { @@ -703,10 +678,10 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe } toUpdate.Status = updatedStatus klog.V(2).InfoS("Updating PacketCapture", "name", name, "status", toUpdate.Status) - _, updateErr := c.crdClient.CrdV1alpha1().PacketCaptures().UpdateStatus(context.TODO(), toUpdate, metav1.UpdateOptions{}) + _, updateErr := c.crdClient.CrdV1alpha1().PacketCaptures().UpdateStatus(ctx, toUpdate, metav1.UpdateOptions{}) if updateErr != nil && apierrors.IsConflict(updateErr) { var getErr error - if toUpdate, getErr = c.crdClient.CrdV1alpha1().PacketCaptures().Get(context.TODO(), name, metav1.GetOptions{}); getErr != nil { + if toUpdate, getErr = c.crdClient.CrdV1alpha1().PacketCaptures().Get(ctx, name, metav1.GetOptions{}); getErr != nil { return getErr } } diff --git 
a/pkg/agent/packetcapture/packetcapture_controller_test.go b/pkg/agent/packetcapture/packetcapture_controller_test.go index 338b3255aa2..d34fa1fdc58 100644 --- a/pkg/agent/packetcapture/packetcapture_controller_test.go +++ b/pkg/agent/packetcapture/packetcapture_controller_test.go @@ -16,7 +16,6 @@ package packetcapture import ( "context" - "errors" "fmt" "io" "net" @@ -38,6 +37,8 @@ import ( "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" "antrea.io/antrea/pkg/agent/interfacestore" "antrea.io/antrea/pkg/agent/util" @@ -50,15 +51,19 @@ import ( var ( pod1IPv4 = "192.168.10.10" pod2IPv4 = "192.168.11.10" + pod3IPv4 = "192.168.12.10" - ipv6 = "2001:db8::68" - pod1MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:0f") - pod2MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:00") - ofPortPod1 = uint32(1) - ofPortPod2 = uint32(2) + ipv6 = "2001:db8::68" + pod1MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:0f") + pod2MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:00") + ofPortPod1 = uint32(1) + ofPortPod2 = uint32(2) + testCaptureTimeout = uint32(1) + testCaptureNum int32 = 15 icmpProto = intstr.FromString("ICMP") invalidProto = intstr.FromString("INVALID") + testFTPUrl = "sftp://127.0.0.1:22/path" pod1 = v1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -83,6 +88,13 @@ var ( Name: "pod-3", Namespace: "default", }, + Status: v1.PodStatus{ + PodIPs: []v1.PodIP{ + { + IP: pod3IPv4, + }, + }, + }, } secret1 = v1.Secret{ @@ -97,19 +109,6 @@ var ( } ) -func generateTestSecret() *v1.Secret { - return &v1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "AAA", - Namespace: "default", - }, - Data: map[string][]byte{ - "username": []byte("AAA"), - "password": []byte("BBBCCC"), - }, - } -} - func genTestCR(name string, num int32) *crdv1alpha1.PacketCapture { result := &crdv1alpha1.PacketCapture{ ObjectMeta: metav1.ObjectMeta{Name: name, UID: types.UID(fmt.Sprintf("uid-%s", name))}, @@ -135,8 +134,9 @@ func 
genTestCR(name string, num int32) *crdv1alpha1.PacketCapture { Protocol: &icmpProto, }, FileServer: &crdv1alpha1.PacketCaptureFileServer{ - URL: "sftp://127.0.0.1:22/aaa", + URL: testFTPUrl, }, + Timeout: &testCaptureTimeout, }, } return result @@ -145,14 +145,18 @@ func genTestCR(name string, num int32) *crdv1alpha1.PacketCapture { type testUploader struct { url string fileName string + // for concurrent cases, no need to check + checkFileName bool } func (uploader *testUploader) Upload(url string, fileName string, config *ssh.ClientConfig, outputFile io.Reader) error { if url != uploader.url { return fmt.Errorf("expected url: %s for uploader, got: %s", uploader.url, url) } - if fileName != uploader.fileName { - return fmt.Errorf("expected filename: %s for uploader, got: %s", uploader.fileName, fileName) + if uploader.checkFileName { + if fileName != uploader.fileName { + return fmt.Errorf("expected filename: %s, got: %s ", uploader.fileName, fileName) + } } return nil } @@ -183,7 +187,7 @@ type testCapture struct { } func (p *testCapture) Capture(ctx context.Context, device string, srcIP, dstIP net.IP, packet *crdv1alpha1.Packet) (chan gopacket.Packet, error) { - ch := make(chan gopacket.Packet, 15) + ch := make(chan gopacket.Packet, testCaptureNum) for i := 0; i < 15; i++ { ch <- craftTestPacket() } @@ -201,7 +205,7 @@ type fakePacketCaptureController struct { func newFakePacketCaptureController(t *testing.T, runtimeObjects []runtime.Object, initObjects []runtime.Object) *fakePacketCaptureController { controller := gomock.NewController(t) - objs := append(runtimeObjects, &pod1, &pod2, &pod3, &secret1, generateTestSecret()) + objs := append(runtimeObjects, &pod1, &pod2, &pod3, &secret1) kubeClient := fake.NewSimpleClientset(objs...) crdClient := fakeversioned.NewSimpleClientset(initObjects...) 
crdInformerFactory := crdinformers.NewSharedInformerFactory(crdClient, 0) @@ -212,15 +216,34 @@ func newFakePacketCaptureController(t *testing.T, runtimeObjects []runtime.Objec addPodInterface(ifaceStore, pod1.Namespace, pod1.Name, []string{pod1IPv4, ipv6}, pod1MAC.String(), int32(ofPortPod1)) addPodInterface(ifaceStore, pod2.Namespace, pod2.Name, []string{pod2IPv4}, pod2MAC.String(), int32(ofPortPod2)) - pcController, _ := NewPacketCaptureController( - kubeClient, - crdClient, - packetCaptureInformer, - ifaceStore, - ) + // NewPacketCaptureController dont work on windows + pcController, err := NewPacketCaptureController(kubeClient, crdClient, packetCaptureInformer, ifaceStore) + if err != nil { + pcController = &Controller{ + kubeClient: kubeClient, + crdClient: crdClient, + packetCaptureInformer: packetCaptureInformer, + packetCaptureLister: packetCaptureInformer.Lister(), + packetCaptureSynced: packetCaptureInformer.Informer().HasSynced, + interfaceStore: ifaceStore, + captures: make(map[string]*packetCaptureState), + } + packetCaptureInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ + AddFunc: pcController.addPacketCapture, + UpdateFunc: pcController.updatePacketCapture, + DeleteFunc: pcController.deletePacketCapture, + }, resyncPeriod) + } + pcController.sftpUploader = &testUploader{} pcController.captureInterface = &testCapture{} + pcController.queue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemExponentialFailureRateLimiter[string](time.Millisecond*50, time.Millisecond*200), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "packetcapture"}, + ) + t.Setenv("POD_NAME", "antrea-agent") + t.Setenv("POD_NAMESPACE", "kube-system") return &fakePacketCaptureController{ Controller: pcController, kubeClient: kubeClient, @@ -247,20 +270,75 @@ func addPodInterface(ifaceStore interfacestore.InterfaceStore, podNamespace, pod }) } +func TestMultiplePacketCaptures(t *testing.T) { + defaultFS = 
afero.NewMemMapFs() + packetsDir := "/tmp/antrea/packetcapture/packets" + defaultFS.MkdirAll(packetsDir, 0755) + nameFunc := func(id int) string { + return fmt.Sprintf("pc-%d", id) + } + var objs []runtime.Object + for i := 0; i < 20; i++ { + objs = append(objs, genTestCR(nameFunc(i), int32(testCaptureNum))) + } + pcc := newFakePacketCaptureController(t, nil, objs) + pcc.sftpUploader = &testUploader{url: testFTPUrl} + stopCh := make(chan struct{}) + defer close(stopCh) + pcc.crdInformerFactory.Start(stopCh) + pcc.crdInformerFactory.WaitForCacheSync(stopCh) + pcc.informerFactory.Start(stopCh) + pcc.informerFactory.WaitForCacheSync(stopCh) + go pcc.Run(stopCh) + assert.Eventually(t, func() bool { + items, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().List(context.Background(), metav1.ListOptions{}) + if err != nil { + return false + } + for _, result := range items.Items { + for _, cond := range result.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCaptureComplete || cond.Type == crdv1alpha1.PacketCaptureFileUploaded { + if cond.Status == metav1.ConditionFalse { + return false + } + } + } + } + pcc.mutex.Lock() + if pcc.numRunningCaptures != 0 { + return false + } + pcc.mutex.Unlock() + return true + }, 5*time.Second, 50*time.Millisecond) + for i := 0; i < 20; i++ { + err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Delete(context.TODO(), nameFunc(i), metav1.DeleteOptions{}) + require.NoError(t, err) + } + assert.Eventually(t, func() bool { + pcc.mutex.Lock() + if len(pcc.captures) != 0 { + return false + } + pcc.mutex.Unlock() + return true + }, 2*time.Second, 20*time.Millisecond) + +} + // TestPacketCaptureControllerRun was used to validate the whole run process is working. It doesn't wait for // the testing pc to finish. on sandbox env, no good solution to open raw socket. 
func TestPacketCaptureControllerRun(t *testing.T) { - // create test os - defaultFS = afero.NewMemMapFs() - defaultFS.MkdirAll("/tmp/antrea/packetcapture/packets", 0755) pcs := []struct { - name string - pc *crdv1alpha1.PacketCapture - expectConditionStatus metav1.ConditionStatus + name string + pc *crdv1alpha1.PacketCapture + expectCompleteStatus metav1.ConditionStatus + expectUploadStatus metav1.ConditionStatus }{ { - name: "start packetcapture", - expectConditionStatus: metav1.ConditionTrue, + name: "start packetcapture", + expectCompleteStatus: metav1.ConditionTrue, + expectUploadStatus: metav1.ConditionTrue, pc: &crdv1alpha1.PacketCapture{ ObjectMeta: metav1.ObjectMeta{Name: "pc1", UID: "uid1"}, Spec: crdv1alpha1.PacketCaptureSpec{ @@ -278,7 +356,7 @@ func TestPacketCaptureControllerRun(t *testing.T) { }, CaptureConfig: crdv1alpha1.CaptureConfig{ FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ - Number: 10, + Number: 15, }, }, Packet: &crdv1alpha1.Packet{ @@ -287,14 +365,50 @@ func TestPacketCaptureControllerRun(t *testing.T) { FileServer: &crdv1alpha1.PacketCaptureFileServer{ URL: "sftp://127.0.0.1:22/aaa", }, + Timeout: &testCaptureTimeout, }, }, }, { - name: "invalid proto", - expectConditionStatus: metav1.ConditionFalse, + name: "parse ip", + expectCompleteStatus: metav1.ConditionTrue, + expectUploadStatus: metav1.ConditionTrue, pc: &crdv1alpha1.PacketCapture{ ObjectMeta: metav1.ObjectMeta{Name: "pc2", UID: "uid2"}, + Spec: crdv1alpha1.PacketCaptureSpec{ + Source: crdv1alpha1.Source{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod1.Namespace, + Name: pod1.Name, + }, + }, + Destination: crdv1alpha1.Destination{ + Pod: &crdv1alpha1.PodReference{ + Namespace: pod3.Namespace, + Name: pod3.Name, + }, + }, + CaptureConfig: crdv1alpha1.CaptureConfig{ + FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ + Number: 15, + }, + }, + Packet: &crdv1alpha1.Packet{ + Protocol: &icmpProto, + }, + FileServer: &crdv1alpha1.PacketCaptureFileServer{ + URL: 
"sftp://127.0.0.1:22/aaa", + }, + Timeout: &testCaptureTimeout, + }, + }, + }, + { + name: "invalid proto", + expectCompleteStatus: metav1.ConditionFalse, + expectUploadStatus: metav1.ConditionFalse, + pc: &crdv1alpha1.PacketCapture{ + ObjectMeta: metav1.ObjectMeta{Name: "pc4", UID: "uid4"}, Spec: crdv1alpha1.PacketCaptureSpec{ Source: crdv1alpha1.Source{ Pod: &crdv1alpha1.PodReference{ @@ -310,7 +424,7 @@ func TestPacketCaptureControllerRun(t *testing.T) { }, CaptureConfig: crdv1alpha1.CaptureConfig{ FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ - Number: 10, + Number: 15, }, }, Packet: &crdv1alpha1.Packet{ @@ -319,14 +433,16 @@ func TestPacketCaptureControllerRun(t *testing.T) { FileServer: &crdv1alpha1.PacketCaptureFileServer{ URL: "sftp://127.0.0.1:22/aaa", }, + Timeout: &testCaptureTimeout, }, }, }, { - name: "timeout-case", - expectConditionStatus: metav1.ConditionFalse, + name: "upload failed", + expectCompleteStatus: metav1.ConditionTrue, + expectUploadStatus: metav1.ConditionFalse, pc: &crdv1alpha1.PacketCapture{ - ObjectMeta: metav1.ObjectMeta{Name: "pc3", UID: "uid3"}, + ObjectMeta: metav1.ObjectMeta{Name: "pc5", UID: "uid5"}, Spec: crdv1alpha1.PacketCaptureSpec{ Source: crdv1alpha1.Source{ Pod: &crdv1alpha1.PodReference{ @@ -342,15 +458,16 @@ func TestPacketCaptureControllerRun(t *testing.T) { }, CaptureConfig: crdv1alpha1.CaptureConfig{ FirstN: &crdv1alpha1.PacketCaptureFirstNConfig{ - Number: 100, + Number: 15, }, }, Packet: &crdv1alpha1.Packet{ Protocol: &icmpProto, }, FileServer: &crdv1alpha1.PacketCaptureFileServer{ - URL: "sftp://127.0.0.1:22/aaa", + URL: "sftp://127.0.0.1:22/aaa-invalid", }, + Timeout: &testCaptureTimeout, }, }, }, @@ -361,43 +478,43 @@ func TestPacketCaptureControllerRun(t *testing.T) { objs = append(objs, pc.pc) } pcc := newFakePacketCaptureController(t, nil, objs) + pcc.sftpUploader = &testUploader{url: "sftp://127.0.0.1:22/aaa"} stopCh := make(chan struct{}) defer close(stopCh) + defer defaultFS.Remove(packetDirectory) 
pcc.crdInformerFactory.Start(stopCh) pcc.crdInformerFactory.WaitForCacheSync(stopCh) pcc.informerFactory.Start(stopCh) pcc.informerFactory.WaitForCacheSync(stopCh) + go pcc.Run(stopCh) for _, item := range pcs { t.Run(item.name, func(t *testing.T) { - fileName := item.pc.Name + ".pcapng" - pcc.sftpUploader = &testUploader{fileName: fileName, url: "sftp://127.0.0.1:22/aaa"} + assert.Eventually(t, func() bool { + result, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Get(context.Background(), item.pc.Name, metav1.GetOptions{}) + if err != nil { + return false + } + for _, cond := range result.Status.Conditions { + if cond.Type == crdv1alpha1.PacketCaptureComplete && item.expectCompleteStatus != cond.Status { + return false + } + if cond.Type == crdv1alpha1.PacketCaptureFileUploaded && item.expectUploadStatus != cond.Status { + return false + } + } + if item.expectCompleteStatus == metav1.ConditionTrue { + if result.Status.NumberCaptured != testCaptureNum { + return false + } + } + // delete cr + err = pcc.crdClient.CrdV1alpha1().PacketCaptures().Delete(context.TODO(), item.pc.Name, metav1.DeleteOptions{}) + return err == nil + + }, 2*time.Second, 20*time.Millisecond) + stopCh <- struct{}{} }) - - go pcc.Run(stopCh) - time.Sleep(500 * time.Millisecond) - result, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Get(context.Background(), item.pc.Name, metav1.GetOptions{}) - assert.Nil(t, err) - for _, cond := range result.Status.Conditions { - if cond.Type == crdv1alpha1.PacketCaptureCompleted { - assert.Equal(t, item.expectConditionStatus, cond.Status) - } - if cond.Type == crdv1alpha1.PacketCaptureFileUploaded { - assert.Equal(t, item.expectConditionStatus, cond.Status) - } - } - - if item.expectConditionStatus == metav1.ConditionTrue { - assert.Equal(t, int32(10), result.Status.NumberCaptured) - assert.Equal(t, "sftp://127.0.0.1:22/aaa/pc1.pcapng", result.Status.FilePath) - } - - // delete cr - err = 
pcc.crdClient.CrdV1alpha1().PacketCaptures().Delete(context.TODO(), item.pc.Name, metav1.DeleteOptions{}) - require.NoError(t, err) - - stopCh <- struct{}{} } - } func TestMergeConditions(t *testing.T) { @@ -412,7 +529,7 @@ func TestMergeConditions(t *testing.T) { name: "use-old", new: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), }, { @@ -422,13 +539,13 @@ func TestMergeConditions(t *testing.T) { }, old: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), }, }, expected: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), }, { @@ -441,7 +558,7 @@ func TestMergeConditions(t *testing.T) { name: "use-new", new: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), Status: metav1.ConditionTrue, }, @@ -452,14 +569,14 @@ func TestMergeConditions(t *testing.T) { }, old: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), Status: metav1.ConditionFalse, }, }, expected: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, LastTransitionTime: metav1.Now(), Status: metav1.ConditionTrue, }, @@ -478,79 +595,3 @@ func TestMergeConditions(t *testing.T) { }) } } - -func TestUpdatePacketCaptureStatus(t *testing.T) { - tt := []struct { - name string - state *packetCaptureState - expectedStatus *crdv1alpha1.PacketCaptureStatus - }{ - { - name: "upload-error", - state: &packetCaptureState{ - capturedPacketsNum: 15, - targetCapturedPacketsNum: 15, - filePath: 
"/tmp/a.pcapng", - err: errors.New("failed to upload"), - }, - expectedStatus: &crdv1alpha1.PacketCaptureStatus{ - NumberCaptured: 15, - Conditions: []crdv1alpha1.PacketCaptureCondition{ - { - Type: crdv1alpha1.PacketCaptureCompleted, - Status: metav1.ConditionStatus(v1.ConditionTrue), - Reason: "Succeed", - }, - { - Type: crdv1alpha1.PacketCaptureFileUploaded, - Status: metav1.ConditionStatus(v1.ConditionFalse), - Reason: "UploadFailed", - Message: "failed to upload", - }, - }, - }, - }, - { - name: "running", - state: &packetCaptureState{ - capturedPacketsNum: 1, - targetCapturedPacketsNum: 15, - }, - expectedStatus: &crdv1alpha1.PacketCaptureStatus{ - NumberCaptured: 1, - Conditions: []crdv1alpha1.PacketCaptureCondition{ - { - Type: crdv1alpha1.PacketCaptureRunning, - Status: metav1.ConditionStatus(v1.ConditionTrue), - }, - }, - }, - }, - } - - objs := []runtime.Object{} - for _, item := range tt { - objs = append(objs, genTestCR(item.name, item.state.targetCapturedPacketsNum)) - } - - pcc := newFakePacketCaptureController(t, nil, objs) - stopCh := make(chan struct{}) - defer close(stopCh) - pcc.crdInformerFactory.Start(stopCh) - pcc.crdInformerFactory.WaitForCacheSync(stopCh) - pcc.informerFactory.Start(stopCh) - pcc.informerFactory.WaitForCacheSync(stopCh) - - for _, item := range tt { - t.Run(item.name, func(t *testing.T) { - err := pcc.updateStatus(context.Background(), item.name, item.state) - require.NoError(t, err) - result, err := pcc.crdClient.CrdV1alpha1().PacketCaptures().Get(context.TODO(), item.name, metav1.GetOptions{}) - require.NoError(t, err) - if !packetCaptureStatusEqual(*item.expectedStatus, result.Status) { - t.Errorf("updated status don't match: %+v %+v", *item.expectedStatus, result.Status) - } - }) - } - -} diff --git a/pkg/apis/crd/v1alpha1/types.go b/pkg/apis/crd/v1alpha1/types.go index e40282c0349..80fc665d035 100644 --- a/pkg/apis/crd/v1alpha1/types.go +++ b/pkg/apis/crd/v1alpha1/types.go @@ -450,7 +450,7 @@ type 
PacketCaptureFileServer struct { type PacketCaptureSpec struct { // Timeout is the timeout for this capture session. If not specified, defaults to 60s. - Timeout *uint16 `json:"timeout,omitempty"` + Timeout *uint32 `json:"timeout,omitempty"` CaptureConfig CaptureConfig `json:"captureConfig"` // Source is the traffic source we want to perform capture on. Both `Source` and `Destination` is required // for a capture session, and at least one `Pod` should be present either in the source or the destination. @@ -480,13 +480,11 @@ type PacketCaptureStatus struct { type PacketCaptureConditionType string const ( - // PacketCapturePending means this request is still pending. - PacketCapturePending PacketCaptureConditionType = "PacketCapturePending" - // PacketCaptureRunning means antrea is processing this capture request. - PacketCaptureRunning PacketCaptureConditionType = "PacketCaptureRunning" - // PacketCaptureCompleted means enough packets have been captured and saved in an antrea-agent Pod locally already, but results haven't been + // PacketCaptureStarted means this request has been started. + PacketCaptureStarted PacketCaptureConditionType = "PacketCaptureStarted" + // PacketCaptureComplete means enough packets have been captured and saved in an antrea-agent Pod locally already, but results haven't been // uploaded yet (if a file server has been configured). - PacketCaptureCompleted PacketCaptureConditionType = "PacketCaptureCompleted" + PacketCaptureComplete PacketCaptureConditionType = "PacketCaptureComplete" // PacketCaptureFileUploaded means the captured packets file has been uploaded to the target file server. 
PacketCaptureFileUploaded PacketCaptureConditionType = "PacketCaptureFileUploaded" ) diff --git a/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go index dd90a610ed5..17b9f50d0f2 100644 --- a/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go @@ -721,7 +721,7 @@ func (in *PacketCaptureSpec) DeepCopyInto(out *PacketCaptureSpec) { *out = *in if in.Timeout != nil { in, out := &in.Timeout, &out.Timeout - *out = new(uint16) + *out = new(uint32) **out = **in } in.CaptureConfig.DeepCopyInto(&out.CaptureConfig) diff --git a/test/e2e/packetcapture_test.go b/test/e2e/packetcapture_test.go index 84b1c44af27..c0d9e7025bb 100644 --- a/test/e2e/packetcapture_test.go +++ b/test/e2e/packetcapture_test.go @@ -32,30 +32,9 @@ import ( "k8s.io/apimachinery/pkg/util/wait" crdv1alpha1 "antrea.io/antrea/pkg/apis/crd/v1alpha1" - agentconfig "antrea.io/antrea/pkg/config/agent" "antrea.io/antrea/pkg/features" ) -var ( - pcSecretNamespace = "kube-system" - // #nosec G101 - pcSecretName = "antrea-packetcapture-fileserver-auth" - tcpServerPodName = "tcp-server" - pcToolboxPodName = "toolbox" - udpServerPodName = "udp-server" - nonExistPodName = "non-existing-pod" - - tcpProto = intstr.FromString("TCP") - icmpProto = intstr.FromString("ICMP") - udpProto = intstr.FromString("UDP") - - testServerPort int32 = 80 - testNonExistPort int32 = 8085 - - pcTimeoutReason = "PacketCapture timeout" - pcShortTimeout = uint16(5) -) - type pcTestCase struct { name string pc *crdv1alpha1.PacketCapture @@ -110,7 +89,7 @@ func genSFTPDeployment() *appsv1.Deployment { Containers: []v1.Container{ { Name: "sftp", - Image: "antrea/sftp", + Image: "ghcr.io/atmoz/sftp/debian:latest", ImagePullPolicy: v1.PullIfNotPresent, Args: []string{"foo:pass:::upload"}, }, @@ -134,32 +113,20 @@ func createUDPServerPod(name string, ns string, portNum int32, serverNode string // TestPacketCapture is the top-level test which contains all 
subtests for // PacketCapture related test cases, so they can share setup, teardown. func TestPacketCapture(t *testing.T) { + skipIfFeatureDisabled(t, features.PacketCapture, true, false) + skipIfHasWindowsNodes(t) data, err := setupTest(t) if err != nil { t.Fatalf("Error when setting up test: %v", err) } defer teardownTest(t, data) - var previousAgentPacketCaptureEnableState bool - ac := func(config *agentconfig.AgentConfig) { - previousAgentPacketCaptureEnableState = config.FeatureGates[string(features.PacketCapture)] - config.FeatureGates[string(features.PacketCapture)] = true - } - if err := data.mutateAntreaConfigMap(nil, ac, false, true); err != nil { - t.Fatalf("Failed to enable PacketCapture flag: %v", err) - } - defer func() { - ac := func(config *agentconfig.AgentConfig) { - config.FeatureGates[string(features.PacketCapture)] = previousAgentPacketCaptureEnableState - } - if err := data.mutateAntreaConfigMap(nil, ac, false, true); err != nil { - t.Errorf("Failed to disable PacketCapture flag: %v", err) - } - }() - // setup sftp server for test. secretUserName := "foo" secretPassword := "pass" + // #nosec G101 + pcSecretName := "antrea-packetcapture-fileserver-auth" + pcSecretNamespace := "kube-system" _, err = data.clientset.AppsV1().Deployments(data.testNamespace).Create(context.TODO(), genSFTPDeployment(), metav1.CreateOptions{}) require.NoError(t, err) _, err = data.clientset.CoreV1().Services(data.testNamespace).Create(context.TODO(), genSFTPService(), metav1.CreateOptions{}) @@ -183,33 +150,39 @@ func TestPacketCapture(t *testing.T) { t.Run("testPacketCaptureBasic", func(t *testing.T) { testPacketCaptureBasic(t, data) }) - t.Run("testPacketCapture", func(t *testing.T) { - testPacketCapture(t, data) - }) } -func testPacketCapture(t *testing.T, data *TestData) { +// testPacketCaptureTCP verifies if PacketCapture can capture tcp packets. this function only contains basic +// cases with pod-to-pod. 
+func testPacketCaptureBasic(t *testing.T, data *TestData) { nodeIdx := 0 - if len(clusterInfo.windowsNodes) != 0 { - nodeIdx = clusterInfo.windowsNodes[0] - } node1 := nodeName(nodeIdx) + tcpServerPodName := "tcp-server" + pcToolboxPodName := "toolbox" + udpServerPodName := "udp-server" + icmpProto := intstr.FromString("ICMP") + udpProto := intstr.FromString("UDP") + tcpProto := intstr.FromString("TCP") + testServerPort := int32(80) + pcShortTimeout := uint32(5) + nonExistPodName := "non-existing-pod" + testNonExistPort := int32(8085) - err := data.createServerPodWithLabels(tcpServerPodName, data.testNamespace, serverPodPort, nil) + err := createUDPServerPod(udpServerPodName, data.testNamespace, serverPodPort, node1) + defer data.DeletePodAndWait(defaultTimeout, udpServerPodName, data.testNamespace) + require.NoError(t, err) + // test tcp server pod + err = data.createServerPodWithLabels(tcpServerPodName, data.testNamespace, serverPodPort, nil) + defer data.DeletePodAndWait(defaultTimeout, tcpServerPodName, data.testNamespace) require.NoError(t, err) err = data.createToolboxPodOnNode(pcToolboxPodName, data.testNamespace, node1, false) + defer data.DeletePodAndWait(defaultTimeout, pcToolboxPodName, data.testNamespace) require.NoError(t, err) - podIPs := waitForPodIPs(t, data, []PodInfo{ {tcpServerPodName, getOSString(), "", data.testNamespace}, {pcToolboxPodName, getOSString(), "", data.testNamespace}, }) - - // Give a little time for Windows containerd Nodes to set up OVS. - // Containerd configures port asynchronously, which could cause execution time of installing flow longer than docker. 
- time.Sleep(time.Second * 1) - tcpServerPodIP := podIPs[tcpServerPodName].IPv4.String() testcases := []pcTestCase{ @@ -254,17 +227,15 @@ func testPacketCapture(t *testing.T, data *TestData) { expectedStatus: crdv1alpha1.PacketCaptureStatus{ Conditions: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: metav1.Now(), Reason: "Timeout", - Message: "context deadline exceeded", }, }, }, }, { - name: nonExistPodName, ipVersion: 4, srcPod: pcToolboxPodName, @@ -295,11 +266,10 @@ func testPacketCapture(t *testing.T, data *TestData) { }, }, }, - expectedStatus: crdv1alpha1.PacketCaptureStatus{ Conditions: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionFalse), LastTransitionTime: metav1.Now(), Reason: "CaptureFailed", @@ -308,39 +278,6 @@ func testPacketCapture(t *testing.T, data *TestData) { }, }, }, - } - t.Run("testPacketCapture", func(t *testing.T) { - for _, tc := range testcases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - runPacketCaptureTest(t, data, tc) - }) - } - }) -} - -// testPacketCaptureTCP verifies if PacketCapture can capture tcp packets. this function only contains basic -// cases with pod-to-pod. 
-func testPacketCaptureBasic(t *testing.T, data *TestData) { - nodeIdx := 0 - if len(clusterInfo.windowsNodes) != 0 { - nodeIdx = clusterInfo.windowsNodes[0] - } - node1 := nodeName(nodeIdx) - - err := createUDPServerPod(udpServerPodName, data.testNamespace, serverPodPort, node1) - defer data.DeletePodAndWait(defaultTimeout, udpServerPodName, data.testNamespace) - require.NoError(t, err) - // test tcp server pod - err = data.createServerPodWithLabels(tcpServerPodName, data.testNamespace, serverPodPort, nil) - defer data.DeletePodAndWait(defaultTimeout, tcpServerPodName, data.testNamespace) - require.NoError(t, err) - err = data.createToolboxPodOnNode(pcToolboxPodName, data.testNamespace, node1, false) - defer data.DeletePodAndWait(defaultTimeout, pcToolboxPodName, data.testNamespace) - require.NoError(t, err) - - testcases := []pcTestCase{ { name: "ipv4-tcp", ipVersion: 4, @@ -385,7 +322,7 @@ func testPacketCaptureBasic(t *testing.T, data *TestData) { NumberCaptured: 5, Conditions: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: metav1.Now(), Reason: "Succeed", @@ -443,7 +380,7 @@ func testPacketCaptureBasic(t *testing.T, data *TestData) { NumberCaptured: 5, Conditions: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: metav1.Now(), Reason: "Succeed", @@ -496,7 +433,7 @@ func testPacketCaptureBasic(t *testing.T, data *TestData) { NumberCaptured: 5, Conditions: []crdv1alpha1.PacketCaptureCondition{ { - Type: crdv1alpha1.PacketCaptureCompleted, + Type: crdv1alpha1.PacketCaptureComplete, Status: metav1.ConditionStatus(v1.ConditionTrue), LastTransitionTime: metav1.Now(), Reason: "Succeed", @@ -523,14 +460,15 @@ func testPacketCaptureBasic(t *testing.T, data *TestData) { } func 
getOSString() string { - if len(clusterInfo.windowsNodes) != 0 { - return "windows" - } else { - return "linux" - } + return "linux" } func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { + pcToolboxPodName := "toolbox" + icmpProto := intstr.FromString("ICMP") + udpProto := intstr.FromString("UDP") + tcpProto := intstr.FromString("TCP") + nonExistPodName := "non-existing-pod" switch tc.ipVersion { case 4: skipIfNotIPv4Cluster(t) @@ -560,7 +498,7 @@ func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { } }() - if dstPodName != nonExistPodName && tc.expectedStatus.Conditions[0].Message != pcTimeoutReason { + if !strings.Contains(tc.pc.Name, "non-exist") && !strings.Contains(tc.pc.Name, "timeout") { srcPod := tc.srcPod if dstIP := tc.pc.Spec.Destination.IP; dstIP != nil { ip := net.ParseIP(*dstIP) @@ -605,13 +543,13 @@ func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { timeout := tc.pc.Spec.Timeout if timeout == nil { - tv := uint16(15) + tv := uint32(15) timeout = &tv } if strings.Contains(tc.name, "timeout") { // wait more for status update. - tv := *timeout + uint16(10) + tv := *timeout + uint32(10) timeout = &tv } @@ -620,11 +558,14 @@ func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { t.Fatalf("Error: Get PacketCapture failed: %v", err) } tc.expectedStatus.FilePath = pc.Status.FilePath - + if strings.Contains(tc.name, "timeout") { + // it can be 0 or less than the target number.
+ tc.expectedStatus.NumberCaptured = pc.Status.NumberCaptured + } // remove pending condition as it's random newCond := []crdv1alpha1.PacketCaptureCondition{} for _, cond := range pc.Status.Conditions { - if cond.Type == crdv1alpha1.PacketCapturePending || cond.Type == crdv1alpha1.PacketCaptureRunning { + if cond.Type == crdv1alpha1.PacketCaptureStarted { continue } newCond = append(newCond, cond) @@ -665,9 +606,8 @@ func isPacketCaptureReady(pc *crdv1alpha1.PacketCapture) bool { if len(pc.Status.Conditions) == 0 { return false } - for _, cond := range pc.Status.Conditions { - if cond.Type == crdv1alpha1.PacketCaptureCompleted { + if cond.Type == crdv1alpha1.PacketCaptureComplete { return true } } @@ -679,9 +619,8 @@ func isPacketCaptureRunning(pc *crdv1alpha1.PacketCapture) bool { if len(pc.Status.Conditions) == 0 { return false } - for _, cond := range pc.Status.Conditions { - if cond.Type == crdv1alpha1.PacketCaptureRunning && cond.Status == metav1.ConditionTrue { + if cond.Type == crdv1alpha1.PacketCaptureStarted && cond.Status == metav1.ConditionTrue { return true } } @@ -706,7 +645,6 @@ func packetCaptureStatusEqual(oldStatus, newStatus crdv1alpha1.PacketCaptureStat } func conditionSliceEqualsIgnoreLastTransitionTime(as, bs []crdv1alpha1.PacketCaptureCondition) bool { - sort.Slice(as, func(i, j int) bool { return as[i].Type < as[j].Type }) From fd77ff5c8453eecb3605f1e71c511dd1d7ed5d87 Mon Sep 17 00:00:00 2001 From: Hang Yan Date: Sat, 9 Nov 2024 12:14:18 +0800 Subject: [PATCH 3/3] update Signed-off-by: Hang Yan --- .../packetcapture/packetcapture_controller.go | 56 +++++++++---------- .../packetcapture_controller_test.go | 10 ++-- pkg/apis/crd/v1alpha1/types.go | 2 +- .../crd/v1alpha1/zz_generated.deepcopy.go | 2 +- test/e2e/packetcapture_test.go | 6 +- 5 files changed, 37 insertions(+), 39 deletions(-) diff --git a/pkg/agent/packetcapture/packetcapture_controller.go b/pkg/agent/packetcapture/packetcapture_controller.go index bf9efa7a948..dd29cb2f376 100644 
--- a/pkg/agent/packetcapture/packetcapture_controller.go +++ b/pkg/agent/packetcapture/packetcapture_controller.go @@ -127,7 +127,7 @@ type Controller struct { sftpUploader sftp.Uploader captureInterface PacketCapturer mutex sync.Mutex - // A name-phase mapping for all PacketCapture CRs. + // A name-state mapping for all PacketCapture CRs. captures map[string]*packetCaptureState numRunningCaptures int } @@ -270,7 +270,7 @@ func (c *Controller) syncPacketCapture(pcName string) error { if err := c.validatePacketCapture(&pc.Spec); err != nil { klog.ErrorS(err, "Invalid PacketCapture", "name", pc.Name) - if updateErr := c.updateStatus(context.Background(), pcName, &packetCaptureState{err: err}); updateErr != nil { + if updateErr := c.updateStatus(context.Background(), pcName, packetCaptureState{err: err}); updateErr != nil { klog.ErrorS(err, "Failed to update PacketCapture status", "name", pc.Name) } cleanupStatus() @@ -311,7 +311,7 @@ func (c *Controller) syncPacketCapture(pcName string) error { return *state }() - if updateErr := c.updateStatus(context.Background(), pcName, &state); updateErr != nil { + if updateErr := c.updateStatus(context.Background(), pcName, state); updateErr != nil { return fmt.Errorf("error when patching status: %w", updateErr) } return err @@ -423,7 +423,7 @@ func (c *Controller) performCapture( klog.ErrorS(err, "Failed to start capture") return err } - klog.InfoS("Starting capture packets", "name", pc.Name, "device", device) + klog.InfoS("Starting packet capture", "name", pc.Name, "device", device) for { select { case packet := <-packets: @@ -567,7 +567,7 @@ func (c *Controller) uploadPackets(ctx context.Context, pc *crdv1alpha1.PacketCa return uploader.Upload(pc.Spec.FileServer.URL, c.generatePacketsPathForServer(pc.Name), cfg, outputFile) } -func (c *Controller) updateStatus(ctx context.Context, name string, state *packetCaptureState) error { +func (c *Controller) updateStatus(ctx context.Context, name string, state packetCaptureState) 
error { toUpdate, getErr := c.packetCaptureLister.Get(name) if getErr != nil { klog.InfoS("Didn't find the original PacketCapture, skip updating status", "name", name) @@ -582,32 +582,30 @@ func (c *Controller) updateStatus(ctx context.Context, name string, state *packe if state.err != nil { updatedStatus.FilePath = "" - conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ - Type: crdv1alpha1.PacketCaptureComplete, - Status: metav1.ConditionStatus(v1.ConditionFalse), - LastTransitionTime: metav1.Now(), - Reason: "CaptureFailed", - Message: state.err.Error(), - }) - if errors.Is(state.err, context.DeadlineExceeded) { - conditions = []crdv1alpha1.PacketCaptureCondition{ - { - Type: crdv1alpha1.PacketCaptureComplete, - Status: metav1.ConditionStatus(v1.ConditionTrue), - LastTransitionTime: t, - Reason: "Timeout", - }, - } + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureComplete, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Timeout", + }) + } else if state.isCaptureSuccessful() { - conditions = []crdv1alpha1.PacketCaptureCondition{ - { - Type: crdv1alpha1.PacketCaptureComplete, - Status: metav1.ConditionStatus(v1.ConditionTrue), - LastTransitionTime: t, - Reason: "Succeed", - }, - } + // most likely the upload failed after the capture succeeded.
+ conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureComplete, + Status: metav1.ConditionStatus(v1.ConditionTrue), + LastTransitionTime: t, + Reason: "Succeed", + }) + } else { + conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ + Type: crdv1alpha1.PacketCaptureComplete, + Status: metav1.ConditionStatus(v1.ConditionFalse), + LastTransitionTime: metav1.Now(), + Reason: "CaptureFailed", + Message: state.err.Error(), + }) } if toUpdate.Spec.FileServer != nil && state.filePath != "" { conditions = append(conditions, crdv1alpha1.PacketCaptureCondition{ diff --git a/pkg/agent/packetcapture/packetcapture_controller_test.go b/pkg/agent/packetcapture/packetcapture_controller_test.go index d34fa1fdc58..aafe573c27a 100644 --- a/pkg/agent/packetcapture/packetcapture_controller_test.go +++ b/pkg/agent/packetcapture/packetcapture_controller_test.go @@ -58,7 +58,7 @@ var ( pod2MAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:00") ofPortPod1 = uint32(1) ofPortPod2 = uint32(2) - testCaptureTimeout = uint32(1) + testCaptureTimeout = int32(1) testCaptureNum int32 = 15 icmpProto = intstr.FromString("ICMP") @@ -272,8 +272,9 @@ func addPodInterface(ifaceStore interfacestore.InterfaceStore, podNamespace, pod func TestMultiplePacketCaptures(t *testing.T) { defaultFS = afero.NewMemMapFs() - packetsDir := "/tmp/antrea/packetcapture/packets" - defaultFS.MkdirAll(packetsDir, 0755) + defer func() { + defaultFS = afero.NewOsFs() + }() nameFunc := func(id int) string { return fmt.Sprintf("pc-%d", id) } @@ -326,8 +327,7 @@ func TestMultiplePacketCaptures(t *testing.T) { } -// TestPacketCaptureControllerRun was used to validate the whole run process is working. It doesn't wait for -// the testing pc to finish. on sandbox env, no good solution to open raw socket. +// TestPacketCaptureControllerRun was used to validate the whole run process is working. 
func TestPacketCaptureControllerRun(t *testing.T) { pcs := []struct { name string diff --git a/pkg/apis/crd/v1alpha1/types.go b/pkg/apis/crd/v1alpha1/types.go index 80fc665d035..839b227101f 100644 --- a/pkg/apis/crd/v1alpha1/types.go +++ b/pkg/apis/crd/v1alpha1/types.go @@ -450,7 +450,7 @@ type PacketCaptureFileServer struct { type PacketCaptureSpec struct { // Timeout is the timeout for this capture session. If not specified, defaults to 60s. - Timeout *uint32 `json:"timeout,omitempty"` + Timeout *int32 `json:"timeout,omitempty"` CaptureConfig CaptureConfig `json:"captureConfig"` // Source is the traffic source we want to perform capture on. Both `Source` and `Destination` is required // for a capture session, and at least one `Pod` should be present either in the source or the destination. diff --git a/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go index 17b9f50d0f2..69d03586b3c 100644 --- a/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/crd/v1alpha1/zz_generated.deepcopy.go @@ -721,7 +721,7 @@ func (in *PacketCaptureSpec) DeepCopyInto(out *PacketCaptureSpec) { *out = *in if in.Timeout != nil { in, out := &in.Timeout, &out.Timeout - *out = new(uint32) + *out = new(int32) **out = **in } in.CaptureConfig.DeepCopyInto(&out.CaptureConfig) diff --git a/test/e2e/packetcapture_test.go b/test/e2e/packetcapture_test.go index c0d9e7025bb..55b49e934bf 100644 --- a/test/e2e/packetcapture_test.go +++ b/test/e2e/packetcapture_test.go @@ -165,7 +165,7 @@ func testPacketCaptureBasic(t *testing.T, data *TestData) { udpProto := intstr.FromString("UDP") tcpProto := intstr.FromString("TCP") testServerPort := int32(80) - pcShortTimeout := uint32(5) + pcShortTimeout := int32(5) nonExistPodName := "non-existing-pod" testNonExistPort := int32(8085) @@ -543,13 +543,13 @@ func runPacketCaptureTest(t *testing.T, data *TestData, tc pcTestCase) { timeout := tc.pc.Spec.Timeout if timeout == nil { - tv := uint32(15) + tv := 
int32(15) timeout = &tv } if strings.Contains(tc.name, "timeout") { // wait more for status update. - tv := *timeout + uint32(10) + tv := *timeout + int32(10) timeout = &tv }