integrate netobserv to reliability tests #798

Open · wants to merge 9 commits into base: master

Changes from 6 commits
25 changes: 25 additions & 0 deletions reliability-v2/config/qe-index.yaml
@@ -0,0 +1,25 @@
apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
  name: qe-app-registry
  namespace: openshift-marketplace
spec:
  displayName: QE Catalog
  image: quay.io/openshift-qe-optional-operators/aosqe-index:v${CLUSTER_VERSION}
  sourceType: grpc
---
apiVersion: operator.openshift.io/v1alpha1
kind: ImageContentSourcePolicy
metadata:
  name: brew-registry
spec:
  repositoryDigestMirrors:
  - mirrors:
    - brew.registry.redhat.io
    source: registry.redhat.io
  - mirrors:
    - brew.registry.redhat.io
    source: registry.stage.redhat.io
  - mirrors:
    - brew.registry.redhat.io
    source: registry-proxy.engineering.redhat.com
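
The v${CLUSTER_VERSION} tag in the image field is a template placeholder, not literal YAML; it is rendered by the caller. A minimal sketch of how this file is rendered and applied, mirroring the envsubst call added to start.sh later in this diff (the channel value in the comment is illustrative):

    # Derive the minor version from the cluster channel, e.g. "stable-4.14" -> "4.14"
    CLUSTER_VERSION=$(oc get clusterversion/version -o jsonpath='{.spec.channel}' | cut -d'-' -f 2)
    export CLUSTER_VERSION
    envsubst < config/qe-index.yaml | oc apply -f -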
Author:

@qiliRedHat do you want this to be a separate config? I was thinking of having these tasks under the standard reliability tasks so that these checks always run. Of course, that also means the netobserv operator would be installed by default for all reliability runs and optionally excluded; the current implementation in start.sh is the inverse. WDYT? This was the initial idea we had discussed, so that netobserv could piggyback on your standard reliability runs.

If we install netobserv by default, we'd need to bump the standard instance types, or have a configuration that accommodates Loki's resource requirements. A couple of questions here:

  • Do you typically set up infra nodes for reliability runs? If so, what instance types are the infra nodes? If you can bring up that environment, I can check whether Loki fits on the infra nodes.
  • Would you be willing to bump the reliability-run instance types to m5.2xlarge or m5.4xlarge?

Thanks!

Contributor:

@memodi The reliability test has many test profiles; considering the cost, I don't want netobserv on all profiles by default. Usually there are no infra nodes in reliability tests, but reliability has the option to configure them with -i in start.sh; the default size is the same as the workers, and the size can be configured.
If we test it once (7 days) per release, I think we can do m5.2xlarge on one of the profiles.

@@ -29,6 +29,8 @@ reliability:
- oc get project -l purpose=reliability
- func check_nodes
- kubectl get pods -A -o wide | egrep -v "Completed|Running"
- func check_flowcollector
- func check_netobserv_pods
# Run test case as scripts. KUBECONFIG of the current user is set as env variable by reliability-v2.
#- . <path_to_script>/create-delete-pod-ensure-service.sh

@@ -148,4 +150,4 @@ reliability:
AWS_DEFAULT_REGION: us-east-2
AWS_ACCESS_KEY_ID: xxxx
AWS_SECRET_ACCESS_KEY: xxxx
CLOUD_TYPE: aws
CLOUD_TYPE: aws
50 changes: 49 additions & 1 deletion reliability-v2/start.sh
@@ -28,6 +28,8 @@ Usage: $(basename "${0}") [-p <path_to_auth_files>] [-n <folder_name> ] [-t <tim

-h : Help

-o <operator(s) to enable> : Enable optional operators by passing a comma-separated list of operators to install. Optional.

END
}
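
A hypothetical invocation using the new flag (the folder name and run duration below are illustrative):

    # Illustrative only: 7-day reliability run with the netobserv operator enabled
    ./start.sh -p <path_to_auth_files> -n netobserv-reliability -t 7d -o netobserv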

@@ -36,7 +38,7 @@ if [[ "$1" = "" ]];then
exit 1
fi

while getopts ":n:t:p:c:r:iuh" opt; do
while getopts ":n:t:p:c:r:o:iuh" opt; do
case ${opt} in
n)
folder_name=${OPTARG}
@@ -59,6 +61,10 @@ while getopts ":n:t:p:c:r:iuh" opt; do
u)
upgrade=true
;;
o)
operators=${OPTARG}
IFS=',' read -ra operatorsToInstall <<< "$OPTARG"
;;
h)
_usage
exit 1
@@ -186,6 +192,18 @@ function dhms_to_seconds {
echo "Total seconds to run is: $SECONDS_TO_RUN"
}

function setup_netobserv(){
    log "Setting up Network Observability operator"
    git clone https://github.com/openshift-qe/ocp-qe-perfscale-ci.git --branch netobserv-perf-tests
    OCPQE_PERFSCALE_DIR=$PWD/ocp-qe-perfscale-ci
    source ocp-qe-perfscale-ci/scripts/env.sh
    source ocp-qe-perfscale-ci/scripts/netobserv.sh
    deploy_lokistack
    deploy_kafka
    deploy_netobserv
    ceateFlowCollector "-p KafkaConsumerReplicas=6"
}
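
setup_netobserv assumes the deploy_* helpers and the flow collector creation function are all defined by the sourced ocp-qe-perfscale-ci scripts. A defensive sketch (not part of this PR) that fails fast if a helper is missing:

    # Sketch (assumption): verify the sourced scripts defined the expected helpers before calling them
    for fn in deploy_lokistack deploy_kafka deploy_netobserv; do
        declare -F "$fn" > /dev/null || { log "Helper $fn not found after sourcing netobserv scripts"; exit 1; }
    done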

RELIABILITY_DIR=$(cd $(dirname ${BASH_SOURCE[0]});pwd)
SECONDS_TO_RUN=0
start_log=start_$(date +"%Y%m%d_%H%M%S").log
@@ -361,6 +379,24 @@ if [[ $os == "linux" ]]; then
date_end_format=$(date --date=@$timestamp_end)
elif [[ $os == "mac" ]]; then date_end_format=$(date -j -f "%s" $timestamp_end "+%Y-%m-%d %H:%M:%S")
fi

# Configure the QE index image if optional operators need to be deployed
# and call the respective setup function for each operator
if [[ $operators ]]; then
    CLUSTER_VERSION=$(oc get clusterversion/version -o jsonpath='{.spec.channel}' | cut -d'-' -f 2)
    export CLUSTER_VERSION
    log "Setting up QE index image for optional operators"
    envsubst < config/qe-index.yaml | oc apply -f -

    for operator in "${operatorsToInstall[@]}"; do
        if [[ $operator == "netobserv" ]]; then
            setup_netobserv
        fi
    done
fi



log "info" "Reliability test will run $time_to_run. Test will end on $date_end_format. \
If you want to halt the test before that, open another terminal and 'touch halt' under reliability-v2 folder."
log "warning" "DO NOT CTRL+c or terminate this session."
@@ -422,6 +458,18 @@ fi
if [[ -z $tolerance_rate ]]; then
tolerance_rate=1
fi

# Clean up operator setup if operators were installed
if [[ $operators ]]; then
    for operator in "${operatorsToInstall[@]}"; do
        if [[ $operator == "netobserv" ]]; then
            # shellcheck source=reliability-v2/ocp-qe-perfscale-ci/scripts/netobserv.sh
            source ${OCPQE_PERFSCALE_DIR}/scripts/netobserv.sh
            nukeobserv
        fi
    done
fi
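
As a quick post-cleanup check (illustrative; the exact scope of nukeobserv comes from the external netobserv.sh), one could confirm the netobserv namespaces are gone:

    # Sketch (assumption): prints nothing once cleanup has removed both namespaces
    oc get ns netobserv netobserv-privileged --ignore-not-found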

cd $folder_name
if [ ! -f reliability.log ]; then
echo "reliability.log is not found."
39 changes: 39 additions & 0 deletions reliability-v2/tasks/Tasks.py
@@ -399,3 +399,42 @@ def shell_task(self,task,user,group_name):
        task_name=task[start_index+1:]
        self.__log_result(rc,task_name)
        return (result,rc)

    # check flowcollector status for netobserv
    def check_flowcollector(self, user):
        self.logger.info(f"[Task] User {user}: check flowcollector")
        # Check if the flowcollector is Ready
        (result, rc) = oc(
            "get flowcollector --no-headers | grep -v ' Ready'",
            self.__get_kubeconfig(user),
            ignore_log=True,
            ignore_slack=True,
        )
        if rc == 0:
            self.logger.error(f"Flowcollector is not Ready: {result}")
            slackIntegration.error(f"Flowcollector not Ready: {result}")
            rc_return = 1
        elif rc == 1 and result == "":
            self.logger.info("Flowcollector is Ready.")
            rc_return = 0
Contributor:

@memodi You can add an 'else' to cover the rest of the cases (rc == 1 and result != "", or rc != 1) and log the result to see what you get.

        return (result, rc_return)
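
A sketch of the suggested else branch (illustrative, not part of this PR), so that rc_return is always assigned and unexpected output gets logged:

        if rc == 0:
            rc_return = 1  # grep found entries that are not Ready
        elif rc == 1 and result == "":
            rc_return = 0  # nothing matched: flowcollector is Ready
        else:
            # rc == 1 with non-empty output, or any other rc (e.g. oc/kubeconfig failure)
            self.logger.warning(f"Unexpected flowcollector check result: rc={rc}, result={result}")
            rc_return = 1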

    # check netobserv pods health
    def check_netobserv_pods(self, user):
        self.logger.info(f"[Task] User {user}: check pods")
        # Check if netobserv pods are healthy
        for ns in ("netobserv", "netobserv-privileged"):
            (result, rc) = oc(
                f"get pods -n {ns} -o wide --no-headers | grep -v ' Ready'",
                self.__get_kubeconfig(user),
                ignore_log=True,
                ignore_slack=True,
            )
            if rc == 0:
                self.logger.error(f"Some pods are not Ready in {ns} ns: {result}")
                slackIntegration.error(f"Some pods are not Ready in ns {ns}: {result}")
                rc_return = 1
            elif rc == 1 and result == "":
                self.logger.info(f"Pods in ns {ns} are healthy.")
                rc_return = 0
Contributor:

Same comment as above.

        return (result, rc_return)