From 215df40e8f7b7691a57660bd291d4b69b13f23b7 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 5 Apr 2024 23:52:34 +0530 Subject: [PATCH 01/32] code base --- ocs_ci/helpers/dr_helpers.py | 2 + .../DR/multiclusterobservability.yaml | 21 +++ .../DR/observability-metrics-configmap.yaml | 21 +++ ocs_ci/templates/DR/thanos.yaml | 15 +++ .../test_rdr_monitoring_dashboard.py | 121 ++++++++++++++++++ 5 files changed, 180 insertions(+) create mode 100644 ocs_ci/templates/DR/multiclusterobservability.yaml create mode 100644 ocs_ci/templates/DR/observability-metrics-configmap.yaml create mode 100644 ocs_ci/templates/DR/thanos.yaml create mode 100644 tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py diff --git a/ocs_ci/helpers/dr_helpers.py b/ocs_ci/helpers/dr_helpers.py index 21d36bf18a6..fbe483f17c5 100644 --- a/ocs_ci/helpers/dr_helpers.py +++ b/ocs_ci/helpers/dr_helpers.py @@ -5,6 +5,7 @@ import json import logging import tempfile +import boto3 from ocs_ci.framework import config from ocs_ci.ocs import constants, ocp @@ -30,6 +31,7 @@ CommandFailed, run_cmd, ) +from botocore.exceptions import BotoCoreError logger = logging.getLogger(__name__) diff --git a/ocs_ci/templates/DR/multiclusterobservability.yaml b/ocs_ci/templates/DR/multiclusterobservability.yaml new file mode 100644 index 00000000000..a107d1865a4 --- /dev/null +++ b/ocs_ci/templates/DR/multiclusterobservability.yaml @@ -0,0 +1,21 @@ +# This config file is used to enable ACM observability +--- +apiVersion: observability.open-cluster-management.io/v1beta2 +kind: MultiClusterObservability +metadata: + name: observability +spec: + enableDownsampling: true + observabilityAddonSpec: + enableMetrics: true + interval: 300 + storageConfig: + alertmanagerStorageSize: 1Gi + compactStorageSize: 100Gi + metricObjectStorage: + key: thanos.yaml + name: thanos-object-storage + receiveStorageSize: 100Gi + ruleStorageSize: 1Gi + storageClass: thin-csi-odf + storeStorageSize: 10Gi diff --git a/ocs_ci/templates/DR/observability-metrics-configmap.yaml b/ocs_ci/templates/DR/observability-metrics-configmap.yaml new file mode 100644 index 00000000000..ab5ee5a9a6e --- /dev/null +++ b/ocs_ci/templates/DR/observability-metrics-configmap.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +metadata: + name: mp-custom-allowlist + namespace: open-cluster-management-observability +data: + metrics_list.yaml: | + names: + - odf_system_health_status + - odf_system_map + - odf_system_raw_capacity_total_bytes + - odf_system_raw_capacity_used_bytes + - ceph_rbd_mirror_snapshot_sync_bytes + - ceph_rbd_mirror_snapshot_snapshots + - ceph_rbd_mirror_snapshot_sync_time_sum + matches: + - __name__="csv_succeeded",exported_namespace="openshift-storage",name=~"odf-operator.*" + - __name__="csv_succeeded",exported_namespace="openshift-dr-system",name=~"odr-cluster-operator.*" + - __name__="csv_succeeded",exported_namespace="openshift-operators",name=~"volsync.*" + recording_rules: + - record: count_persistentvolumeclaim_total + expr: count(kube_persistentvolumeclaim_info) diff --git a/ocs_ci/templates/DR/thanos.yaml b/ocs_ci/templates/DR/thanos.yaml new file mode 100644 index 00000000000..22fd653ef0d --- /dev/null +++ b/ocs_ci/templates/DR/thanos.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Secret +metadata: + name: thanos-object-storage + namespace: open-cluster-management-observability +type: Opaque +stringData: + thanos.yaml: | + type: s3 + config: + bucket: PLACEHOLDER + endpoint: PLACEHOLDER + insecure: true + access_key: PLACEHOLDER + secret_key: PLACEHOLDER diff --git a/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py b/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py new file mode 100644 index 00000000000..b4e6e48fb30 --- /dev/null +++ b/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py @@ -0,0 +1,121 @@ +import logging +import pytest +import yaml + +from time import sleep + +from ocs_ci.framework import config +from ocs_ci.framework.pytest_customization.marks import ( + rdr_ui_failover_config_required, + rdr_ui_relocate_config_required, +) +from ocs_ci.framework.testlib import tier1, skipif_ocs_version +from ocs_ci.framework.pytest_customization.marks import turquoise_squad +from ocs_ci.helpers import dr_helpers +from ocs_ci.ocs import constants +from ocs_ci.ocs.acm.acm import AcmAddClusters +from ocs_ci.helpers.dr_helpers_ui import ( + dr_submariner_validation_from_ui, + check_cluster_status_on_acm_console, + failover_relocate_ui, +) +from ocs_ci.ocs.resources.drpc import DRPC +from ocs_ci.utility.utils import run_cmd + +logger = logging.getLogger(__name__) + + +@tier1 +@turquoise_squad +@skipif_ocs_version("<4.14") +class TestRDRMonitoringDashboard: + """ + Test to enable ACM observability and validate DR monitoring dashboard for RDR on the RHACM console. + + """ + + @rdr_ui_failover_config_required + @pytest.mark.polarion_id("OCS-XXXX") + def test_rdr_monitoring_dashboard( + self, + setup_acm_ui, + dr_workload, + workload_type, + ): + """ + Test to enable ACM observability and validate DR monitoring dashboard for RDR on the RHACM console. + + """ + + # acm_obj = AcmAddClusters() + # workload_type = [constants.SUBSCRIPTION, constants.APPLICATION_SET] + # for workload in workload_type: + # workload == constants.SUBSCRIPTION: + rdr_workload = dr_workload(num_of_subscription=2, num_of_appset=2) + # drpc_obj = DRPC(namespace=rdr_workload.workload_namespace) + + # drpc_obj = DRPC( + # namespace=constants.GITOPS_CLUSTER_NAMESPACE, + # resource_name=f"{rdr_workload.appset_placement_name}-drpc", + # ) + + logger.info("Enable ACM MultiClusterObservability") + run_cmd(f"oc create -f {multiclusterobservability.yaml}") + + def build_bucket_name(acm_indexes): + """ + Create backupname from cluster names + Args: + acm_indexes (list): List of acm indexes + """ + bucket_name = "" + for index in acm_indexes: + bucket_name += config.clusters[index].ENV_DATA["cluster_name"] + return bucket_name + + # Configuring s3 bucket + self.meta_obj.get_meta_access_secret_keys() + + endpoint_url = ("https://s3.amazonaws.com",) + + # bucket name formed like '{acm_active_cluster}-{acm_passive_cluster}' + self.meta_obj.bucket_name = build_bucket_name(acm_indexes, observability) + # create s3 bucket + create_s3_bucket( + self.meta_obj.access_key, + self.meta_obj.secret_key, + self.meta_obj.bucket_name, + ) + # Label the hub cluster to enable VolumeSynchronizationDelayAlert + run_cmd( + "oc label namespace openshift-operators openshift.io/cluster-monitoring='true'" + ) + + # oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}' + + def create_s3_bucket(access_key, secret_key, bucket_name): + """ + Create s3 bucket + Args: + access_key (str): S3 access key + secret_key (str): S3 secret key + acm_indexes (list): List of acm indexes + """ + client = boto3.resource( + "s3", + verify=True, + endpoint_url="https://s3.amazonaws.com", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + ) + try: + client.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={ + "LocationConstraint": constants.AWS_REGION + }, + ) + logger.info(f"Successfully created backup bucket: {bucket_name}") + except BotoCoreError as e: + logger.error(f"Failed to create s3 bucket {e}") + raise From 8c57bed7dab4cb954783d877c4da0ebc810a8d03 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Sun, 7 Apr 2024 00:28:39 +0530 Subject: [PATCH 02/32] code base to enable ACM observability Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 70 +++++++++++++++++++++++++++++++++ ocs_ci/ocs/exceptions.py | 14 +------ 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index fd93df65be7..5e29f88e2cd 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -43,6 +43,11 @@ get_lvm_full_version, check_cephcluster_status, ) +from ocs_ci.ocs.constants import ( + MULTICLUSTEROBSERVABILITY_PATH, + OBSERVABILITYMETRICSCONFIGMAP_PATH, + THANOS_PATH, +) from ocs_ci.ocs.exceptions import ( CephHealthException, ChannelNotFound, @@ -57,6 +62,8 @@ UnexpectedDeploymentConfiguration, ResourceNotFoundError, ACMClusterConfigurationException, + MDRDeploymentException, + ACMObservabilityNotEnabled, ) from ocs_ci.deployment.cert_manager import deploy_cert_manager from ocs_ci.deployment.zones import create_dummy_zone_labels @@ -3444,6 +3451,69 @@ def deploy(self): else: self.enable_managed_serviceaccount() + @retry(ACMObservabilityNotEnabled, tries=10, delay=5, backoff=5) + def thanos_secret(self): + """ + Create thanos secret yaml by using Noobaa or AWS bucket (AWS bucket is used in this function) + + """ + secret_dict = load_auth_config().get("AUTH", {}) + access_key = secret_dict["AWS"]["AWS_ACCESS_KEY_ID"] + secret_key = secret_dict["AWS"]["AWS_SECRET_ACCESS_KEY"] + thanos_secret_data = templating.load_yaml(self.thanos_yaml_file) + thanos_secret_data["stringData"]["thanos.yaml"][ + "bucket" + ] = self.build_bucket_name() + thanos_secret_data["stringData"]["thanos.yaml"][ + "endpoint" + ] = "https://s3.amazonaws.com" + thanos_secret_data["stringData"]["thanos.yaml"]["access_key"] = access_key + thanos_secret_data["stringData"]["thanos.yaml"]["secret_key"] = secret_key + thanos_data_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="thanos", delete=False + ) + templating.dump_data_to_temp_yaml(thanos_secret_data, thanos_data_yaml.name) + + logger.info( + "Creating thanos.yaml needed for ACM observability after passing required params" + ) + run_cmd(f"oc create -f {THANOS_PATH}") + + logger.info("Allow some time for ACM Observability to be enabled") + time.sleep(120) + + check_observability_status = run_cmd( + "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" + ) + if check_observability_status: + logger.info("ACM observability is successfully enabled") + else: + raise ACMObservabilityNotEnabled( + "ACM Observability is not enabled, status is False" + ) + + def enable_acm_observability(self): + """ + Function to enable ACM observability for enabling DR monitoring dashboard for Regional DR on the RHACM console. + + """ + + logger.info("Enable ACM MultiClusterObservability") + run_cmd(f"oc create -f {MULTICLUSTEROBSERVABILITY_PATH}") + + logger.info("Whitelist RBD metrics and create configmap") + run_cmd(f"oc create -f {OBSERVABILITYMETRICSCONFIGMAP_PATH}") + + logger.info("Enable thanos secret yaml") + self.thanos_secret() + + logger.info( + "Add label for cluster-monitoring needed to fire VolumeSyncronizationDelayAlert" + ) + run_cmd( + "oc label namespace openshift-operators openshift.io/cluster-monitoring='true'" + ) + class MDRMultiClusterDROperatorsDeploy(MultiClusterDROperatorsDeploy): """ diff --git a/ocs_ci/ocs/exceptions.py b/ocs_ci/ocs/exceptions.py index 116ba7a65c9..f00d5b7b1a5 100644 --- a/ocs_ci/ocs/exceptions.py +++ b/ocs_ci/ocs/exceptions.py @@ -696,19 +696,7 @@ class UsernameNotFoundException(Exception): pass -class MultiStorageClusterExternalCephHealth(Exception): - pass - - -class StorageSizeNotReflectedException(Exception): - pass - - -class ClusterNotInSTSModeException(Exception): - pass - - -class APIRequestError(Exception): +class ACMObservabilityNotEnabled(Exception): pass From e66879c5f92c3de0ad42af9062636d6e2e20a37a Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Sun, 7 Apr 2024 00:53:14 +0530 Subject: [PATCH 03/32] Remove file Signed-off-by: am-agrawa --- .../test_rdr_monitoring_dashboard.py | 121 ------------------ 1 file changed, 121 deletions(-) delete mode 100644 tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py diff --git a/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py b/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py deleted file mode 100644 index b4e6e48fb30..00000000000 --- a/tests/functional/disaster-recovery/regional-dr/test_rdr_monitoring_dashboard.py +++ /dev/null @@ -1,121 +0,0 @@ -import logging -import pytest -import yaml - -from time import sleep - -from ocs_ci.framework import config -from ocs_ci.framework.pytest_customization.marks import ( - rdr_ui_failover_config_required, - rdr_ui_relocate_config_required, -) -from ocs_ci.framework.testlib import tier1, skipif_ocs_version -from ocs_ci.framework.pytest_customization.marks import turquoise_squad -from ocs_ci.helpers import dr_helpers -from ocs_ci.ocs import constants -from ocs_ci.ocs.acm.acm import AcmAddClusters -from ocs_ci.helpers.dr_helpers_ui import ( - dr_submariner_validation_from_ui, - check_cluster_status_on_acm_console, - failover_relocate_ui, -) -from ocs_ci.ocs.resources.drpc import DRPC -from ocs_ci.utility.utils import run_cmd - -logger = logging.getLogger(__name__) - - -@tier1 -@turquoise_squad -@skipif_ocs_version("<4.14") -class TestRDRMonitoringDashboard: - """ - Test to enable ACM observability and validate DR monitoring dashboard for RDR on the RHACM console. - - """ - - @rdr_ui_failover_config_required - @pytest.mark.polarion_id("OCS-XXXX") - def test_rdr_monitoring_dashboard( - self, - setup_acm_ui, - dr_workload, - workload_type, - ): - """ - Test to enable ACM observability and validate DR monitoring dashboard for RDR on the RHACM console. - - """ - - # acm_obj = AcmAddClusters() - # workload_type = [constants.SUBSCRIPTION, constants.APPLICATION_SET] - # for workload in workload_type: - # workload == constants.SUBSCRIPTION: - rdr_workload = dr_workload(num_of_subscription=2, num_of_appset=2) - # drpc_obj = DRPC(namespace=rdr_workload.workload_namespace) - - # drpc_obj = DRPC( - # namespace=constants.GITOPS_CLUSTER_NAMESPACE, - # resource_name=f"{rdr_workload.appset_placement_name}-drpc", - # ) - - logger.info("Enable ACM MultiClusterObservability") - run_cmd(f"oc create -f {multiclusterobservability.yaml}") - - def build_bucket_name(acm_indexes): - """ - Create backupname from cluster names - Args: - acm_indexes (list): List of acm indexes - """ - bucket_name = "" - for index in acm_indexes: - bucket_name += config.clusters[index].ENV_DATA["cluster_name"] - return bucket_name - - # Configuring s3 bucket - self.meta_obj.get_meta_access_secret_keys() - - endpoint_url = ("https://s3.amazonaws.com",) - - # bucket name formed like '{acm_active_cluster}-{acm_passive_cluster}' - self.meta_obj.bucket_name = build_bucket_name(acm_indexes, observability) - # create s3 bucket - create_s3_bucket( - self.meta_obj.access_key, - self.meta_obj.secret_key, - self.meta_obj.bucket_name, - ) - # Label the hub cluster to enable VolumeSynchronizationDelayAlert - run_cmd( - "oc label namespace openshift-operators openshift.io/cluster-monitoring='true'" - ) - - # oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}' - - def create_s3_bucket(access_key, secret_key, bucket_name): - """ - Create s3 bucket - Args: - access_key (str): S3 access key - secret_key (str): S3 secret key - acm_indexes (list): List of acm indexes - """ - client = boto3.resource( - "s3", - verify=True, - endpoint_url="https://s3.amazonaws.com", - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - ) - try: - client.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={ - "LocationConstraint": constants.AWS_REGION - }, - ) - logger.info(f"Successfully created backup bucket: {bucket_name}") - except BotoCoreError as e: - logger.error(f"Failed to create s3 bucket {e}") - raise From 692be43bc8191fb8be4df9cea0db76f32d9160ae Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Sun, 7 Apr 2024 01:12:29 +0530 Subject: [PATCH 04/32] added notes in yaml files Signed-off-by: am-agrawa --- ocs_ci/templates/DR/multiclusterobservability.yaml | 1 + ocs_ci/templates/DR/observability-metrics-configmap.yaml | 5 +++++ ocs_ci/templates/DR/thanos.yaml | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/ocs_ci/templates/DR/multiclusterobservability.yaml b/ocs_ci/templates/DR/multiclusterobservability.yaml index a107d1865a4..9a9bb269e61 100644 --- a/ocs_ci/templates/DR/multiclusterobservability.yaml +++ b/ocs_ci/templates/DR/multiclusterobservability.yaml @@ -1,4 +1,5 @@ # This config file is used to enable ACM observability + --- apiVersion: observability.open-cluster-management.io/v1beta2 kind: MultiClusterObservability diff --git a/ocs_ci/templates/DR/observability-metrics-configmap.yaml b/ocs_ci/templates/DR/observability-metrics-configmap.yaml index ab5ee5a9a6e..b3b911f8538 100644 --- a/ocs_ci/templates/DR/observability-metrics-configmap.yaml +++ b/ocs_ci/templates/DR/observability-metrics-configmap.yaml @@ -1,3 +1,8 @@ +# This config file is allow data to reflect on the DR monitoring dashboard +# by whitelisting ceph_rbd_* metrics +# Additionally we whitelist a few odf_* metrics but those are optional + +--- apiVersion: v1 metadata: name: mp-custom-allowlist diff --git a/ocs_ci/templates/DR/thanos.yaml b/ocs_ci/templates/DR/thanos.yaml index 22fd653ef0d..4e7e6db780b 100644 --- a/ocs_ci/templates/DR/thanos.yaml +++ b/ocs_ci/templates/DR/thanos.yaml @@ -1,3 +1,9 @@ +# This config file is used to enable ACM observability. +# When the Observability service is enabled, the hub cluster is always configured +# to collect and send metrics to the configured Thanos instance, regardless of whether +# hub self-management is enabled or not. + +--- apiVersion: v1 kind: Secret metadata: From b5b336f715fa2095be30ea9cb1aaf2d76e676b35 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 17 May 2024 16:16:15 +0530 Subject: [PATCH 05/32] code fixes and feedback Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 80 +++++++++++++------ .../DR/multiclusterobservability.yaml | 2 +- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 5e29f88e2cd..3b89c2d5463 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -44,9 +44,7 @@ check_cephcluster_status, ) from ocs_ci.ocs.constants import ( - MULTICLUSTEROBSERVABILITY_PATH, OBSERVABILITYMETRICSCONFIGMAP_PATH, - THANOS_PATH, ) from ocs_ci.ocs.exceptions import ( CephHealthException, @@ -155,6 +153,7 @@ from ocs_ci.helpers import helpers from ocs_ci.helpers.helpers import ( set_configmap_log_level_rook_ceph_operator, + get_default_storage_class, ) from ocs_ci.ocs.ui.helpers_ui import ui_deployment_conditions from ocs_ci.utility.utils import get_az_count @@ -490,6 +489,7 @@ def do_deploy_rdr(self): dr_conf = self.get_rdr_conf() deploy_dr = get_multicluster_dr_deployment()(dr_conf) deploy_dr.deploy() + deploy_dr.enable def do_deploy_lvmo(self): """ @@ -3404,6 +3404,10 @@ def __init__(self, dr_conf): self.rbd = dr_conf.get("rbd_dr_scenario", False) # CephFS For future usecase self.cephfs = dr_conf.get("cephfs_dr_scenario", False) + self.thanos_yaml_file = os.path.join(constants.THANOS_PATH) + self.multiclusterobservability_file = os.path.join( + constants.MULTICLUSTEROBSERVABILITY_PATH + ) def deploy(self): """ @@ -3419,6 +3423,7 @@ def deploy(self): self.configure_mirror_peer() rbddops.deploy() self.deploy_dr_policy() + self.enable_acm_observability() # Enable cluster backup on both ACMs for i in acm_indexes: @@ -3451,6 +3456,18 @@ def deploy(self): else: self.enable_managed_serviceaccount() + retry(CommandFailed, tries=10, delay=30) + + def check_observability_status(self): + """ + Check observability status + Returns (bool): True or False + + """ + return run_cmd( + "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" + ) + @retry(ACMObservabilityNotEnabled, tries=10, delay=5, backoff=5) def thanos_secret(self): """ @@ -3461,14 +3478,18 @@ def thanos_secret(self): access_key = secret_dict["AWS"]["AWS_ACCESS_KEY_ID"] secret_key = secret_dict["AWS"]["AWS_SECRET_ACCESS_KEY"] thanos_secret_data = templating.load_yaml(self.thanos_yaml_file) - thanos_secret_data["stringData"]["thanos.yaml"][ + thanos_secret_data["stringData"]["thanos.yaml"]["config"][ "bucket" ] = self.build_bucket_name() - thanos_secret_data["stringData"]["thanos.yaml"][ + thanos_secret_data["stringData"]["thanos.yaml"]["config"][ "endpoint" ] = "https://s3.amazonaws.com" - thanos_secret_data["stringData"]["thanos.yaml"]["access_key"] = access_key - thanos_secret_data["stringData"]["thanos.yaml"]["secret_key"] = secret_key + thanos_secret_data["stringData"]["thanos.yaml"]["config"][ + "access_key" + ] = access_key + thanos_secret_data["stringData"]["thanos.yaml"]["config"][ + "secret_key" + ] = secret_key thanos_data_yaml = tempfile.NamedTemporaryFile( mode="w+", prefix="thanos", delete=False ) @@ -3477,38 +3498,51 @@ def thanos_secret(self): logger.info( "Creating thanos.yaml needed for ACM observability after passing required params" ) - run_cmd(f"oc create -f {THANOS_PATH}") - - logger.info("Allow some time for ACM Observability to be enabled") - time.sleep(120) + run_cmd(f"oc create -f {thanos_data_yaml.name}") - check_observability_status = run_cmd( - "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" - ) - if check_observability_status: + if self.check_observability_status(): logger.info("ACM observability is successfully enabled") else: - raise ACMObservabilityNotEnabled( - "ACM Observability is not enabled, status is False" - ) + logger.error("ACM observability is not enabled") def enable_acm_observability(self): """ Function to enable ACM observability for enabling DR monitoring dashboard for Regional DR on the RHACM console. """ + config.switch_acm_ctx() - logger.info("Enable ACM MultiClusterObservability") - run_cmd(f"oc create -f {MULTICLUSTEROBSERVABILITY_PATH}") + defaultstorageclass = get_default_storage_class() - logger.info("Whitelist RBD metrics and create configmap") - run_cmd(f"oc create -f {OBSERVABILITYMETRICSCONFIGMAP_PATH}") + logger.info( + "Enabling ACM MultiClusterObservability for DR monitoring dashboard" + ) + + # load multiclusterobservability.yaml + multiclusterobservability_yaml_data = templating.load_yaml( + self.multiclusterobservability_file + ) + multiclusterobservability_yaml_data["spec"]["storageConfig"][ + "storageClass" + ] = defaultstorageclass[0] + multiclusterobservability_data_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="multiclusterobservability", delete=False + ) + templating.dump_data_to_temp_yaml( + multiclusterobservability_yaml_data, + multiclusterobservability_data_yaml.name, + ) - logger.info("Enable thanos secret yaml") + run_cmd(f"oc create -f {multiclusterobservability_data_yaml.name}") + + logger.info("Create thanos secret yaml") self.thanos_secret() + logger.info("Whitelist RBD metrics by creating configmap") + run_cmd(f"oc create -f {OBSERVABILITYMETRICSCONFIGMAP_PATH}") + logger.info( - "Add label for cluster-monitoring needed to fire VolumeSyncronizationDelayAlert" + "Add label for cluster-monitoring needed to fire VolumeSyncronizationDelayAlert on the Hub cluster" ) run_cmd( "oc label namespace openshift-operators openshift.io/cluster-monitoring='true'" diff --git a/ocs_ci/templates/DR/multiclusterobservability.yaml b/ocs_ci/templates/DR/multiclusterobservability.yaml index 9a9bb269e61..5537d28601e 100644 --- a/ocs_ci/templates/DR/multiclusterobservability.yaml +++ b/ocs_ci/templates/DR/multiclusterobservability.yaml @@ -18,5 +18,5 @@ spec: name: thanos-object-storage receiveStorageSize: 100Gi ruleStorageSize: 1Gi - storageClass: thin-csi-odf + storageClass: PLACEHOLDER storeStorageSize: 10Gi From 6a2991e7f86fb9236549d6bebc85083642231d12 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 17 May 2024 16:32:48 +0530 Subject: [PATCH 06/32] revert unwanted code changes Signed-off-by: am-agrawa --- ocs_ci/ocs/exceptions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ocs_ci/ocs/exceptions.py b/ocs_ci/ocs/exceptions.py index f00d5b7b1a5..2097566c9d8 100644 --- a/ocs_ci/ocs/exceptions.py +++ b/ocs_ci/ocs/exceptions.py @@ -696,6 +696,22 @@ class UsernameNotFoundException(Exception): pass +class MultiStorageClusterExternalCephHealth(Exception): + pass + + +class StorageSizeNotReflectedException(Exception): + pass + + +class ClusterNotInSTSModeException(Exception): + pass + + +class APIRequestError(Exception): + pass + + class ACMObservabilityNotEnabled(Exception): pass From f6f83ffe4e3da69b699a1e6c1082f602cc4083b7 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 17 May 2024 16:37:03 +0530 Subject: [PATCH 07/32] revert unwanted code changes-2 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 1 - ocs_ci/ocs/constants.py | 1 - 2 files changed, 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 3b89c2d5463..fc8e7b22524 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -489,7 +489,6 @@ def do_deploy_rdr(self): dr_conf = self.get_rdr_conf() deploy_dr = get_multicluster_dr_deployment()(dr_conf) deploy_dr.deploy() - deploy_dr.enable def do_deploy_lvmo(self): """ diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 4a1486306c5..3d075d41ca9 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -2551,7 +2551,6 @@ "&rows_per_page=25&delta=1296000&contains=submariner-operator-bundle-container-v" ) SUBMARINER_BREW_REPO = "brew.registry.redhat.io/rh-osbs/iib" -SUBCTL_DOWNSTREAM_URL = "registry.redhat.io/rhacm2/" # Multicluster related From 4ec140212f82ac0966fb34ffbea93195821b29a8 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 29 May 2024 19:48:05 +0530 Subject: [PATCH 08/32] rebase on 29may24 Signed-off-by: am-agrawa --- ocs_ci/templates/DR/observability-metrics-configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/templates/DR/observability-metrics-configmap.yaml b/ocs_ci/templates/DR/observability-metrics-configmap.yaml index b3b911f8538..a1fa5ee5502 100644 --- a/ocs_ci/templates/DR/observability-metrics-configmap.yaml +++ b/ocs_ci/templates/DR/observability-metrics-configmap.yaml @@ -1,6 +1,6 @@ # This config file is allow data to reflect on the DR monitoring dashboard # by whitelisting ceph_rbd_* metrics -# Additionally we whitelist a few odf_* metrics but those are optional +# Additionally we whitelist a few odf_* metrics but those are optional. --- apiVersion: v1 From 4da9c251bd75ef1671070fd6f44ff629d4f05b81 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 29 May 2024 20:15:10 +0530 Subject: [PATCH 09/32] feedback fixes Signed-off-by: am-agrawa --- ocs_ci/ocs/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 3d075d41ca9..4a1486306c5 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -2551,6 +2551,7 @@ "&rows_per_page=25&delta=1296000&contains=submariner-operator-bundle-container-v" ) SUBMARINER_BREW_REPO = "brew.registry.redhat.io/rh-osbs/iib" +SUBCTL_DOWNSTREAM_URL = "registry.redhat.io/rhacm2/" # Multicluster related From bb7c4d3ec4da3fa6007651bf3ef71475ce2d278b Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 29 May 2024 20:17:00 +0530 Subject: [PATCH 10/32] fix retry marker Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index fc8e7b22524..eaed9ed6d5e 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3455,8 +3455,7 @@ def deploy(self): else: self.enable_managed_serviceaccount() - retry(CommandFailed, tries=10, delay=30) - + @retry(CommandFailed, tries=10, delay=30) def check_observability_status(self): """ Check observability status From 426fd3add0f85cfbc5134462755d05e0d30826e9 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 29 May 2024 23:51:54 +0530 Subject: [PATCH 11/32] import error correction Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index eaed9ed6d5e..0f98b1ff227 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -43,9 +43,6 @@ get_lvm_full_version, check_cephcluster_status, ) -from ocs_ci.ocs.constants import ( - OBSERVABILITYMETRICSCONFIGMAP_PATH, -) from ocs_ci.ocs.exceptions import ( CephHealthException, ChannelNotFound, @@ -3537,7 +3534,7 @@ def enable_acm_observability(self): self.thanos_secret() logger.info("Whitelist RBD metrics by creating configmap") - run_cmd(f"oc create -f {OBSERVABILITYMETRICSCONFIGMAP_PATH}") + run_cmd(f"oc create -f {constants.OBSERVABILITYMETRICSCONFIGMAP_PATH}") logger.info( "Add label for cluster-monitoring needed to fire VolumeSyncronizationDelayAlert on the Hub cluster" From ab50d4c96294712f1b4393a7fd43fc543a491630 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Thu, 30 May 2024 15:32:47 +0530 Subject: [PATCH 12/32] increase timeout for gitops to be be ready Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 0f98b1ff227..cb184b43379 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -253,8 +253,8 @@ def deploy_gitops_operator(self, switch_ctx=None): self.wait_for_subscription( constants.GITOPS_OPERATOR_NAME, namespace=constants.OPENSHIFT_OPERATORS ) - logger.info("Sleeping for 120 seconds after subscribing to GitOps Operator") - time.sleep(120) + logger.info("Sleeping for 180 seconds after subscribing to GitOps Operator") + time.sleep(180) subscriptions = ocp.OCP( kind=constants.SUBSCRIPTION_WITH_ACM, resource_name=constants.GITOPS_OPERATOR_NAME, From ff71c75935c16f5e30d2890fddeb08a05a190297 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 31 May 2024 13:55:18 +0530 Subject: [PATCH 13/32] add path in constants for yaml Signed-off-by: am-agrawa --- ocs_ci/ocs/constants.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 4a1486306c5..5f19f5fe1a7 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -2760,6 +2760,13 @@ CLUSTERROLEBINDING_APPSET_PULLMODEL_PATH = os.path.join( TEMPLATE_DIR, "DR", "clusterrolebinding_appset_pullmodel.yaml" ) +THANOS_PATH = os.path.join(TEMPLATE_DIR, "DR", "thanos.yaml") +MULTICLUSTEROBSERVABILITY_PATH = os.path.join( + TEMPLATE_DIR, "DR", "multiclusterobservability.yaml" +) +OBSERVABILITYMETRICSCONFIGMAP_PATH = os.path.join( + TEMPLATE_DIR, "DR", "observability-metrics-configmap.yaml" +) APPLICATION_SET = "ApplicationSet" PLACEMENT = "Placement" GITOPS_CLUSTER_NAMESPACE = "openshift-gitops" From 257de49568c10ebf5eb329643bc3ae85c81fd2e7 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Mon, 3 Jun 2024 13:51:58 +0530 Subject: [PATCH 14/32] validate_serviceexport before ocs-install-verification Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index cb184b43379..7d53e98e30e 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -93,6 +93,7 @@ setup_ceph_debug, get_osd_count, StorageCluster, + validate_serviceexport, ) from ocs_ci.ocs.uninstall import uninstall_ocs from ocs_ci.ocs.utils import ( @@ -416,6 +417,7 @@ def do_deploy_ocs(self): .get("multiClusterService") .get("enabled") ), "Failed to update StorageCluster globalnet" + validate_serviceexport() ocs_install_verification( timeout=2000, ocs_registry_image=ocs_registry_image ) From 55544b68864bd09720ae3f40559617360292338e Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 4 Jun 2024 23:00:32 +0530 Subject: [PATCH 15/32] insatll oadp first, enable obs. before policy creation Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 7d53e98e30e..34187ae002e 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -669,8 +669,8 @@ def deploy_cluster(self, log_cli_level="DEBUG"): self.do_deploy_lvmo() self.do_deploy_submariner() self.do_gitops_deploy() - self.do_deploy_ocs() self.do_deploy_oadp() + self.do_deploy_ocs() self.do_deploy_rdr() self.do_deploy_fusion() self.do_deploy_odf_provider_mode() @@ -3420,8 +3420,8 @@ def deploy(self): rbddops = RBDDRDeployOps() self.configure_mirror_peer() rbddops.deploy() - self.deploy_dr_policy() self.enable_acm_observability() + self.deploy_dr_policy() # Enable cluster backup on both ACMs for i in acm_indexes: From f6b89daa8024c02c114a896db94f82c2157031ff Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 4 Jun 2024 23:02:25 +0530 Subject: [PATCH 16/32] rebase, 4th june24 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 34187ae002e..0fd523d3784 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3500,7 +3500,7 @@ def thanos_secret(self): if self.check_observability_status(): logger.info("ACM observability is successfully enabled") else: - logger.error("ACM observability is not enabled") + logger.error("ACM observability could not be enabled") def enable_acm_observability(self): """ From 1b749285214b070fc800e3c7345af0e2d88f884b Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 5 Jun 2024 12:47:30 +0530 Subject: [PATCH 17/32] increase serviceexport time Signed-off-by: am-agrawa --- ocs_ci/ocs/resources/storage_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index 880c853c450..46198648033 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -2675,7 +2675,7 @@ def patch_storage_cluster_for_custom_storage_class( return False -@retry(AssertionError, 50, 10, 1) +@retry(AssertionError, 50, 20, 5) def validate_serviceexport(): """ validate the serviceexport resource From 8c0f228a30df9a8a7d1f7f721093c52afe1b07e4 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 5 Jun 2024 18:37:25 +0530 Subject: [PATCH 18/32] add retry to blockpool status check Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 0fd523d3784..514781cbcae 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -2604,6 +2604,7 @@ class RBDDRDeployOps(object): def deploy(self): self.configure_rbd() + @retry(ResourceWrongStatusException, tries=10, delay=5) def configure_rbd(self): st_string = '{.items[?(@.metadata.ownerReferences[*].kind=="StorageCluster")].spec.mirroring.enabled}' query_mirroring = ( From f980e6547b870eb51d7b8f90c843b354223c9161 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Thu, 18 Jul 2024 20:54:15 +0530 Subject: [PATCH 19/32] rebase on 18july24 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 514781cbcae..7dcca13448c 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -57,7 +57,6 @@ UnexpectedDeploymentConfiguration, ResourceNotFoundError, ACMClusterConfigurationException, - MDRDeploymentException, ACMObservabilityNotEnabled, ) from ocs_ci.deployment.cert_manager import deploy_cert_manager @@ -3224,7 +3223,7 @@ def validate_dpa(self): resource = backupstorage.get() if resource["status"].get("phase") != "Available": raise ACMClusterConfigurationException( - "Backupstoragelocation resource is not in 'Avaialble' phase" + "Backupstoragelocation resource is not in 'Available' phase" ) logger.info("Dataprotection application successful") From fe283161a3e1d307593f54869265cbfbf917caf1 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Thu, 18 Jul 2024 20:56:59 +0530 Subject: [PATCH 20/32] rebase on 18july24 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 7dcca13448c..abc606e4be2 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3214,7 +3214,7 @@ def validate_dpa(self): if veleropod[0]["status"]["phase"] != "Running": raise ACMClusterConfigurationException("Velero pod not in 'Running' phase") - # Check backupstoragelocation resource in "Available" phase + # Check backupstoragelocation resource is in "Available" phase backupstorage = ocp.OCP( kind="BackupStorageLocation", resource_name="default", From c9fc357e386e85c5b58471cf317758bff5e5bc5b Mon Sep 17 00:00:00 2001 From: Aman Agrawal Date: Thu, 18 Jul 2024 23:24:46 +0530 Subject: [PATCH 21/32] new rebase on 18july24 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index abc606e4be2..83019a0e547 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3214,7 +3214,7 @@ def validate_dpa(self): if veleropod[0]["status"]["phase"] != "Running": raise ACMClusterConfigurationException("Velero pod not in 'Running' phase") - # Check backupstoragelocation resource is in "Available" phase + # Check backupstoragelocation resource is in "Available" phase or not backupstorage = ocp.OCP( kind="BackupStorageLocation", resource_name="default", From 73d861551e52cc79035287d9fa0ff4ba0003fde9 Mon Sep 17 00:00:00 2001 From: Aman Agrawal Date: Thu, 18 Jul 2024 23:29:28 +0530 Subject: [PATCH 22/32] re-try Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 83019a0e547..abc606e4be2 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3214,7 +3214,7 @@ def validate_dpa(self): if veleropod[0]["status"]["phase"] != "Running": raise ACMClusterConfigurationException("Velero pod not in 'Running' phase") - # Check backupstoragelocation resource is in "Available" phase or not + # Check backupstoragelocation resource is in "Available" phase backupstorage = ocp.OCP( kind="BackupStorageLocation", resource_name="default", From 493b59dc1cb261258bd90e96e901607ef93b5215 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 19 Jul 2024 00:38:10 +0530 Subject: [PATCH 23/32] flake8 issue Signed-off-by: am-agrawa --- ocs_ci/helpers/dr_helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ocs_ci/helpers/dr_helpers.py b/ocs_ci/helpers/dr_helpers.py index fbe483f17c5..21d36bf18a6 100644 --- a/ocs_ci/helpers/dr_helpers.py +++ b/ocs_ci/helpers/dr_helpers.py @@ -5,7 +5,6 @@ import json import logging import tempfile -import boto3 from ocs_ci.framework import config from ocs_ci.ocs import constants, ocp @@ -31,7 +30,6 @@ CommandFailed, run_cmd, ) -from botocore.exceptions import BotoCoreError logger = logging.getLogger(__name__) From 9a6dc61dcdea9db47f2c667daca3fcf5fcf096b5 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 23 Jul 2024 13:10:08 +0530 Subject: [PATCH 24/32] rebase on 23july24 Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index abc606e4be2..e35e64103aa 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -253,8 +253,8 @@ def deploy_gitops_operator(self, switch_ctx=None): self.wait_for_subscription( constants.GITOPS_OPERATOR_NAME, namespace=constants.OPENSHIFT_OPERATORS ) - logger.info("Sleeping for 180 seconds after subscribing to GitOps Operator") - time.sleep(180) + logger.info("Sleeping for 120 seconds after subscribing to GitOps Operator") + time.sleep(120) subscriptions = ocp.OCP( kind=constants.SUBSCRIPTION_WITH_ACM, resource_name=constants.GITOPS_OPERATOR_NAME, From e5e2d1c2c2c1554ff00146304fcdf65a89373823 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 23 Jul 2024 17:59:55 +0530 Subject: [PATCH 25/32] code fixes Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index e35e64103aa..85b94c71362 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3402,10 +3402,6 @@ def __init__(self, dr_conf): self.rbd = dr_conf.get("rbd_dr_scenario", False) # CephFS For future usecase self.cephfs = dr_conf.get("cephfs_dr_scenario", False) - self.thanos_yaml_file = os.path.join(constants.THANOS_PATH) - self.multiclusterobservability_file = os.path.join( - constants.MULTICLUSTEROBSERVABILITY_PATH - ) def deploy(self): """ @@ -3471,10 +3467,8 @@ def thanos_secret(self): Create thanos secret yaml by using Noobaa or AWS bucket (AWS bucket is used in this function) """ - secret_dict = load_auth_config().get("AUTH", {}) - access_key = secret_dict["AWS"]["AWS_ACCESS_KEY_ID"] - secret_key = secret_dict["AWS"]["AWS_SECRET_ACCESS_KEY"] - thanos_secret_data = templating.load_yaml(self.thanos_yaml_file) + self.meta_obj.get_meta_access_secret_keys() + thanos_secret_data = templating.load_yaml(constants.THANOS_PATH) thanos_secret_data["stringData"]["thanos.yaml"]["config"][ "bucket" ] = self.build_bucket_name() @@ -3483,10 +3477,10 @@ def thanos_secret(self): ] = "https://s3.amazonaws.com" thanos_secret_data["stringData"]["thanos.yaml"]["config"][ "access_key" - ] = access_key + ] = self.meta_obj.access_key thanos_secret_data["stringData"]["thanos.yaml"]["config"][ "secret_key" - ] = secret_key + ] = self.meta_obj.secret_key thanos_data_yaml = tempfile.NamedTemporaryFile( mode="w+", prefix="thanos", delete=False ) @@ -3517,7 +3511,7 @@ def enable_acm_observability(self): # load multiclusterobservability.yaml multiclusterobservability_yaml_data = templating.load_yaml( - self.multiclusterobservability_file + constants.MULTICLUSTEROBSERVABILITY_PATH ) multiclusterobservability_yaml_data["spec"]["storageConfig"][ "storageClass" From 019af84f9a222e5fc2d060ada578060f4f48414c Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 23 Jul 2024 22:03:37 +0530 Subject: [PATCH 26/32] code fixes after local run Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 51 +++++++++++-------- .../DR/observability-metrics-configmap.yaml | 1 + 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 85b94c71362..a928bd7a2f2 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -157,7 +157,6 @@ from ocs_ci.utility.ibmcloud import run_ibmcloud_cmd from ocs_ci.deployment.cnv import CNVInstaller - logger = logging.getLogger(__name__) @@ -3450,37 +3449,50 @@ def deploy(self): else: self.enable_managed_serviceaccount() - @retry(CommandFailed, tries=10, delay=30) + @retry(ACMObservabilityNotEnabled, tries=10, delay=30) def check_observability_status(self): """ Check observability status Returns (bool): True or False """ - return run_cmd( - "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" + + acm_observability_status = bool( + run_cmd( + "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" + ) ) - @retry(ACMObservabilityNotEnabled, tries=10, delay=5, backoff=5) + if acm_observability_status: + logger.info("ACM observability is successfully enabled") + else: + logger.error("ACM observability could not be enabled, re-trying...") + raise ACMObservabilityNotEnabled + def thanos_secret(self): """ Create thanos secret yaml by using Noobaa or AWS bucket (AWS bucket is used in this function) """ + acm_indexes = get_all_acm_indexes() self.meta_obj.get_meta_access_secret_keys() thanos_secret_data = templating.load_yaml(constants.THANOS_PATH) - thanos_secret_data["stringData"]["thanos.yaml"]["config"][ - "bucket" - ] = self.build_bucket_name() - thanos_secret_data["stringData"]["thanos.yaml"]["config"][ - "endpoint" - ] = "https://s3.amazonaws.com" - thanos_secret_data["stringData"]["thanos.yaml"]["config"][ - "access_key" - ] = self.meta_obj.access_key - thanos_secret_data["stringData"]["thanos.yaml"]["config"][ - "secret_key" - ] = self.meta_obj.secret_key + thanos_bucket_name = ( + f"dr-thanos-bucket-{config.clusters[0].ENV_DATA['cluster_name']}" + ) + self.create_s3_bucket( + self.meta_obj.access_key, + self.meta_obj.secret_key, + thanos_bucket_name, + ) + logger.info(f"ACM indexes {acm_indexes}") + navigate_thanos_yaml = thanos_secret_data["stringData"]["thanos.yaml"] + navigate_thanos_yaml = yaml.safe_load(navigate_thanos_yaml) + navigate_thanos_yaml["config"]["bucket"] = thanos_bucket_name + navigate_thanos_yaml["config"]["endpoint"] = "s3.amazonaws.com" + navigate_thanos_yaml["config"]["access_key"] = self.meta_obj.access_key + navigate_thanos_yaml["config"]["secret_key"] = self.meta_obj.secret_key + thanos_secret_data["stringData"]["thanos.yaml"] = str(navigate_thanos_yaml) thanos_data_yaml = tempfile.NamedTemporaryFile( mode="w+", prefix="thanos", delete=False ) @@ -3491,10 +3503,7 @@ def thanos_secret(self): ) run_cmd(f"oc create -f {thanos_data_yaml.name}") - if self.check_observability_status(): - logger.info("ACM observability is successfully enabled") - else: - logger.error("ACM observability could not be enabled") + self.check_observability_status() def enable_acm_observability(self): """ diff --git a/ocs_ci/templates/DR/observability-metrics-configmap.yaml b/ocs_ci/templates/DR/observability-metrics-configmap.yaml index a1fa5ee5502..aae9c543533 100644 --- a/ocs_ci/templates/DR/observability-metrics-configmap.yaml +++ b/ocs_ci/templates/DR/observability-metrics-configmap.yaml @@ -4,6 +4,7 @@ --- apiVersion: v1 +kind: ConfigMap metadata: name: mp-custom-allowlist namespace: open-cluster-management-observability From ca12f5ad75f10e88315164e8efc2980dfcee77fa Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 30 Jul 2024 22:22:08 +0530 Subject: [PATCH 27/32] fix stale element Signed-off-by: am-agrawa --- ocs_ci/ocs/acm/acm.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ocs_ci/ocs/acm/acm.py b/ocs_ci/ocs/acm/acm.py index 413e0d2b2b8..8c154f01d59 100644 --- a/ocs_ci/ocs/acm/acm.py +++ b/ocs_ci/ocs/acm/acm.py @@ -7,7 +7,11 @@ from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.common.by import By -from selenium.common.exceptions import NoSuchElementException +from selenium.common.exceptions import ( + NoSuchElementException, + StaleElementReferenceException, + TimeoutException, +) from ocs_ci.helpers.helpers import create_unique_resource_name from ocs_ci.ocs import constants from ocs_ci.ocs.acm.acm_constants import ( @@ -43,6 +47,7 @@ from ocs_ci.utility import templating from ocs_ci.ocs.resources.ocs import OCS from ocs_ci.helpers.helpers import create_project +from ocs_ci.utility.retry import retry log = logging.getLogger(__name__) @@ -56,6 +61,7 @@ class AcmAddClusters(AcmPageNavigator): def __init__(self): super().__init__() self.page_nav = self.acm_page_nav + self.driver = SeleniumDriver() def import_cluster_ui(self, cluster_name, kubeconfig_location): """ @@ -234,7 +240,9 @@ def install_submariner_ui(self, globalnet=True): log.info("Click on 'Submariner add-ons' tab") self.do_click(self.page_nav["submariner-tab"]) log.info("Click on 'Install Submariner add-ons' button") - self.do_click(self.page_nav["install-submariner-btn"], timeout=120) + retry((StaleElementReferenceException, TimeoutException), retries=5, delay=10)( + self.do_click + )(self.page_nav["install-submariner-btn"], enable_screenshot=True) log.info("Click on 'Target clusters'") self.do_click(self.page_nav["target-clusters"]) log.info(f"Select 1st cluster which is {cluster_name_a}") From 73d02b5bb3b61ca4f59f38ec26bf3421f3f1eee7 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Wed, 31 Jul 2024 13:09:08 +0530 Subject: [PATCH 28/32] revert all changes, mark stale to true in do_click Signed-off-by: am-agrawa --- ocs_ci/ocs/acm/acm.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ocs_ci/ocs/acm/acm.py b/ocs_ci/ocs/acm/acm.py index 8c154f01d59..24fb6a6c513 100644 --- a/ocs_ci/ocs/acm/acm.py +++ b/ocs_ci/ocs/acm/acm.py @@ -9,8 +9,6 @@ from selenium.webdriver.common.by import By from selenium.common.exceptions import ( NoSuchElementException, - StaleElementReferenceException, - TimeoutException, ) from ocs_ci.helpers.helpers import create_unique_resource_name from ocs_ci.ocs import constants @@ -47,7 +45,6 @@ from ocs_ci.utility import templating from ocs_ci.ocs.resources.ocs import OCS from ocs_ci.helpers.helpers import create_project -from ocs_ci.utility.retry import retry log = logging.getLogger(__name__) @@ -240,9 +237,11 @@ def install_submariner_ui(self, globalnet=True): log.info("Click on 'Submariner add-ons' tab") self.do_click(self.page_nav["submariner-tab"]) log.info("Click on 'Install Submariner add-ons' button") - retry((StaleElementReferenceException, TimeoutException), retries=5, delay=10)( - self.do_click - )(self.page_nav["install-submariner-btn"], enable_screenshot=True) + self.do_click( + self.page_nav["install-submariner-btn"], + enable_screenshot=True, + avoid_stale=True, + ) log.info("Click on 'Target clusters'") self.do_click(self.page_nav["target-clusters"]) log.info(f"Select 1st cluster which is {cluster_name_a}") From 9aca8861953d4bc003ab2a5febee11c7e4c9eccd Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Fri, 2 Aug 2024 14:52:07 +0530 Subject: [PATCH 29/32] another stale element fix Signed-off-by: am-agrawa --- ocs_ci/ocs/ui/acm_ui.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ocs_ci/ocs/ui/acm_ui.py b/ocs_ci/ocs/ui/acm_ui.py index 1f9c79041e5..72921130d2d 100644 --- a/ocs_ci/ocs/ui/acm_ui.py +++ b/ocs_ci/ocs/ui/acm_ui.py @@ -82,7 +82,12 @@ def navigate_clusters_page(self, timeout=120): self.choose_expanded_mode( mode=True, locator=self.acm_page_nav["Infrastructure"] ) - self.do_click(locator=self.acm_page_nav["Clusters_page"], timeout=timeout) + self.do_click( + locator=self.acm_page_nav["Clusters_page"], + timeout=timeout, + enable_screenshot=True, + avoid_stale=True, + ) def navigate_bare_metal_assets_page(self): """ @@ -810,7 +815,7 @@ def fill_network_info(self): left_shift_offset = len(remote_text) - index self.do_send_keys( self.acm_page_nav["cc_vsphere_network_name"], - f"{left_shift_offset*Keys.ARROW_LEFT}{constants.SPACE}", + f"{left_shift_offset * Keys.ARROW_LEFT}{constants.SPACE}", ) except ValueError: raise ACMClusterDeployException( From fabb65b9861963e9a0b860e030b7717d8f3ee095 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Mon, 5 Aug 2024 23:48:42 +0530 Subject: [PATCH 30/32] feedback cmt Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index a928bd7a2f2..f4b3d8a4145 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3453,7 +3453,8 @@ def deploy(self): def check_observability_status(self): """ Check observability status - Returns (bool): True or False + + raises ACMObservabilityNotEnabled: if the cmd returns False, ACM observability is not enabled """ From 6ab059811462191289ca0b6f0b06b831052dd3e7 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 6 Aug 2024 13:40:55 +0530 Subject: [PATCH 31/32] replace run_cmd to exec_cmd Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index f4b3d8a4145..67155c3b20c 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -333,7 +333,7 @@ def do_gitops_deploy(self): for cluster in managed_clusters: if cluster["metadata"]["name"] != constants.ACM_LOCAL_CLUSTER: config.switch_to_cluster_by_name(cluster["metadata"]["name"]) - run_cmd( + exec_cmd( f"oc create -f {constants.CLUSTERROLEBINDING_APPSET_PULLMODEL_PATH}" ) @@ -3459,7 +3459,7 @@ def check_observability_status(self): """ acm_observability_status = bool( - run_cmd( + exec_cmd( "oc get MultiClusterObservability observability -o jsonpath='{.status.conditions[1].status}'" ) ) @@ -3502,7 +3502,7 @@ def thanos_secret(self): logger.info( "Creating thanos.yaml needed for ACM observability after passing required params" ) - run_cmd(f"oc create -f {thanos_data_yaml.name}") + exec_cmd(f"oc create -f {thanos_data_yaml.name}") self.check_observability_status() @@ -3534,18 +3534,18 @@ def enable_acm_observability(self): multiclusterobservability_data_yaml.name, ) - run_cmd(f"oc create -f {multiclusterobservability_data_yaml.name}") + exec_cmd(f"oc create -f {multiclusterobservability_data_yaml.name}") logger.info("Create thanos secret yaml") self.thanos_secret() logger.info("Whitelist RBD metrics by creating configmap") - run_cmd(f"oc create -f {constants.OBSERVABILITYMETRICSCONFIGMAP_PATH}") + exec_cmd(f"oc create -f {constants.OBSERVABILITYMETRICSCONFIGMAP_PATH}") logger.info( "Add label for cluster-monitoring needed to fire VolumeSyncronizationDelayAlert on the Hub cluster" ) - run_cmd( + exec_cmd( "oc label namespace openshift-operators openshift.io/cluster-monitoring='true'" ) From ba6ab917b6c6331456b0c79c914951fd59535b21 Mon Sep 17 00:00:00 2001 From: am-agrawa Date: Tue, 6 Aug 2024 14:10:05 +0530 Subject: [PATCH 32/32] doctext formatting Signed-off-by: am-agrawa --- ocs_ci/deployment/deployment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index 67155c3b20c..2cbf2e7cf70 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -3454,7 +3454,8 @@ def check_observability_status(self): """ Check observability status - raises ACMObservabilityNotEnabled: if the cmd returns False, ACM observability is not enabled + Raises: + ACMObservabilityNotEnabled: if the cmd returns False, ACM observability is not enabled """