Skip to content

Commit

Permalink
Test resize OSD when one of the worker nodes got restarted or one of the resources got deleted in the middle of the process (#9765)
Browse files Browse the repository at this point in the history


Signed-off-by: Itzhak Kave <ikave@ibm.com>
Co-authored-by: Itzhak Kave <ikave@ibm.com>
  • Loading branch information
yitzhak12 and Itzhak Kave authored May 13, 2024
1 parent 0d1697f commit 86bb916
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 9 deletions.
22 changes: 22 additions & 0 deletions ocs_ci/helpers/disruption_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import random

from ocs_ci.ocs.resources import pod
from ocs_ci.ocs import constants, ocp
Expand Down Expand Up @@ -267,3 +268,24 @@ def check_new_pid(self, node_name=None):
raise TimeoutExpiredError(
f"Waiting for pid of ceph-{self.resource} in {node_name}"
)


def delete_resource_multiple_times(resource_name, num_of_iterations):
    """
    Delete a specific resource (osd, rook-operator, mon, etc.) several times in a row.

    Args:
        resource_name (str): The resource name to delete
        num_of_iterations (int): The number of iterations we delete the resource

    """
    disruption = Disruptions()
    # Initial discovery so we can pick a random resource id from the current count
    disruption.set_resource(resource_name)
    resource_id = random.randrange(disruption.resource_count)

    for iteration in range(num_of_iterations):
        log.info(
            f"Iteration {iteration}: Delete resource {resource_name} with id {resource_id}"
        )
        # Refresh the resource pods each round — the previous deletion replaced them
        disruption.set_resource(resource_name)
        disruption.delete_resource(resource_id)
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
verify_storage_device_class,
verify_device_class_in_osd_tree,
get_deviceset_count,
resize_osd,
)
from ocs_ci.ocs.cluster import check_ceph_osd_tree, CephCluster
from ocs_ci.utility.utils import ceph_health_check, TimeoutSampler, convert_device_size
Expand Down Expand Up @@ -374,3 +375,23 @@ def update_resize_osd_count(old_storage_size):
config.RUN["resize_osd_count"] = config.RUN.get("resize_osd_count", 0) + 1
else:
logger.warning("The osd size has not increased")


def basic_resize_osd(old_storage_size):
    """
    Perform the basic resize-osd scenario: double the current osd size.

    Args:
        old_storage_size (str): The old storagecluster storage size (which represents
            the old osd size), e.g. "512Gi" — a number followed by a two-char unit.

    Returns:
        str: The new storage size after increasing the osd size

    """
    logger.info(f"The current osd size is {old_storage_size}")
    # Split "<number><2-char unit>" (e.g. "512Gi") into its parts
    numeric_part = old_storage_size[:-2]
    unit = old_storage_size[-2:]
    new_storage_size = f"{int(numeric_part) * 2}{unit}"
    logger.info(f"Increase the osd size to {new_storage_size}")
    resize_osd(new_storage_size)
    return new_storage_size
65 changes: 56 additions & 9 deletions tests/functional/z_cluster/cluster_expansion/test_resize_osd.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,28 @@
ignore_leftovers,
ManageTest,
tier1,
tier4b,
tier4c,
)
from ocs_ci.ocs.constants import VOLUME_MODE_BLOCK
from ocs_ci.ocs.resources.osd_resize import (
from ocs_ci.ocs.constants import VOLUME_MODE_BLOCK, OSD, ROOK_OPERATOR, MON_DAEMON
from ocs_ci.helpers.osd_resize import (
ceph_verification_steps_post_resize_osd,
check_ceph_health_after_resize_osd,
check_resize_osd_pre_conditions,
update_resize_osd_count,
basic_resize_osd,
)
from ocs_ci.ocs.resources.pod import (
get_osd_pods,
calculate_md5sum_of_pod_files,
verify_md5sum_on_pod_files,
)
from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs
from ocs_ci.ocs.resources.storage_cluster import resize_osd, get_storage_size
from ocs_ci.ocs.resources.storage_cluster import get_storage_size
from ocs_ci.helpers.sanity_helpers import Sanity
from ocs_ci.ocs.node import get_nodes, wait_for_nodes_status
from ocs_ci.ocs.cluster import is_vsphere_ipi_cluster
from ocs_ci.helpers.disruption_helpers import delete_resource_multiple_times


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -160,12 +166,53 @@ def test_resize_osd(self):
Test resize OSD
"""
self.prepare_data_before_resize_osd()
self.new_storage_size = basic_resize_osd(self.old_storage_size)
self.verification_steps_post_resize_osd()

@tier4b
@polarion_id("OCS-5780")
def test_resize_osd_with_node_restart(self, nodes):
    """
    Test resize OSD when one of the worker nodes got restarted in the middle of the process

    """
    self.prepare_data_before_resize_osd()
    self.new_storage_size = basic_resize_osd(self.old_storage_size)
    # Restart one of the worker nodes while additional storage is being added
    wnode = random.choice(get_nodes())
    logger.info(f"Restart the worker node: {wnode.name}")
    if is_vsphere_ipi_cluster():
        # On vSphere IPI, restart without waiting and poll node status explicitly
        nodes.restart_nodes(nodes=[wnode], wait=False)
        # wait_for_nodes_status expects node *names*, not node objects
        wait_for_nodes_status(node_names=[wnode.name], timeout=300)
    else:
        nodes.restart_nodes(nodes=[wnode], wait=True)

    self.verification_steps_post_resize_osd()

@tier4c
@pytest.mark.parametrize(
    argnames=["resource_name", "num_of_iterations"],
    argvalues=[
        pytest.param(OSD, 3, marks=pytest.mark.polarion_id("OCS-5781")),
        pytest.param(ROOK_OPERATOR, 3, marks=pytest.mark.polarion_id("OCS-5782")),
        pytest.param(MON_DAEMON, 5, marks=pytest.mark.polarion_id("OCS-5783")),
    ],
)
def test_resize_osd_with_resource_delete(self, resource_name, num_of_iterations):
    """
    Test resize OSD when one of the resources got deleted in the middle of the process

    """
    # Write data and record checksums before growing the osds
    self.prepare_data_before_resize_osd()
    self.new_storage_size = basic_resize_osd(self.old_storage_size)
    # Repeatedly delete the given ceph resource while the resize is in progress
    delete_resource_multiple_times(resource_name, num_of_iterations)
    self.verification_steps_post_resize_osd()

0 comments on commit 86bb916

Please sign in to comment.