Fix test_change_client_ocs_version_and_stop_heartbeat test #9395

Merged: 9 commits, May 23, 2024
2 changes: 2 additions & 0 deletions ocs_ci/ocs/resources/storageconsumer.py
@@ -77,6 +77,8 @@ def set_ocs_version(self, version):
+ "'",
"--subresource",
"status",
"--namespace",
config.cluster_ctx.ENV_DATA["cluster_namespace"],
]
exec_cmd(" ".join(cmd))

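For context, here is a minimal, hypothetical sketch of the command list built in set_ocs_version() after this change. The resource type, patch flags, and payload shape are illustrative assumptions (the diff only shows the tail of the list); the two "--namespace" lines are what this PR actually adds.

```python
# Hypothetical sketch only: the exact flags and patch payload used by
# set_ocs_version() are not shown in full in this diff.
from ocs_ci.framework import config
from ocs_ci.utility.utils import exec_cmd


def set_ocs_version_sketch(consumer_name, version):
    # Patch the storageconsumer status with the desired operator version
    # (payload shape assumed for illustration).
    patch = f'{{"status": {{"operatorVersion": "{version}"}}}}'
    cmd = [
        "oc",
        "patch",
        "storageconsumer",
        consumer_name,
        "--type",
        "merge",
        "--patch",
        "'" + patch + "'",
        "--subresource",
        "status",
        # Added by this PR: run the patch in the configured cluster namespace
        # instead of whatever namespace the current oc context points at.
        "--namespace",
        config.cluster_ctx.ENV_DATA["cluster_namespace"],
    ]
    exec_cmd(" ".join(cmd))
```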
47 changes: 18 additions & 29 deletions ocs_ci/utility/prometheus.py
@@ -41,38 +41,27 @@ def check_alert_list(
target_alerts = [
alert for alert in alerts if alert.get("labels").get("alertname") == label
]

logger.info(f"Checking properties of found {label} alerts")
if ignore_more_occurences:
for state in states:
delete = False
for key, alert in reversed(list(enumerate(target_alerts))):
if alert.get("state") == state:
if delete:
d_msg = f"Ignoring {alert} as alert already appeared."
logger.debug(d_msg)
target_alerts.pop(key)
else:
delete = True
assert_msg = (
f"Incorrect number of {label} alerts ({len(target_alerts)} "
f"instead of {len(states)} with states: {states})."
f"\nAlerts: {target_alerts}"
)
assert len(target_alerts) == len(states), assert_msg

for key, state in enumerate(states):

assert_msg = "Alert message for alert {label} is not correct"
assert target_alerts[key]["annotations"]["message"] == msg, assert_msg

assert_msg = f"Alert {label} doesn't have {severity} severity"
assert (
target_alerts[key]["annotations"]["severity_level"] == severity
), assert_msg

assert_msg = f"Alert {label} is not in {state} state"
assert target_alerts[key]["state"] == state, assert_msg
target_alerts = [
alert
for alert in target_alerts
if alert["annotations"]["message"] == msg
and alert["annotations"]["severity_level"] == severity
and alert["state"] == state
]
assert_msg = (
f"There was not found alert {label} with message: {msg}, "
f"severity: {severity} in state: {state}"
)
assert target_alerts, assert_msg
if not ignore_more_occurences:
assert_msg = (
f"There are multiple instances of alert {label} with "
f"message: {msg}, severity: {severity} in state: {state}"
)
assert len(target_alerts) == 1, assert_msg

logger.info("Alerts were triggered correctly during utilization")

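To illustrate the refactored matching logic outside the test framework, here is a self-contained sketch; the sample alert payload is invented and only mimics the structure the Prometheus API returns.

```python
# Stand-alone illustration of the new check: filter the alerts that match the
# expected message, severity and state, then assert on the filtered list.
sample_alerts = [
    {
        "labels": {"alertname": "StorageClientHeartbeatMissed"},
        "annotations": {
            "message": "heartbeat missed for more than 120 (s)",
            "severity_level": "warning",
        },
        "state": "firing",
    },
]

label = "StorageClientHeartbeatMissed"
msg = "heartbeat missed for more than 120 (s)"
severity = "warning"
state = "firing"
ignore_more_occurences = True

target_alerts = [
    alert
    for alert in sample_alerts
    if alert["labels"]["alertname"] == label
    and alert["annotations"]["message"] == msg
    and alert["annotations"]["severity_level"] == severity
    and alert["state"] == state
]
assert target_alerts, (
    f"There was not found alert {label} with message: {msg}, "
    f"severity: {severity} in state: {state}"
)
if not ignore_more_occurences:
    # With ignore_more_occurences disabled, exactly one matching alert is expected.
    assert len(target_alerts) == 1
```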
2 changes: 1 addition & 1 deletion tests/functional/monitoring/conftest.py
@@ -1177,7 +1177,7 @@ def change_client_version():
nonlocal client
nonlocal original_cluster
# run_time of operation
run_time = 60 * 3
run_time = 60 * 7
Contributor:

@fbalak why is this value set to 60 * 7? Could you please clarify?

Contributor:
StorageClientHeartbeatMissed: the alert description specifies 120 (s) for Warning and 300 (s) for Critical.

The expressions in the Prometheus rules confirm this. Warning expression for reference:
[(time() - 120) > (ocs_storage_client_last_heartbeat > 0)](https://console-openshift-console.apps.ibm-baremetal1.qe.rh-ocs.com/monitoring/query-browser?query0=(time()%20-%20120)%20%3E%20(ocs_storage_client_last_heartbeat%20%3E%200))

StorageClientIncompatibleOperatorVersion fires immediately, as far as I understand; there is no interval in its description.

So altogether 420 s should be enough, which is the same value Filip set here.
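To spell out the arithmetic behind this comment, here is a short sketch; the constant names are made up, while the 120 s and 300 s values come from the alert descriptions quoted above.

```python
# Rough timing budget for the measurement window.
HEARTBEAT_WARNING_DELAY = 120   # StorageClientHeartbeatMissed warning threshold (s)
HEARTBEAT_CRITICAL_DELAY = 300  # StorageClientHeartbeatMissed critical threshold (s)

# Waiting out the warning and critical thresholds back to back gives 420 s,
# i.e. 60 * 7, which is the run_time chosen in this change.
run_time = HEARTBEAT_WARNING_DELAY + HEARTBEAT_CRITICAL_DELAY
assert run_time == 60 * 7
```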

client.stop_heartbeat()
client.set_ocs_version("4.13.0")
logger.info(f"Waiting for {run_time} seconds")
@@ -1,6 +1,7 @@
import logging
import pytest

from ocs_ci.framework import config
from ocs_ci.framework.pytest_customization.marks import blue_squad
from ocs_ci.framework.testlib import (
tier4c,
@@ -41,16 +42,32 @@ def test_change_client_ocs_version_and_stop_heartbeat(
client_name = measure_change_client_ocs_version_and_stop_heartbeat.get(
"metadata"
).get("client_name")
cluster_namespace = config.ENV_DATA["cluster_namespace"]
cluster_name = config.ENV_DATA["storage_cluster_name"]
target_alerts = [
{
"label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED,
"msg": f"Storage Client ({client_name}) heartbeat missed for more than 120 (s). "
"Lossy network connectivity might exist",
"msg": (
f"Storage Client ({client_name}) heartbeat missed for more than 120 (s) "
f"in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "warning",
},
{
"label": constants.ALERT_STORAGECLIENTHEARTBEATMISSED,
"msg": (
f"Storage Client ({client_name}) heartbeat missed for more than 300 (s) "
f"in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "critical",
},
{
"label": constants.ALERT_STORAGECLIENTINCOMPATIBLEOPERATORVERSION,
"msg": f"Storage Client Operator ({client_name}) differs by more "
"than 1 minor version. Client configuration may be incompatible and unsupported",
"msg": (
f"Storage Client Operator ({client_name}) differs by more than 1 minor "
f"version in namespace:cluster {cluster_namespace}:{cluster_name}."
),
"severity": "critical",
},
]
states = ["firing"]
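As a concrete example of the reworked message format, a short sketch with illustrative values; the real client_name, namespace, and cluster name come from the measurement fixture and config, not from these placeholders.

```python
# Example rendering of the warning message with made-up values.
client_name = "client-1"                  # illustrative
cluster_namespace = "openshift-storage"   # illustrative
cluster_name = "ocs-storagecluster"       # illustrative

warning_msg = (
    f"Storage Client ({client_name}) heartbeat missed for more than 120 (s) "
    f"in namespace:cluster {cluster_namespace}:{cluster_name}."
)
# Prints, as a single line:
# Storage Client (client-1) heartbeat missed for more than 120 (s) in namespace:cluster openshift-storage:ocs-storagecluster.
print(warning_msg)
```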
@@ -61,14 +78,7 @@
msg=target_alert["msg"],
alerts=alerts,
states=states,
severity="error",
)
prometheus.check_alert_list(
label=target_alert["label"],
msg=target_alert["msg"],
alerts=alerts,
states=states,
severity="warning",
severity=target_alert["severity"],
)
api.check_alert_cleared(
label=target_alert["label"],