Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verify Node Ready #908

Merged
merged 1 commit into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions benchmark_runner/common/oc/oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
PodNotCompletedTimeout, PodTerminateTimeout, PodNameNotExist, LoginFailed, VMNotCreateTimeout, VMDeleteTimeout, \
YAMLNotExist, VMNameNotExist, VMNotInitializedTimeout, VMNotReadyTimeout, VMStateTimeout, VMNotCompletedTimeout, \
ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, \
OperatorUpgradeTimeout, ODFHealthCheckTimeout)
OperatorUpgradeTimeout, ODFHealthCheckTimeout, NodeNotReady)
from benchmark_runner.common.ssh.ssh import SSH
from benchmark_runner.main.environment_variables import environment_variables

Expand Down Expand Up @@ -375,7 +375,6 @@ def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace:
else:
raise PodNotReadyTimeout(label)


def wait_for_odf_healthcheck(self, pod_name: str, namespace: str,
timeout: int = SHORT_TIMEOUT):
"""
Expand Down Expand Up @@ -441,6 +440,33 @@ def get_worker_nodes(self):
"""
return self.run(fr""" {self.__cli} get nodes -l node-role.kubernetes.io/worker= -o jsonpath="{{range .items[*]}}{{.metadata.name}}{{'\n'}}{{end}}" """)

@staticmethod
@typechecked
def check_node_status(nodes_list: list):
"""
This method check node status
@param nodes_list:
@return: True when all nodes in ready status
"""
# Check if any node is not in 'Ready' status
for node in nodes_list:
node_name, node_status = node.split()
if node_status != 'Ready':
raise NodeNotReady(node_name, node_status)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cleaner to return False here, and let the caller decide whether to raise an exception.

Copy link
Collaborator Author

@ebattat ebattat Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we must raise an error if one of the nodes is not ready, as this should cause a CI failure.


# If no nodes are found in a non-ready state
return True

def verify_nodes_ready(self):
"""
This method verifies that all nodes are in 'Ready' status.
If any node is not ready, it raises an error with the node name and its status.
@return: True is all in 'Ready' status
"""
# Get the node name and status for all nodes
nodes_list = self.run(f"{self.__cli} get nodes --no-headers | awk '{{print $1, $2}}'").splitlines()
return self.check_node_status(nodes_list)
ebattat marked this conversation as resolved.
Show resolved Hide resolved

def delete_available_released_pv(self):
"""
This method deletes available or released pv because that avoid launching new pv
Expand Down
7 changes: 7 additions & 0 deletions benchmark_runner/common/oc/oc_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,10 @@ class ODFHealthCheckTimeout(OCError):
def __init__(self):
self.message = f"ODF health check timeout"
super(ODFHealthCheckTimeout, self).__init__(self.message)


class NodeNotReady(OCError):
"""This exception returns node not ready timeout error"""
def __init__(self, node_name, node_status):
self.message = f"Node {node_name} is not ready. Current status: {node_status}"
super(NodeNotReady, self).__init__(self.message)
2 changes: 2 additions & 0 deletions benchmark_runner/main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def upgrade_ocp_bare_metal(step: str):
elif step == 'verify_bare_metal_upgrade_complete':
if bare_metal_operations.is_cluster_upgraded(oc, cnv_version=cnv_version, odf_version=odf_version, lso_version=lso_version):
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
else:
error_message = f'OCP {upgrade_ocp_version} upgrade failed'
logger.error(error_message)
Expand All @@ -190,6 +191,7 @@ def install_resources():
logger.info(f'Start Bare-Metal OpenShift resources installation')
oc = bare_metal_operations.oc_login()
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
bare_metal_operations.install_ocp_resources(resources=resources)
bare_metal_operations.disconnect_from_provisioner()
logger.info(f'End Bare-Metal OpenShift resources installation')
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,13 @@ def test_collect_prometheus():
time.sleep(10)
tarball = snapshot.retrieve_snapshot(post_wait_time=1)
assert tarfile.is_tarfile(tarball)


def test_verify_nodes_ready():
"""
This method test nodes are ready
@return:
"""
oc = OC(kubeadmin_password=test_environment_variable['kubeadmin_password'])
oc.login()
assert oc.verify_nodes_ready()
16 changes: 15 additions & 1 deletion tests/unittest/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import mock
import pytest
from benchmark_runner.common.oc.oc import OC
from benchmark_runner.common.oc.oc_exceptions import YAMLNotExist, LoginFailed
from benchmark_runner.common.oc.oc_exceptions import YAMLNotExist, LoginFailed, NodeNotReady


def test_bad_login():
Expand Down Expand Up @@ -47,3 +47,17 @@ def test_short_uuid():
with mock.patch.object(OC, 'get_long_uuid', new=dummy_long_uuid):
oc = OC()
assert oc._OC__get_short_uuid(workload='stressng_pod') == 'bb2be20e'


def test_check_node_status_ready():
oc = OC()
result = oc.check_node_status(nodes_list=['node-0 Ready', 'node-1 Ready', 'node-2 Ready'])
assert result


def test_check_node_status_not_ready():
oc = OC()
with pytest.raises(NodeNotReady) as exc_info:
oc.check_node_status(nodes_list=['node-0 Ready', 'node-1 NotReady', 'node-2 Ready'])
# Check that the exception message is as expected
assert str(exc_info.value) == "Node node-1 is not ready. Current status: NotReady"