diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index 1f1607288f65..032fd06bb04b 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -64,7 +64,7 @@ jobs: - name: test external script create-external-cluster-resources.py run: | - toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[0].metadata.name}') + toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') timeout 15 sh -c "until kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr|grep -Eosq \"(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\" ; do sleep 1 && echo 'waiting for the manager IP to be available'; done" mgr_raw=$(kubectl -n rook-ceph exec $toolbox -- ceph mgr dump -f json|jq --raw-output .active_addr) timeout 60 sh -c "until kubectl -n rook-ceph exec $toolbox -- curl --silent --show-error ${mgr_raw%%:*}:9283; do echo 'waiting for mgr prometheus exporter to be ready' && sleep 1; done" diff --git a/Documentation/ceph-common-issues.md b/Documentation/ceph-common-issues.md index beedb1693888..ba4f18f899eb 100644 --- a/Documentation/ceph-common-issues.md +++ b/Documentation/ceph-common-issues.md @@ -56,7 +56,7 @@ After you verify the basic health of the running pods, next you will want to run The [rook-ceph-tools pod](./ceph-toolbox.md) provides a simple environment to run Ceph tools. Once the pod is up and running, connect to the pod to execute Ceph commands to evaluate that current state of the cluster. 
```console -kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l "app=rook-ceph-tools" -o jsonpath='{.items[0].metadata.name}') bash +kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l "app=rook-ceph-tools" -o jsonpath='{.items[*].metadata.name}') bash ``` #### Ceph Commands diff --git a/Documentation/ceph-upgrade.md b/Documentation/ceph-upgrade.md index 6437bf6e70b4..1b1eac9141c3 100644 --- a/Documentation/ceph-upgrade.md +++ b/Documentation/ceph-upgrade.md @@ -159,7 +159,7 @@ The Rook toolbox contains the Ceph tools that can give you status details of the `ceph status` command. Let's look at an output sample and review some of the details: ```sh -TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l "app=rook-ceph-tools" -o jsonpath='{.items[0].metadata.name}') +TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l "app=rook-ceph-tools" -o jsonpath='{.items[*].metadata.name}') kubectl -n $ROOK_CLUSTER_NAMESPACE exec -it $TOOLS_POD -- ceph status ``` diff --git a/tests/scripts/github-action-helper.sh b/tests/scripts/github-action-helper.sh index 0269f2e052c3..28cecb8316af 100755 --- a/tests/scripts/github-action-helper.sh +++ b/tests/scripts/github-action-helper.sh @@ -160,9 +160,13 @@ function deploy_cluster() { function wait_for_prepare_pod() { timeout 180 bash <<-'EOF' - until kubectl -n rook-ceph logs --follow job/$(kubectl -n rook-ceph get job -l app=rook-ceph-osd-prepare -o jsonpath='.items[0].metadata.name}') || true; do + while true; do + if [[ "$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare --field-selector=status.phase=Running | wc -l)" -gt 1 ]]; then + break + fi sleep 5 done + kubectl -n rook-ceph logs --follow pod/$(kubectl -n rook-ceph get pod -l app=rook-ceph-osd-prepare -o jsonpath='{.items[0].metadata.name}') EOF timeout 60 bash <<-'EOF' until kubectl -n rook-ceph logs $(kubectl -n rook-ceph get pod -l app=rook-ceph-osd,ceph_daemon_id=0 -o jsonpath='{.items[*].metadata.name}') --all-containers || true; do @@ 
-213,7 +217,7 @@ function deploy_first_rook_cluster() { function wait_for_rgw_pods() { for _ in {1..120}; do - if kubectl -n "$1" get pod -l app=rook-ceph-rgw -o jsonpath='{.items[0].metadata.name}'; then + if [ "$(kubectl -n "$1" get pod -l app=rook-ceph-rgw --field-selector=status.phase=Running|wc -l)" -gt 1 ] ; then echo "rgw pods found" break fi diff --git a/tests/scripts/validate_cluster.sh b/tests/scripts/validate_cluster.sh index 20569b6dbbb5..1b2c62616ae2 100755 --- a/tests/scripts/validate_cluster.sh +++ b/tests/scripts/validate_cluster.sh @@ -25,7 +25,7 @@ OSD_COUNT=$2 ############# # FUNCTIONS # ############# -EXEC_COMMAND="kubectl -n rook-ceph exec $(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[0].metadata.name}') -- ceph --connect-timeout 3" +EXEC_COMMAND="kubectl -n rook-ceph exec $(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') -- ceph --connect-timeout 3" trap display_status SIGINT ERR