-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: updates to Numaplane health checks (#21671)
Signed-off-by: Julie Vogelman <[email protected]>
- Loading branch information
Showing 12 changed files with 620 additions and 185 deletions.
There are no files selected for viewing
124 changes: 84 additions & 40 deletions
124
resource_customizations/numaplane.numaproj.io/ISBServiceRollout/health.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,104 @@ | ||
local hs = {} | ||
local healthyCondition = {} | ||
-- return true if degraded, along with the reason | ||
function isDegraded(obj) | ||
if obj.status == nil then | ||
return false, "" | ||
end | ||
-- check phase=Failed, healthy condition failed, progressive upgrade failed | ||
if obj.status.phase == "Failed" then | ||
return true, obj.status.message | ||
end | ||
|
||
-- check for certain cases of "Progressing" | ||
if obj.status.conditions ~= nil then | ||
for i, condition in ipairs(obj.status.conditions) do | ||
if condition.type == "ChildResourcesHealthy" and condition.status == "False" and condition.reason == "ISBSvcFailed" then | ||
return true, condition.message | ||
elseif condition.type == "ProgressiveUpgradeSucceeded" and condition.status == "False" then | ||
return true, "Progressive upgrade failed" | ||
end | ||
end | ||
end | ||
|
||
if obj.status == nil then -- if there's no Status at all, we haven't been reconciled | ||
hs.status = "Progressing" | ||
hs.message = "Not yet reconciled" | ||
return hs | ||
return false, "" | ||
end | ||
|
||
if obj.metadata.generation ~= obj.status.observedGeneration then | ||
hs.status = "Progressing" | ||
hs.message = "Not yet reconciled" | ||
return hs | ||
end | ||
function isProgressing(obj) | ||
-- if there's no Status at all, we haven't been reconciled | ||
if obj.status == nil then | ||
return true, "Not yet reconciled" | ||
end | ||
|
||
if obj.status.phase == "Pending" then | ||
hs.status = "Progressing" | ||
hs.message = "Phase=Pending" | ||
return hs | ||
end | ||
if obj.metadata.generation ~= obj.status.observedGeneration then | ||
return true, "Not yet reconciled" | ||
end | ||
|
||
if obj.status.upgradeInProgress ~= nil and obj.status.upgradeInProgress ~= "" then | ||
hs.status = "Progressing" | ||
hs.message = "Update in progress" | ||
return hs | ||
end | ||
-- if we are in the middle of an upgrade | ||
if obj.status.upgradeInProgress ~= nil and obj.status.upgradeInProgress ~= "" or obj.status.phase == "Pending" then | ||
-- first check if Progressive Upgrade Failed; in that case, we won't return true (because "Degraded" will take precedence) | ||
progressiveUpgradeFailed = false | ||
if obj.status.conditions ~= nil then | ||
for i, condition in ipairs(obj.status.conditions) do | ||
if condition.type == "ProgressiveUpgradeSucceeded" and condition.status == "False" then | ||
progressiveUpgradeFailed = true | ||
end | ||
end | ||
end | ||
|
||
-- now check the Conditions | ||
if progressiveUpgradeFailed == false then | ||
return true, "Update in progress" | ||
end | ||
end | ||
|
||
if obj.status.conditions ~= nil then | ||
for i, condition in ipairs(obj.status.conditions) do | ||
if condition.type == "ChildResourcesHealthy" then | ||
healthyCondition = condition | ||
-- if the child is Progressing | ||
if obj.status.conditions ~= nil then | ||
for i, condition in ipairs(obj.status.conditions) do | ||
if condition.type == "ChildResourcesHealthy" and condition.status == "False" and condition.reason == "Progressing" then | ||
return true, "Child Progressing" | ||
end | ||
end | ||
end | ||
|
||
return false, "" | ||
end | ||
|
||
if (healthyCondition ~= {} and healthyCondition.status == "False" and healthyCondition.reason == "ISBSvcFailed") or obj.status.phase == "Failed" then | ||
hs.status = "Degraded" | ||
if obj.status.phase == "Failed" then | ||
hs.message = obj.status.message | ||
else | ||
hs.message = healthyCondition.message | ||
-- return true if healthy, along with the reason | ||
function isHealthy(obj) | ||
if obj.status == nil then | ||
return false, "" | ||
end | ||
return hs | ||
elseif healthyCondition ~= {} and healthyCondition.status == "False" and healthyCondition.reason == "Progressing" then | ||
|
||
if obj.status.conditions ~= nil then | ||
for i, condition in ipairs(obj.status.conditions) do | ||
if condition.type == "ChildResourcesHealthy" and condition.status == "True" then | ||
return true, "Healthy" | ||
end | ||
end | ||
end | ||
end | ||
|
||
local hs = {} | ||
|
||
|
||
progressing, reason = isProgressing(obj) | ||
if progressing then | ||
hs.status = "Progressing" | ||
hs.message = healthyCondition.message | ||
hs.message = reason | ||
return hs | ||
end | ||
|
||
degraded, reason = isDegraded(obj) | ||
if degraded then | ||
hs.status = "Degraded" | ||
hs.message = reason | ||
return hs | ||
elseif healthyCondition ~= {} and healthyCondition.status == "True" and obj.status.phase == "Deployed" then | ||
end | ||
|
||
healthy, reason = isHealthy(obj) | ||
if healthy then | ||
hs.status = "Healthy" | ||
hs.message = healthyCondition.message | ||
hs.message = reason | ||
return hs | ||
end | ||
|
||
hs.status = "Unknown" | ||
hs.message = "Unknown ISBService status" | ||
return hs | ||
hs.message = "Unknown status" | ||
return hs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
resource_customizations/numaplane.numaproj.io/ISBServiceRollout/testdata/ISBServiceRollout/progressive-failed.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
apiVersion: numaplane.numaproj.io/v1alpha1 | ||
kind: ISBServiceRollout | ||
metadata: | ||
annotations: | ||
kubectl.kubernetes.io/last-applied-configuration: | | ||
{"apiVersion":"numaplane.numaproj.io/v1alpha1","kind":"ISBServiceRollout","metadata":{"annotations":{},"labels":{"argocd.argoproj.io/instance":"demo-app"},"name":"my-isbsvc","namespace":"example-namespace"},"spec":{"interStepBufferService":{"spec":{"jetstream":{"persistence":{"volumeSize":"1Gi"},"version":"2.10.3"}}}}} | ||
creationTimestamp: "2025-01-26T05:38:04Z" | ||
finalizers: | ||
- numaplane.numaproj.io/numaplane-controller | ||
generation: 4 | ||
labels: | ||
argocd.argoproj.io/instance: demo-app | ||
name: my-isbsvc | ||
namespace: example-namespace | ||
resourceVersion: "664511" | ||
uid: c45f8283-f799-45a3-8058-ac462756e654 | ||
spec: | ||
interStepBufferService: | ||
metadata: {} | ||
spec: | ||
jetstream: | ||
persistence: | ||
volumeSize: 1Gi | ||
version: 2.10.3 | ||
status: | ||
conditions: | ||
- lastTransitionTime: "2025-01-26T05:38:04Z" | ||
message: Successful | ||
observedGeneration: 4 | ||
reason: Successful | ||
status: "True" | ||
type: ChildResourceDeployed | ||
- lastTransitionTime: "2025-01-26T05:39:04Z" | ||
message: Successful | ||
observedGeneration: 4 | ||
reason: Successful | ||
status: "True" | ||
type: ChildResourcesHealthy | ||
- lastTransitionTime: "2025-01-26T05:38:04Z" | ||
message: no need for pause | ||
observedGeneration: 4 | ||
reason: NoPause | ||
status: "False" | ||
type: PausingPipelines | ||
- lastTransitionTime: "2025-01-26T05:48:44Z" | ||
message: New Child Object example-namespace/my-isbsvc-2 Failed | ||
observedGeneration: 4 | ||
reason: Failed | ||
status: "False" | ||
type: ProgressiveUpgradeSucceeded | ||
message: Deployed | ||
nameCount: 3 | ||
observedGeneration: 4 | ||
pauseRequestStatus: {} | ||
phase: Deployed | ||
progressiveStatus: | ||
upgradingChildStatus: | ||
assessmentResult: Success | ||
name: my-isbsvc-2 | ||
nextAssessmentTime: "2025-01-26T05:48:05Z" |
Oops, something went wrong.