Skip to content

Commit

Permalink
Demo pods should restart when failed (#2487)
Browse files Browse the repository at this point in the history
* Demo pods should restart when failed

### What's done:
 * Added `podBackoffLimit` to `KubernetesConfig` of save-demo
 * Changed the Job backoff limit from a hard-coded 0 to the configurable `podBackoffLimit`, which defaults to 6
  • Loading branch information
sanyavertolet authored Aug 23, 2023
1 parent 7dd9fb4 commit 2797c76
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ data class ConfigProperties(
}

/**
*
* @property apiServerUrl URL of Kubernetes API Server. See [docs on accessing API from within a pod](https://kubernetes.io/docs/tasks/run-application/access-api-from-pod/)
* @property serviceAccount Name of [ServiceAccount](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/) that will be used
* to authenticate save-demo to the API server
Expand All @@ -44,6 +43,7 @@ data class ConfigProperties(
* @property agentCpuLimitations configures CPU [Limitations] for demo-agent pods
* @property agentMemoryLimitations configures memory [Limitations] for demo-agent pods
* @property agentEphemeralStorageLimitations configures ephemeral storage [Limitations] for demo-agent pods
* @property podBackoffLimit number of retries allowed for a demo job (passed as the Job's `backoffLimit`) before the job is considered failed, [DEFAULT_BACKOFF_LIMIT] by default
*/
data class KubernetesConfig(
val apiServerUrl: String,
Expand All @@ -55,7 +55,8 @@ data class KubernetesConfig(
val agentNamespace: String = currentNamespace,
val agentCpuLimitations: Limitations? = defaultAgentCpuLimitations,
val agentMemoryLimitations: Limitations? = defaultAgentMemoryLimitations,
val agentEphemeralStorageLimitations: Limitations? = defaultEphemeralStorageLimitations
val agentEphemeralStorageLimitations: Limitations? = defaultEphemeralStorageLimitations,
val podBackoffLimit: Int = DEFAULT_BACKOFF_LIMIT,
) {
/**
* Data class that configures demo-agent limitations:
Expand Down Expand Up @@ -87,6 +88,7 @@ data class KubernetesConfig(
fun limitsQuantity() = Quantity(limits)
}
companion object {
private const val DEFAULT_BACKOFF_LIMIT = 6
private val defaultAgentCpuLimitations = Limitations("200m", "2")
private val defaultAgentMemoryLimitations = Limitations("300Mi", "500Mi")
private val defaultEphemeralStorageLimitations = Limitations("100Mi", "500Mi")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ fun getJobObjectForDemo(
spec = JobSpec().apply {
parallelism = REPLICAS_PER_DEMO
ttlSecondsAfterFinished = TTL_AFTER_COMPLETED
backoffLimit = 0
backoffLimit = kubernetesSettings.podBackoffLimit
template = PodTemplateSpec().apply {
spec = PodSpec().apply {
subdomain = kubernetesSettings.agentSubdomainName
Expand Down

0 comments on commit 2797c76

Please sign in to comment.