Skip to content

Commit

Permalink
Add option to override driver + executor memory limit using webhook
Browse files Browse the repository at this point in the history
Signed-off-by: Netanel Levine <[email protected]>
  • Loading branch information
netanellevine committed Jan 13, 2025
1 parent e6c2337 commit dfab4bb
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 0 deletions.
8 changes: 8 additions & 0 deletions api/v1beta2/sparkapplication_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,10 @@ type DriverSpec struct {
// Maps to `spark.kubernetes.driver.request.cores` that is available since Spark 3.0.
// +optional
CoreRequest *string `json:"coreRequest,omitempty"`
// MemoryLimit overrides the memory limit of the driver pod.
// By default the memory limit equals the memory request, so use this with care.
// +optional
MemoryLimit *string `json:"memoryLimit,omitempty"`
// JavaOptions is a string of extra JVM options to pass to the driver. For instance,
// GC settings or other logging.
// +optional
Expand Down Expand Up @@ -562,6 +566,10 @@ type ExecutorSpec struct {
// Maps to `spark.kubernetes.executor.request.cores` that is available since Spark 2.4.
// +optional
CoreRequest *string `json:"coreRequest,omitempty"`
// MemoryLimit overrides the memory limit of the executor pod.
// By default the memory limit equals the memory request, so use this with care.
// +optional
MemoryLimit *string `json:"memoryLimit,omitempty"`
// JavaOptions is a string of extra JVM options to pass to the executors. For instance,
// GC settings or other logging.
// +optional
Expand Down
10 changes: 10 additions & 0 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -2970,6 +2970,11 @@ spec:
description: Memory is the amount of memory to request for
the pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the driver pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory
to allocate in cluster mode, in MiB unless otherwise specified.
Expand Down Expand Up @@ -7756,6 +7761,11 @@ spec:
description: Memory is the amount of memory to request for
the pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the executor pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory
to allocate in cluster mode, in MiB unless otherwise specified.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2919,6 +2919,11 @@ spec:
description: Memory is the amount of memory to request for the
pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the driver pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory to
allocate in cluster mode, in MiB unless otherwise specified.
Expand Down Expand Up @@ -7675,6 +7680,11 @@ spec:
description: Memory is the amount of memory to request for the
pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the executor pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory to
allocate in cluster mode, in MiB unless otherwise specified.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2970,6 +2970,11 @@ spec:
description: Memory is the amount of memory to request for
the pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the driver pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory
to allocate in cluster mode, in MiB unless otherwise specified.
Expand Down Expand Up @@ -7756,6 +7761,11 @@ spec:
description: Memory is the amount of memory to request for
the pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the executor pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory
to allocate in cluster mode, in MiB unless otherwise specified.
Expand Down
10 changes: 10 additions & 0 deletions config/crd/bases/sparkoperator.k8s.io_sparkapplications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2919,6 +2919,11 @@ spec:
description: Memory is the amount of memory to request for the
pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the driver pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory to
allocate in cluster mode, in MiB unless otherwise specified.
Expand Down Expand Up @@ -7675,6 +7680,11 @@ spec:
description: Memory is the amount of memory to request for the
pod.
type: string
memoryLimit:
description: |-
MemoryLimit is overriding the executor pod memory limit.
By default, the memory limit and request are equal - use carefully.
type: string
memoryOverhead:
description: MemoryOverhead is the amount of off-heap memory to
allocate in cluster mode, in MiB unless otherwise specified.
Expand Down
26 changes: 26 additions & 0 deletions docs/api-docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,19 @@ Maps to <code>spark.kubernetes.driver.request.cores</code> that is available sin
</tr>
<tr>
<td>
<code>memoryLimit</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>MemoryLimit is overriding the driver pod memory limit.
By default, the memory limit and request are equal - use carefully.</p>
</td>
</tr>
<tr>
<td>
<code>javaOptions</code><br/>
<em>
string
Expand Down Expand Up @@ -812,6 +825,19 @@ Maps to <code>spark.kubernetes.executor.request.cores</code> that is available s
</tr>
<tr>
<td>
<code>memoryLimit</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>MemoryLimit is overriding the executor pod memory limit.
By default, the memory limit and request are equal - use carefully.</p>
</td>
</tr>
<tr>
<td>
<code>javaOptions</code><br/>
<em>
string
Expand Down
44 changes: 44 additions & 0 deletions internal/webhook/sparkpod_defaulter.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,49 @@ func (d *SparkPodDefaulter) Default(ctx context.Context, obj runtime.Object) err
return nil
}

// convertJavaMemoryStringToK8sMemoryString translates a JVM/Spark-style memory
// string into the spelling Kubernetes uses for the same binary unit.
//
// A value ending in one of the lowercase single-letter units k, m, g, t or p
// has that unit rewritten to Ki, Mi, Gi, Ti or Pi respectively, e.g. "512m"
// becomes "512Mi" and "2g" becomes "2Gi". Only the unit is changed; the numeric
// part is passed through as written.
//
// Every other input is returned unchanged. That covers values that already
// carry a Kubernetes binary suffix ("2Gi"), plain byte counts ("1024"),
// uppercase decimal suffixes ("2G") and the empty string. No validation is
// done here; the caller is expected to parse the result.
func convertJavaMemoryStringToK8sMemoryString(memory string) string {
	if memory == "" {
		return memory
	}

	last := len(memory) - 1
	switch memory[last] {
	case 'k', 'm', 'g', 't', 'p':
		// Left as-is these would be misread as a Kubernetes quantity: "m" is
		// the milli suffix and "k" is decimal kilo there, while "g", "t" and
		// "p" are not accepted at all.
		return memory[:last] + strings.ToUpper(memory[last:]) + "i"
	}

	return memory
}

// addMemoryLimit overrides the memory limit of the Spark container in pod with
// the MemoryLimit configured in app for the pod's role: Spec.Driver.MemoryLimit
// for a driver pod, Spec.Executor.MemoryLimit for an executor pod.
//
// It returns nil without touching the pod when no limit is configured for the
// role, or when the pod is neither a driver nor an executor. It returns an
// error when the Spark container cannot be located in the pod, or when the
// configured value cannot be parsed as a resource quantity after JVM-style
// units have been converted.
func addMemoryLimit(pod *corev1.Pod, app *v1beta2.SparkApplication) error {
	i := findContainer(pod)
	if i < 0 {
		return fmt.Errorf("failed to add memory limit as Spark container was not found in pod %s", pod.Name)
	}

	var memoryLimit *string
	switch {
	case util.IsDriverPod(pod):
		memoryLimit = app.Spec.Driver.MemoryLimit
	case util.IsExecutorPod(pod):
		memoryLimit = app.Spec.Executor.MemoryLimit
	}
	if memoryLimit == nil {
		return nil
	}

	// Convert memory limit to a Kubernetes-compatible quantity.
	limitQuantity, err := resource.ParseQuantity(convertJavaMemoryStringToK8sMemoryString(*memoryLimit))
	if err != nil {
		return fmt.Errorf("failed to parse memory limit %s: %w", *memoryLimit, err)
	}

	// Limits is a map and is nil when the container was created without any
	// limits; writing into a nil map panics, so allocate it first.
	resources := &pod.Spec.Containers[i].Resources
	if resources.Limits == nil {
		resources.Limits = corev1.ResourceList{}
	}

	// Apply the memory limit to the container's resources.
	logger.Info(fmt.Sprintf("Adding memory limit %s to container in pod %s", *memoryLimit, pod.Name))
	resources.Limits[corev1.ResourceMemory] = limitQuantity
	logger.Info(fmt.Sprintf("Successfully added memory limit %s to container in pod %s", limitQuantity.String(), pod.Name))
	return nil
}

// isSparkJobNamespace reports whether ns is marked in the defaulter's
// sparkJobNamespaces set. An entry for metav1.NamespaceAll matches every
// namespace.
func (d *SparkPodDefaulter) isSparkJobNamespace(ns string) bool {
	if d.sparkJobNamespaces[metav1.NamespaceAll] {
		return true
	}
	return d.sparkJobNamespaces[ns]
}
Expand All @@ -123,6 +166,7 @@ func mutateSparkPod(pod *corev1.Pod, app *v1beta2.SparkApplication) error {
addNodeSelectors,
addAffinity,
addTolerations,
addMemoryLimit,
addGPU,
addPrometheusConfig,
addContainerSecurityContext,
Expand Down

0 comments on commit dfab4bb

Please sign in to comment.