Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/cpu-load #144

Merged
merged 10 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chart/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ rules:
verbs: [ "get" ]
- apiGroups: [ "coordination.k8s.io" ]
resources: [ "leases" ]
verbs: [ "create", "get", "delete" ]
verbs: [ "create", "delete", "get" ]
{{- end }}
9 changes: 9 additions & 0 deletions chart/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "kubeip.name" . }}
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
Expand All @@ -20,6 +24,11 @@ spec:
{{- if .Values.daemonSet.nodeSelector }}
{{- toYaml .Values.daemonSet.nodeSelector | nindent 8 }}
{{- end }}
tolerations:
- operator: "Exists"
effect: "NoSchedule"
- operator: "Exists"
effect: "NoExecute"
containers:
- name: kubeip
image: "{{ .Values.image.repository }}"
Expand Down
12 changes: 8 additions & 4 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ serviceAccount:
name: kubeip-service-account
annotations:
gcpServiceAccountEmail: kubeip-service-account@workload-id-117715.iam.gserviceaccount.com
# annotations:
# awsRoleArn: "your-aws-role-arn"
# gcpServiceAccountEmail: "your-google-service-account-email"
# annotations:
# awsRoleArn: "your-aws-role-arn"
# gcpServiceAccountEmail: "your-google-service-account-email"


# Role-Based Access Control (RBAC) configuration.
rbac:
Expand All @@ -40,3 +40,7 @@ daemonSet:
resources:
requests:
cpu: 100m
memory: 64Mi
limits:
cpu: 100m
memory: 128Mi
66 changes: 31 additions & 35 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func assignAddress(c context.Context, log *logrus.Entry, client kubernetes.Inter
func run(c context.Context, log *logrus.Entry, cfg *config.Config) error {
ctx, cancel := context.WithCancel(c)
defer cancel()

// add debug mode to context
if cfg.DevelopMode {
ctx = context.WithValue(ctx, developModeKey, true)
Expand Down Expand Up @@ -167,43 +168,37 @@ func run(c context.Context, log *logrus.Entry, cfg *config.Config) error {
if err != nil {
return errors.Wrap(err, "initializing assigner")
}
// assign static public IP address
errorCh := make(chan error, 1) // buffered channel to avoid goroutine leak
go func() {
defer close(errorCh) // close the channel when the goroutine exits to avoid goroutine leak
e := assignAddress(ctx, log, clientset, assigner, n, cfg)
if e != nil {
errorCh <- e
}
}()

for {
select {
case err = <-errorCh:
if err != nil {
return errors.Wrap(err, "assigning static public IP address")
}
case <-ctx.Done():
log.Infof("kubeip agent gracefully stopped")
if cfg.ReleaseOnExit {
log.Infof("releasing static public IP address")
err = func() error {
releaseCtx, releaseCancel := context.WithTimeout(context.Background(), unassignTimeout) // release the static public IP address within 5 minutes
defer releaseCancel()
// use a different context for releasing the static public IP address since the main context is canceled
if err = assigner.Unassign(releaseCtx, n.Instance, n.Zone); err != nil {
return errors.Wrap(err, "failed to release static public IP address")
}
return nil
}()
if err != nil {
return err //nolint:wrapcheck
}
log.Infof("static public IP address released")
}
return nil
err = assignAddress(ctx, log, clientset, assigner, n, cfg)
if err != nil {
return errors.Wrap(err, "assigning static public IP address")
}

// pause the agent to prevent it from exiting immediately after assigning the static public IP address
// wait for the context to be done: SIGTERM, SIGINT
<-ctx.Done()
log.Infof("shutting down kubeip agent")

// release the static public IP address on exit
if cfg.ReleaseOnExit {
log.Infof("releasing static public IP address")
if releaseErr := releaseIP(assigner, n); releaseErr != nil { //nolint:contextcheck
return releaseErr
}
log.Infof("static public IP address released")
}
return nil
}

// releaseIP unassigns the static public IP address from the instance
// described by n, using the node's Instance and Zone fields.
//
// The call runs under a fresh context derived from context.Background() and
// bounded by unassignTimeout, rather than under a caller-supplied context, so
// it can still proceed when the caller's own context has already been
// canceled (e.g. during shutdown).
//
// It returns nil on success, or the Unassign error wrapped with a
// descriptive message.
func releaseIP(assigner address.Assigner, n *types.Node) error {
	ctx, cancel := context.WithTimeout(context.Background(), unassignTimeout)
	defer cancel()

	err := assigner.Unassign(ctx, n.Instance, n.Zone)
	if err == nil {
		return nil
	}
	return errors.Wrap(err, "failed to release static public IP address")
}

func runCmd(c *cli.Context) error {
Expand All @@ -213,7 +208,8 @@ func runCmd(c *cli.Context) error {
cfg := config.NewConfig(c)

if err := run(ctx, log, cfg); err != nil {
log.Fatalf("eks-lens agent failed: %v", err)
log.WithError(err).Error("error running kubeip agent")
return err
}

return nil
Expand Down
3 changes: 2 additions & 1 deletion examples/aws/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ resource "kubernetes_daemonset" "kubeip_daemonset" {
}
resources {
requests = {
cpu = "100m"
cpu = "10m"
memory = "32Mi"
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion examples/gcp/gke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,8 @@ resource "kubernetes_daemonset" "kubeip_daemonset" {
}
resources {
requests = {
cpu = "100m"
cpu = "10m"
memory = "32Mi"
}
}
}
Expand Down
Loading