From 96b3d5d94ea634a1e8a8b6a13916747f0eea38d2 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Sun, 26 May 2024 14:29:56 +0000 Subject: [PATCH] move to CNI controller and remove bridge plugin --- .github/workflows/e2e.yml | 10 +- Dockerfile | 1 - cmd/kindnetd/main.go | 146 +--------- install-kindnet-bridge.yaml | 123 --------- pkg/cni/cni.go | 197 -------------- pkg/cni/controller.go | 391 +++++++++++++++++++++++++++ {cmd/kindnetd => pkg/net}/ethtool.go | 2 +- 7 files changed, 408 insertions(+), 462 deletions(-) delete mode 100644 install-kindnet-bridge.yaml delete mode 100644 pkg/cni/cni.go create mode 100644 pkg/cni/controller.go rename {cmd/kindnetd => pkg/net}/ethtool.go (99%) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 6dcbecec..3d5f3f95 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -53,9 +53,8 @@ jobs: fail-fast: false matrix: ipFamily: ["ipv4", "ipv6", "dual"] - cniMode: ["ptp", "bridge"] env: - JOB_NAME: "kindnetd-e2e-${{ matrix.ipFamily }}-${{ matrix.cniMode }}" + JOB_NAME: "kindnetd-e2e-${{ matrix.ipFamily }}" IP_FAMILY: ${{ matrix.ipFamily }} steps: - name: Check out code @@ -118,17 +117,10 @@ jobs: /usr/local/bin/kind load docker-image ghcr.io/aojea/kindnetd:test --name ${{ env.KIND_CLUSTER_NAME}} - name: install ptp plugin - if: ${{ matrix.cniMode == 'ptp' }} run: | sed -i s#aojea/kindnetd.*#aojea/kindnetd:test# install-kindnet.yaml /usr/local/bin/kubectl apply -f ./install-kindnet.yaml - - name: install bridge plugin - if: ${{ matrix.cniMode == 'bridge' }} - run: | - sed -i s#aojea/kindnetd.*#aojea/kindnetd:test# install-kindnet-bridge.yaml - /usr/local/bin/kubectl apply -f ./install-kindnet-bridge.yaml - - name: Get Cluster status run: | # wait network is ready diff --git a/Dockerfile b/Dockerfile index 10e38fa9..127c32fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,6 @@ RUN echo "Installing CNI binaries ..." 
\ && find /opt/cni/bin -type f -not \( \ -iname host-local \ -o -iname ptp \ - -o -iname bridge \ -o -iname portmap \ \) \ -delete diff --git a/cmd/kindnetd/main.go b/cmd/kindnetd/main.go index d6906789..5e23f6e2 100644 --- a/cmd/kindnetd/main.go +++ b/cmd/kindnetd/main.go @@ -27,12 +27,8 @@ import ( "golang.org/x/sys/unix" "github.com/aojea/kindnet/pkg/cni" - utilnet "github.com/aojea/kindnet/pkg/net" "github.com/aojea/kindnet/pkg/router" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -60,7 +56,7 @@ const ( AllFamily IPFamily = unix.AF_UNSPEC IPv4Family IPFamily = unix.AF_INET IPv6Family IPFamily = unix.AF_INET6 - DualStackFamily IPFamily = 12 // AF_INET + AF_INET6 + DualStackFamily IPFamily = unix.AF_UNSPEC ) var ( @@ -119,18 +115,8 @@ func main() { }() signal.Notify(signalCh, os.Interrupt, unix.SIGINT) - go func() { - select { - case <-signalCh: - klog.Infof("Exiting: received signal") - cancel() - case <-ctx.Done(): - } - }() - informersFactory := informers.NewSharedInformerFactory(clientset, 0) nodeInformer := informersFactory.Core().V1().Nodes() - nodeLister := nodeInformer.Lister() // obtain the host and pod ip addresses hostIP, podIP := os.Getenv("HOST_IP"), os.Getenv("POD_IP") @@ -142,28 +128,6 @@ func main() { )) } - mtu, err := utilnet.GetMTU(int(AllFamily)) - klog.Infof("setting mtu %d for CNI \n", mtu) - if err != nil { - klog.Infof("Failed to get MTU size from interface eth0, using kernel default MTU size error:%v", err) - } - - // CNI_BRIDGE env variable uses the CNI bridge plugin, defaults to ptp - useBridge := len(os.Getenv("CNI_BRIDGE")) > 0 - // disable offloading in the bridge if exists - disableOffload := false - if useBridge { - disableOffload = len(os.Getenv("DISABLE_CNI_BRIDGE_OFFLOAD")) > 0 - } - // used to track if the cni config inputs changed and write the config - cniConfigWriter := 
&cni.CNIConfigWriter{ - Path: cni.CNIConfigPath, - Bridge: useBridge, - MTU: mtu, - } - klog.Infof("Configuring CNI path: %s bridge: %v disableOffload: %v mtu: %d", - cni.CNIConfigPath, useBridge, disableOffload, mtu) - // enforce ip masquerade rules noMaskIPv4Subnets, noMaskIPv6Subnets := getNoMasqueradeSubnets(clientset) // detect the cluster IP family based on the Cluster CIDR akka PodSubnet @@ -209,111 +173,31 @@ func main() { }() } - // setup nodes reconcile function, closes over arguments - reconcileNodes := makeNodesReconciler(cniConfigWriter, hostIP, ipFamily, clientset) - // main control loop informersFactory.Start(ctx.Done()) - // routes controller + // CNI config controller go func() { - err := router.New(hostname, clientset, nodeInformer).Run(ctx, 5) + err := cni.New(hostname, clientset, nodeInformer, int(ipFamily)).Run(ctx, 1) if err != nil { klog.Infof("error running router controller: %v", err) } }() - for { - // Gets the Nodes information from the API - // TODO: use a proper controller instead - var nodes []*corev1.Node - var err error - for i := 0; i < 5; i++ { - nodes, err = nodeLister.List(labels.Everything()) - if err == nil { - break - } - klog.Infof("Failed to get nodes, retrying after error: %v", err) - time.Sleep(time.Second * time.Duration(i)) - } - if err != nil { - panic("Reached maximum retries obtaining node list: " + err.Error()) - } - - // reconcile the nodes with retries - for i := 0; i < 5; i++ { - err = reconcileNodes(nodes) - if err == nil { - break - } - klog.Infof("Failed to reconcile routes, retrying after error: %v", err) - time.Sleep(time.Second * time.Duration(i)) - } + // routes controller + go func() { + err := router.New(hostname, clientset, nodeInformer).Run(ctx, 5) if err != nil { - panic("Maximum retries reconciling node routes: " + err.Error()) - } - - // disable offload if required - if disableOffload { - err = SetChecksumOffloading("kind-br", false, false) - if err != nil { - klog.Infof("Failed to disable offloading 
on interface kind-br: %v", err) - } else { - disableOffload = false - } - } - - // rate limit - select { - case <-ctx.Done(): - return - default: - time.Sleep(10 * time.Second) - } - } -} - -// nodeNodesReconciler returns a reconciliation func for nodes -func makeNodesReconciler(cniConfig *cni.CNIConfigWriter, hostIP string, ipFamily IPFamily, clientset *kubernetes.Clientset) func([]*corev1.Node) error { - // reconciles a node - reconcileNode := func(node *corev1.Node) error { - // first get this node's IPs - // we don't support more than one IP address per IP family for simplification - nodeIPs := internalIPs(node) - klog.Infof("Handling node with IPs: %v\n", nodeIPs) - // This is our node. We don't need to add routes, but we might need to - // update the cni config and "annotate" our external IPs - if nodeIPs.Has(hostIP) { - klog.Info("handling current node\n") - // compute the current cni config inputs - if err := cniConfig.Write( - cni.ComputeCNIConfigInputs(node), - ); err != nil { - return err - } - } - return nil - } - - // return a reconciler for all the nodes - return func(nodes []*corev1.Node) error { - for _, node := range nodes { - if err := reconcileNode(node); err != nil { - return err - } + klog.Infof("error running router controller: %v", err) } - return nil - } -} + }() -// internalIPs returns the internal IP address for node -func internalIPs(node *corev1.Node) sets.Set[string] { - ips := sets.New[string]() - // check the node.Status.Addresses - for _, address := range node.Status.Addresses { - if address.Type == "InternalIP" { - ips.Insert(address.Address) - } + select { + case <-signalCh: + klog.Infof("Exiting: received signal") + cancel() + case <-ctx.Done(): } - return ips + // Time for gracefully shutdown + time.Sleep(1 * time.Second) } diff --git a/install-kindnet-bridge.yaml b/install-kindnet-bridge.yaml deleted file mode 100644 index dd8a2ec5..00000000 --- a/install-kindnet-bridge.yaml +++ /dev/null @@ -1,123 +0,0 @@ ---- -kind: 
ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: kindnet -rules: - - apiGroups: - - "" - resources: - - nodes - verbs: - - list - - watch - - patch - - apiGroups: - - "" - resources: - - configmaps - verbs: - - get ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: kindnet -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kindnet -subjects: -- kind: ServiceAccount - name: kindnet - namespace: kube-system ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kindnet - namespace: kube-system ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: kindnet - namespace: kube-system - labels: - tier: node - app: kindnet - k8s-app: kindnet -spec: - selector: - matchLabels: - app: kindnet - template: - metadata: - labels: - tier: node - app: kindnet - k8s-app: kindnet - spec: - hostNetwork: true - tolerations: - - operator: Exists - effect: NoSchedule - serviceAccountName: kindnet - initContainers: - - name: install-cni-bin - image: ghcr.io/aojea/kindnetd:v1.1.0 - command: ['sh', '-c', 'cd /opt/cni/bin; for i in * ; do cat $i > /cni/$i ; chmod +x /cni/$i ; done'] - volumeMounts: - - name: cni-bin - mountPath: /cni - containers: - - name: kindnet-cni - image: ghcr.io/aojea/kindnetd:v1.1.0 - env: - - name: HOST_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: CNI_BRIDGE - value: "true" - volumeMounts: - - name: cni-cfg - mountPath: /etc/cni/net.d - - name: xtables-lock - mountPath: /run/xtables.lock - readOnly: false - - name: lib-modules - mountPath: /lib/modules - readOnly: true - resources: - requests: - cpu: "100m" - memory: "50Mi" - limits: - cpu: "100m" - memory: "50Mi" - securityContext: - privileged: false - capabilities: - add: ["NET_RAW", "NET_ADMIN"] - volumes: - - name: cni-bin - hostPath: - path: /opt/cni/bin - type: DirectoryOrCreate - - name: cni-cfg - hostPath: - path: 
/etc/cni/net.d - type: DirectoryOrCreate - - name: xtables-lock - hostPath: - path: /run/xtables.lock - type: FileOrCreate - - name: lib-modules - hostPath: - path: /lib/modules ---- diff --git a/pkg/cni/cni.go b/pkg/cni/cni.go deleted file mode 100644 index 8bf53e92..00000000 --- a/pkg/cni/cni.go +++ /dev/null @@ -1,197 +0,0 @@ -/* -Copyright 2019 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package cni - -import ( - "io" - "os" - "reflect" - "text/template" - - "github.com/pkg/errors" - - corev1 "k8s.io/api/core/v1" - netutils "k8s.io/utils/net" -) - -// cniConfigPath is where kindnetd will write the computed CNI config -const CNIConfigPath = "/etc/cni/net.d/10-kindnet.conflist" - -/* cni config management */ - -// CNIConfigInputs is supplied to the CNI config template -type CNIConfigInputs struct { - PodCIDRs []string - DefaultRoutes []string - Mtu int -} - -// ComputeCNIConfigInputs computes the template inputs for CNIConfigWriter -func ComputeCNIConfigInputs(node *corev1.Node) CNIConfigInputs { - defaultRoutes := []string{"0.0.0.0/0", "::/0"} - // check if is a dualstack cluster - if len(node.Spec.PodCIDRs) > 1 { - return CNIConfigInputs{ - PodCIDRs: node.Spec.PodCIDRs, - DefaultRoutes: defaultRoutes, - } - } - // the cluster is single stack - // we use the legacy node.Spec.PodCIDR for backwards compatibility - podCIDRs := []string{node.Spec.PodCIDR} - // This is a single stack cluster - defaultRoute := defaultRoutes[:1] - if 
netutils.IsIPv6CIDRString(podCIDRs[0]) { - defaultRoute = defaultRoutes[1:] - } - return CNIConfigInputs{ - PodCIDRs: podCIDRs, - DefaultRoutes: defaultRoute, - } -} - -const cniConfigTemplate = ` -{ - "cniVersion": "0.4.0", - "name": "kindnet", - "plugins": [ - { - "type": "ptp", - "ipMasq": false, - "ipam": { - "type": "host-local", - "dataDir": "/run/cni-ipam-state", - "routes": [ - {{$first := true}} - {{- range $route := .DefaultRoutes}} - {{if $first}}{{$first = false}}{{else}},{{end}} - { "dst": "{{ $route }}" } - {{- end}} - ], - "ranges": [ - {{$first := true}} - {{- range $cidr := .PodCIDRs}} - {{if $first}}{{$first = false}}{{else}},{{end}} - [ { "subnet": "{{ $cidr }}" } ] - {{- end}} - ] - } - {{if .Mtu}}, - "mtu": {{ .Mtu }} - {{end}} - }, - { - "type": "portmap", - "capabilities": { - "portMappings": true - } - } - ] -} -` - -const cniConfigTemplateBridge = ` -{ - "cniVersion": "0.4.0", - "name": "kindnet", - "plugins": [ - { - "type": "bridge", - "bridge": "kind-br", - "ipMasq": false, - "isGateway": true, - "isDefaultGateway": true, - "hairpinMode": true, - "ipam": { - "type": "host-local", - "dataDir": "/run/cni-ipam-state", - "ranges": [ - {{$first := true}} - {{- range $cidr := .PodCIDRs}} - {{if $first}}{{$first = false}}{{else}},{{end}} - [ { "subnet": "{{ $cidr }}" } ] - {{- end}} - ] - } - {{if .Mtu}}, - "mtu": {{ .Mtu }} - {{end}} - }, - { - "type": "portmap", - "capabilities": { - "portMappings": true - } - } - ] -} -` - -// CNIConfigWriter no-ops re-writing config with the same inputs -// NOTE: should only be called from a single goroutine -type CNIConfigWriter struct { - Path string - lastInputs CNIConfigInputs - MTU int - Bridge bool -} - -// Write will write the config based on -func (c *CNIConfigWriter) Write(inputs CNIConfigInputs) error { - if reflect.DeepEqual(inputs, c.lastInputs) { - return nil - } - - // use an extension not recognized by CNI to write the contents initially - // 
https://github.com/containerd/go-cni/blob/891c2a41e18144b2d7921f971d6c9789a68046b2/opts.go#L170 - // then we can rename to atomically make the file appear - f, err := os.Create(c.Path + ".temp") - if err != nil { - return err - } - - template := cniConfigTemplate - if c.Bridge { - template = cniConfigTemplateBridge - } - - // actually write the config - if err := writeCNIConfig(f, template, inputs); err != nil { - f.Close() - os.Remove(f.Name()) - return err - } - _ = f.Sync() - _ = f.Close() - - // then we can rename to the target config path - if err := os.Rename(f.Name(), c.Path); err != nil { - return err - } - - // we're safely done now, record the inputs - c.lastInputs = inputs - return nil -} - -func writeCNIConfig(w io.Writer, rawTemplate string, data CNIConfigInputs) error { - t, err := template.New("cni-json").Parse(rawTemplate) - if err != nil { - return errors.Wrap(err, "failed to parse cni template") - } - return t.Execute(w, &data) -} diff --git a/pkg/cni/controller.go b/pkg/cni/controller.go new file mode 100644 index 00000000..b4fab778 --- /dev/null +++ b/pkg/cni/controller.go @@ -0,0 +1,391 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package cni
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"text/template"
+	"time"
+
+	utilnet "github.com/aojea/kindnet/pkg/net"
+
+	v1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/apimachinery/pkg/util/wait"
+	coreinformers "k8s.io/client-go/informers/core/v1"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
+	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/workqueue"
+	"k8s.io/klog/v2"
+	netutils "k8s.io/utils/net"
+)
+
+// CNIConfigPath is where kindnetd will write the computed CNI config
+const CNIConfigPath = "/etc/cni/net.d/10-kindnet.conflist"
+
+/* cni config management */
+
+// CNIConfigInputs is supplied to the CNI config template
+type CNIConfigInputs struct {
+	PodCIDRs      []string
+	DefaultRoutes []string
+	Mtu           int
+}
+
+// controllerName labels the workqueue and the event source of this
+// controller. It used to be "router", a leftover from the routes controller
+// this file was derived from.
+const controllerName = "cni"
+
+// Controller watches the Node object of the local node and keeps the CNI
+// configuration file in sync with the pod CIDRs assigned to it.
+type Controller struct {
+	nodeName string
+
+	client           clientset.Interface
+	eventBroadcaster record.EventBroadcaster
+	eventRecorder    record.EventRecorder
+
+	workqueue workqueue.RateLimitingInterface
+
+	nodeLister  corelisters.NodeLister
+	nodesSynced cache.InformerSynced
+
+	configWriter *CNIConfigWriter
+}
+
+// New returns a Controller that reconciles the CNI config for nodeName.
+// It registers its event handlers on nodeInformer and detects the MTU to
+// use through utilnet.GetMTU(ipFamily).
+//
+// TODO add fsnotify watcher to detect external changes on the CNI config file
+func New(nodeName string, client clientset.Interface, nodeInformer coreinformers.NodeInformer, ipFamily int) *Controller {
+	klog.V(2).Info("Creating CNI config controller")
+
+	broadcaster := record.NewBroadcaster()
+	broadcaster.StartStructuredLogging(0)
+	broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")})
+	recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: controllerName})
+
+	c := &Controller{
+		nodeName:         nodeName,
+		client:           client,
+		nodeLister:       nodeInformer.Lister(),
+		nodesSynced:      nodeInformer.Informer().HasSynced,
+		workqueue:        workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), controllerName),
+		eventBroadcaster: broadcaster,
+		eventRecorder:    recorder,
+	}
+	_, err := nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+		AddFunc: c.enqueueNode,
+		UpdateFunc: func(old, new interface{}) {
+			c.enqueueNode(new)
+		},
+		DeleteFunc: c.enqueueNode,
+	})
+	if err != nil {
+		klog.Infof("unexpected error adding event handler to informer: %v", err)
+	}
+
+	mtu, err := utilnet.GetMTU(ipFamily)
+	if err != nil {
+		klog.Infof("Failed to get MTU size from interface eth0, using kernel default MTU size error:%v", err)
+		// A zero MTU makes the template omit the "mtu" field, which is
+		// what "using kernel default" promises.
+		mtu = 0
+	}
+	klog.Infof("setting mtu %d for CNI \n", mtu)
+
+	// used to track if the cni config inputs changed and write the config
+	c.configWriter = &CNIConfigWriter{
+		Path: CNIConfigPath,
+		MTU:  mtu,
+	}
+	klog.Infof("Configuring CNI path: %s mtu: %d", CNIConfigPath, mtu)
+
+	return c
+}
+
+// enqueueNode queues the key of obj when obj is the Node this controller is
+// responsible for. Nodes that have no pod CIDR assigned yet are skipped.
+func (c *Controller) enqueueNode(obj interface{}) {
+	node, ok := obj.(*v1.Node)
+	if !ok {
+		// Delete notifications can carry a tombstone instead of the
+		// object when the informer missed the delete event.
+		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
+		if !isTombstone {
+			return
+		}
+		if node, ok = tombstone.Obj.(*v1.Node); !ok {
+			return
+		}
+	}
+	// process our own node only
+	if c.nodeName != node.Name {
+		return
+	}
+	if len(node.Spec.PodCIDRs) == 0 {
+		klog.Infof("Node %s has no CIDR, ignoring\n", node.Name)
+		return
+	}
+	// Build the key from the unwrapped node: the key func cannot read
+	// metadata from a tombstone wrapper.
+	key, err := cache.MetaNamespaceKeyFunc(node)
+	if err != nil {
+		utilruntime.HandleError(err)
+		return
+	}
+	c.workqueue.Add(key)
+}
+
+// Run waits for the node informer cache to sync, starts the requested number
+// of workers and blocks until ctx is cancelled. The workqueue is shut down
+// on return. It returns an error only when the cache never synced.
+func (c *Controller) Run(ctx context.Context, workers int) error {
+	defer utilruntime.HandleCrash()
+	defer c.workqueue.ShutDown()
+	logger := klog.FromContext(ctx)
+
+	logger.Info("Starting CNI controller")
+
+	// Wait for the caches to be synced before starting workers
+	logger.Info("Waiting for informer caches to sync")
+
+	if ok := cache.WaitForCacheSync(ctx.Done(), c.nodesSynced); !ok {
+		return fmt.Errorf("failed to wait for caches to sync")
+	}
+
+	logger.Info("Starting workers", "count", workers)
+	// Each worker drains the queue and is restarted after a second if it
+	// ever returns before ctx is done.
+	for i := 0; i < workers; i++ {
+		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
+	}
+
+	logger.Info("Started workers")
+	<-ctx.Done()
+	logger.Info("Shutting down workers")
+
+	return nil
+}
+
+// runWorker processes queue items until the workqueue is shut down.
+func (c *Controller) runWorker(ctx context.Context) {
+	for c.processNextWorkItem(ctx) {
+	}
+}
+
+// processNextWorkItem takes one key from the workqueue and syncs it.
+// It returns false only when the workqueue has been shut down.
+func (c *Controller) processNextWorkItem(ctx context.Context) bool {
+	obj, shutdown := c.workqueue.Get()
+	if shutdown {
+		return false
+	}
+	logger := klog.FromContext(ctx)
+
+	// We wrap this block in a func so we can defer c.workqueue.Done.
+	err := func(key string) error {
+		// Done tells the workqueue we have finished processing this
+		// item. Forget is only called on success; on error the key is
+		// re-added with rate limiting so it is retried after a back-off.
+		defer c.workqueue.Done(key)
+		if err := c.syncHandler(ctx, key); err != nil {
+			// Put the item back on the workqueue to handle any transient errors.
+			c.workqueue.AddRateLimited(key)
+			return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
+		}
+		c.workqueue.Forget(key)
+		logger.Info("Successfully synced", "resourceName", key)
+		return nil
+	}(obj.(string))
+	if err != nil {
+		utilruntime.HandleError(err)
+	}
+
+	return true
+}
+
+// syncHandler reconciles the CNI config file with the Node named by key.
+// When the Node no longer exists the config file is removed (best effort);
+// otherwise the config is rendered from the Node's pod CIDRs.
+func (c *Controller) syncHandler(ctx context.Context, key string) error {
+	node, err := c.nodeLister.Get(key)
+	if err != nil {
+		if !apierrors.IsNotFound(err) {
+			return err
+		}
+		// Node has been deleted, best effort to delete the CNI config file.
+		// node is nil on this path, so only key may be used for logging.
+		if err := os.Remove(CNIConfigPath); err != nil && !os.IsNotExist(err) {
+			klog.Infof("node %s has been deleted, error deleting its CNI configuration: %v", key, err)
+		}
+		// Drop the cached inputs so the file is written again if the node
+		// comes back with the same CIDRs.
+		c.configWriter.lastInputs = CNIConfigInputs{}
+		return nil
+	}
+	// compute the current cni config inputs and write them if they changed
+	return c.configWriter.Write(ComputeCNIConfigInputs(node))
+}
+
+// ComputeCNIConfigInputs computes the template inputs for CNIConfigWriter
+func ComputeCNIConfigInputs(node *v1.Node) CNIConfigInputs {
+	defaultRoutes := []string{"0.0.0.0/0", "::/0"}
+	// check if is a dualstack cluster
+	if len(node.Spec.PodCIDRs) > 1 {
+		return CNIConfigInputs{
+			PodCIDRs:      node.Spec.PodCIDRs,
+			DefaultRoutes: defaultRoutes,
+		}
+	}
+	// the cluster is single stack
+	// we use the legacy node.Spec.PodCIDR for backwards compatibility
+	podCIDRs := []string{node.Spec.PodCIDR}
+	// pick the default route that matches the IP family of the pod CIDR
+	defaultRoute := defaultRoutes[:1]
+	if netutils.IsIPv6CIDRString(podCIDRs[0]) {
+		defaultRoute = defaultRoutes[1:]
+	}
+	return CNIConfigInputs{
+		PodCIDRs:      podCIDRs,
+		DefaultRoutes: defaultRoute,
+	}
+}
+
+const cniConfigTemplate = `
+{
+	"cniVersion": "0.4.0",
+	"name": "kindnet",
+	"plugins": [
+	{
+		"type": "ptp",
+		"ipMasq": false,
+		"ipam": {
+			"type": "host-local",
+			"dataDir": "/run/cni-ipam-state",
+			"routes": [
+				{{$first := true}}
+				{{- range $route := .DefaultRoutes}}
+				{{if $first}}{{$first = false}}{{else}},{{end}}
+				{ "dst": "{{ $route }}" }
+				{{- end}}
+			],
+			"ranges": [
+				{{$first := true}}
+				{{- range $cidr := .PodCIDRs}}
+				{{if $first}}{{$first = false}}{{else}},{{end}}
+				[ { "subnet": "{{ $cidr }}" } ]
+				{{- end}}
+			]
+		}
+		{{if .Mtu}},
+		"mtu": {{ .Mtu }}
+		{{end}}
+	},
+	{
+		"type": "portmap",
+		"capabilities": {
+			"portMappings": true
+		}
+	}
+	]
+}
+`
+
+const cniConfigTemplateBridge = `
+{
+	"cniVersion": "0.4.0",
+	"name": "kindnet",
+	"plugins": [
+	{
+		"type": "bridge",
+		"bridge": "kind-br",
+		"ipMasq": false,
+		"isGateway": true,
+		"isDefaultGateway": true,
+		"hairpinMode": true,
+		"ipam": {
+			"type": "host-local",
+			"dataDir": "/run/cni-ipam-state",
+			"ranges": [
+				{{$first := true}}
+				{{- range $cidr := .PodCIDRs}}
+				{{if $first}}{{$first = false}}{{else}},{{end}}
+				[ { "subnet": "{{ $cidr }}" } ]
+				{{- end}}
+			]
+		}
+		{{if .Mtu}},
+		"mtu": {{ .Mtu }}
+		{{end}}
+	},
+	{
+		"type": "portmap",
+		"capabilities": {
+			"portMappings": true
+		}
+	}
+	]
+}
+`
+
+// CNIConfigWriter no-ops re-writing config with the same inputs
+// NOTE: should only be called from a single goroutine
+type CNIConfigWriter struct {
+	Path       string
+	lastInputs CNIConfigInputs
+	MTU        int
+	Bridge     bool
+}
+
+// Write renders the CNI config for inputs and moves it into place at c.Path.
+// It does nothing when the effective inputs equal the ones written last.
+// When inputs.Mtu is zero the writer's MTU is used.
+func (c *CNIConfigWriter) Write(inputs CNIConfigInputs) error {
+	// ComputeCNIConfigInputs never sets Mtu; without this the MTU stored on
+	// the writer would never reach the template.
+	if inputs.Mtu == 0 {
+		inputs.Mtu = c.MTU
+	}
+	if reflect.DeepEqual(inputs, c.lastInputs) {
+		return nil
+	}
+
+	// use an extension not recognized by CNI to write the contents initially
+	// https://github.com/containerd/go-cni/blob/891c2a41e18144b2d7921f971d6c9789a68046b2/opts.go#L170
+	// then we can rename to atomically make the file appear
+	tmpPath := c.Path + ".temp"
+	f, err := os.Create(tmpPath)
+	if err != nil {
+		return err
+	}
+
+	tmpl := cniConfigTemplate
+	if c.Bridge {
+		tmpl = cniConfigTemplateBridge
+	}
+
+	// actually write the config
+	if err := writeCNIConfig(f, tmpl, inputs); err != nil {
+		f.Close()
+		os.Remove(tmpPath)
+		return err
+	}
+	// Sync stays best effort, but a failed Close may mean the contents
+	// were not fully written, so do not publish the file in that case.
+	_ = f.Sync()
+	if err := f.Close(); err != nil {
+		os.Remove(tmpPath)
+		return err
+	}
+
+	// then we can rename to the target config path
+	if err := os.Rename(tmpPath, c.Path); err != nil {
+		// do not leave the temporary file behind
+		os.Remove(tmpPath)
+		return err
+	}
+
+	// we're safely done now, record the inputs
+	c.lastInputs = inputs
+	return nil
+}
+
+// writeCNIConfig parses rawTemplate and executes it with data into w.
+func writeCNIConfig(w io.Writer, rawTemplate string, data CNIConfigInputs) error {
+	t, err := template.New("cni-json").Parse(rawTemplate)
+	if err != nil {
+		return fmt.Errorf("failed to parse cni template: %w", err)
+	}
+	return t.Execute(w, &data)
+}
diff --git a/cmd/kindnetd/ethtool.go b/pkg/net/ethtool.go
similarity index 99%
rename from cmd/kindnetd/ethtool.go
rename to pkg/net/ethtool.go
index b0fb5966..ffd14f94 100644
--- a/cmd/kindnetd/ethtool.go
+++ b/pkg/net/ethtool.go
@@ -1,4 +1,4 @@
-package main
+package net
 
 import (
 	"fmt"