Skip to content

Commit

Permalink
migrate to nftables and simplify masquerading
Browse files Browse the repository at this point in the history
Change-Id: I2dec4bc455241f2407b02a07aae475f70c178f65
  • Loading branch information
aojea committed Sep 5, 2024
1 parent 77f8c59 commit 020431f
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 83 deletions.
50 changes: 16 additions & 34 deletions cmd/kindnetd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,19 @@ const (
)

var (
useBridge bool
networkpolicies bool
hostnameOverride string
clusterCIDR string
useBridge bool
networkpolicies bool
hostnameOverride string
masquerading bool
noMasqueradeCIDRs string
)

func init() {
flag.BoolVar(&useBridge, "cni-bridge", false, "If set, enable the CNI bridge plugin (default is the ptp plugin)")
flag.BoolVar(&networkpolicies, "network-policy", false, "If set, enable Network Policies")
flag.StringVar(&hostnameOverride, "hostname-override", "", "If non-empty, will be used as the name of the Node that kube-network-policies is running on. If unset, the node name is assumed to be the same as the node's hostname.")
flag.StringVar(&clusterCIDR, "cluster-cidr", "", "CIDR Range for Pods in cluster.")
flag.BoolVar(&masquerading, "masquerading", true, "masquerade with the Node IP the cluster to external traffic")
flag.StringVar(&noMasqueradeCIDRs, "no-masquerade-cidr", "", "Comma seperated list of CIDRs that will not be masqueraded.")

flag.Usage = func() {
fmt.Fprint(os.Stderr, "Usage: kindnet [options]\n\n")
Expand Down Expand Up @@ -162,40 +164,20 @@ func main() {
klog.Infof("Configuring CNI path: %s bridge: %v disableOffload: %v mtu: %d",
cniConfigPath, useBridge, disableOffload, mtu)

// enforce ip masquerade rules
noMaskIPv4Subnets, noMaskIPv6Subnets := getNoMasqueradeSubnets(clusterCIDR, clientset)

// create an ipMasqAgent for IPv4
if len(noMaskIPv4Subnets) > 0 {
klog.Infof("noMask IPv4 subnets: %v", noMaskIPv4Subnets)
masqAgentIPv4, err := NewIPMasqAgent(false, noMaskIPv4Subnets)
// create an ipMasqAgent
if masquerading {
klog.Infof("masquerading cluster traffic")
masqAgent, err := NewIPMasqAgent(nodeInformer, noMasqueradeCIDRs)
if err != nil {
panic(err.Error())
klog.Fatalf("error creating masquerading agent: %v", err)
}
go func() {
if err := masqAgentIPv4.SyncRulesForever(time.Second * 60); err != nil {
panic(err)
}
}()
} else {
klog.Infof("Skipping ipMasqAgent for IPv4")
}

// create an ipMasqAgent for IPv6
if len(noMaskIPv6Subnets) > 0 {
klog.Infof("noMask IPv6 subnets: %v", noMaskIPv6Subnets)
masqAgentIPv6, err := NewIPMasqAgent(true, noMaskIPv6Subnets)
if err != nil {
panic(err.Error())
if err := masqAgent.SyncRulesForever(ctx, time.Second*60); err != nil {
klog.Info("error running masquerading agent: %v", err)

Check failure on line 176 in cmd/kindnetd/main.go

View workflow job for this annotation

GitHub Actions / test (1.22.x)

k8s.io/klog/v2.Info call has possible Printf formatting directive %v
}

go func() {
if err := masqAgentIPv6.SyncRulesForever(time.Second * 60); err != nil {
panic(err)
}
}()
defer masqAgent.CleanRules()
} else {
klog.Infof("Skipping ipMasqAgent for IPv6")
klog.Infof("Skipping ipMasqAgent")
}

// setup nodes reconcile function, closes over arguments
Expand Down
196 changes: 151 additions & 45 deletions cmd/kindnetd/masq.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,63 @@ limitations under the License.
package main

import (
"context"
"fmt"
"strings"
"time"

"github.com/coreos/go-iptables/iptables"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
coreinformers "k8s.io/client-go/informers/core/v1"
v1 "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
"sigs.k8s.io/knftables"
)

// NewIPMasqAgent returns a new IPMasqAgent
func NewIPMasqAgent(ipv6 bool, noMasqueradeCIDRs []string) (*IPMasqAgent, error) {
protocol := iptables.ProtocolIPv4
if ipv6 {
protocol = iptables.ProtocolIPv6
}
ipt, err := iptables.NewWithProtocol(protocol)
// NewIPMasqAgent returns a new IPMasqAgent that avoids masquerading the intra-cluster traffic
// but allows to masquerade the cluster to external traffic.
func NewIPMasqAgent(nodeInformer coreinformers.NodeInformer, noMasqueradeCIDRs string) (*IPMasqAgent, error) {
klog.V(2).Info("Initializing nftables")
nft, err := knftables.New(knftables.InetFamily, "kindnet")
if err != nil {
return nil, err
}

// TODO: validate cidrs
v4, v6 := splitCIDRs(strings.Split(noMasqueradeCIDRs, ","))
return &IPMasqAgent{
iptables: ipt,
masqChain: masqChainName,
noMasqueradeCIDRs: noMasqueradeCIDRs,
nft: nft,
nodeLister: nodeInformer.Lister(),
nodesSynced: nodeInformer.Informer().HasSynced,
noMasqV4: v4,
noMasqV6: v6,
}, nil
}

// IPMasqAgent is based on https://github.com/kubernetes-incubator/ip-masq-agent
// but collapsed into kindnetd and made ipv6 aware in an opinionated and simplified
// fashion using "github.com/coreos/go-iptables"
type IPMasqAgent struct {
iptables *iptables.IPTables
masqChain string
noMasqueradeCIDRs []string
nft knftables.Interface
nodeLister v1.NodeLister
nodesSynced cache.InformerSynced
noMasqV4 []string
noMasqV6 []string
flushed bool
}

// SyncRulesForever syncs ip masquerade rules forever
// these rules only needs to be installed once, but we run it periodically to check that are
// not deleted by an external program. It fails if can't sync the rules during 3 iterations
// TODO: aggregate errors
func (ma *IPMasqAgent) SyncRulesForever(interval time.Duration) error {
func (ma *IPMasqAgent) SyncRulesForever(ctx context.Context, interval time.Duration) error {
if !cache.WaitForNamedCacheSync("kindnet-ipmasq", ctx.Done(), ma.nodesSynced) {
return fmt.Errorf("error syncing cache")
}
klog.Info("Syncing nftables rules")
errs := 0
for {
if err := ma.SyncRules(); err != nil {
if err := ma.SyncRules(ctx); err != nil {
errs++
if errs > 3 {
return fmt.Errorf("can't synchronize rules after 3 attempts: %v", err)
Expand All @@ -70,41 +85,132 @@ func (ma *IPMasqAgent) SyncRulesForever(interval time.Duration) error {
}
}

// name of nat chain for iptables masquerade rules
const masqChainName = "KIND-MASQ-AGENT"

// SyncRules syncs ip masquerade rules
func (ma *IPMasqAgent) SyncRules() error {
// make sure our custom chain for non-masquerade exists
exists := false
chains, err := ma.iptables.ListChains("nat")
if err != nil {
return fmt.Errorf("failed to list chains: %v", err)
func (ma *IPMasqAgent) SyncRules(ctx context.Context) error {

table := &knftables.Table{
Comment: knftables.PtrTo("rules for kindnet masquerading"),
}
for _, ch := range chains {
if ch == ma.masqChain {
exists = true
break
}
tx := ma.nft.NewTransaction()
// do it once to delete the existing table
if !ma.flushed {
tx.Add(table)
tx.Delete(table)
ma.flushed = true
}
if !exists {
if err = ma.iptables.NewChain("nat", ma.masqChain); err != nil {
return err
}
tx.Add(table)

// add set with the CIDRs that should not be masqueraded
tx.Add(&knftables.Set{
Name: "noMasqV4",
Type: "ipv4_addr",
Flags: []knftables.SetFlag{knftables.IntervalFlag},
Comment: ptr.To("IPv4 CIDRs that should not be masqueraded"),
})
tx.Flush(&knftables.Set{
Name: "noMasqV4",
})
tx.Add(&knftables.Set{
Name: "noMasqV6",
Type: "ipv6_addr",
Flags: []knftables.SetFlag{knftables.IntervalFlag},
Comment: ptr.To("IPv6 CIDRs that should not be masqueraded"),
})
tx.Flush(&knftables.Set{
Name: "noMasqV6",
})

v4CIDRs := sets.New[string](ma.noMasqV4...)
v6CIDRs := sets.New[string](ma.noMasqV6...)

nodes, err := ma.nodeLister.List(labels.Everything())
if err != nil {
return err
}

// Packets to this network should not be masquerade, pods should be able to talk to other pods
for _, cidr := range ma.noMasqueradeCIDRs {
if err := ma.iptables.AppendUnique("nat", ma.masqChain, "-d", cidr, "-j", "RETURN", "-m", "comment", "--comment", "kind-masq-agent: local traffic is not subject to MASQUERADE"); err != nil {
return err
}
// don't masquerade the traffic directed to the Pods
for _, node := range nodes {
podCIDRsv4, podCIDRsv6 := splitCIDRs(node.Spec.PodCIDRs)
v4CIDRs.Insert(podCIDRsv4...)
v6CIDRs.Insert(podCIDRsv6...)
}

for _, cidr := range v4CIDRs.UnsortedList() {
tx.Add(&knftables.Element{
Set: "noMasqV4",
Key: []string{cidr},
})
}
for _, cidr := range v6CIDRs.UnsortedList() {
tx.Add(&knftables.Element{
Set: "noMasqV6",
Key: []string{cidr},
})
}
hook := knftables.PostroutingHook
chainName := string(hook)
tx.Add(&knftables.Chain{
Name: chainName,
Type: knftables.PtrTo(knftables.NATType),
Hook: knftables.PtrTo(hook),
Priority: knftables.PtrTo(knftables.SNATPriority + "-5"),
})
tx.Flush(&knftables.Chain{
Name: chainName,
})

tx.Add(&knftables.Rule{
Chain: chainName,
Rule: "ct state established,related accept",
Comment: ptr.To("skip stablished"),
})

// skip local traffic
tx.Add(&knftables.Rule{
Chain: chainName,
Rule: "fib daddr type local accept",
Comment: ptr.To("skip local traffic"),
})

// Masquerade all the other traffic
if err := ma.iptables.AppendUnique("nat", ma.masqChain, "-j", "MASQUERADE", "-m", "comment", "--comment", "kind-masq-agent: outbound traffic is subject to MASQUERADE (must be last in chain)"); err != nil {
// ignore other Pods and defined cidrs
tx.Add(&knftables.Rule{
Chain: chainName,
Rule: knftables.Concat(
"ip", "daddr", "@", "noMasqV4", "accept",
),
Comment: ptr.To("no masquerade IPv4 traffic"),
})

tx.Add(&knftables.Rule{
Chain: chainName,
Rule: knftables.Concat(
"ip6", "daddr", "@", "noMasqV6", "accept",
),
Comment: ptr.To("no masquerade IPv6 traffic"),
})

// masquerade the rest of the traffic
tx.Add(&knftables.Rule{
Chain: chainName,
Rule: "masquerade",
Comment: ptr.To("masquerade traffic"),
})

if err := ma.nft.Run(ctx, tx); err != nil {
klog.Infof("error syncing nftables rules %v", err)
return err
}
return nil
}

func (ma *IPMasqAgent) CleanRules() {
tx := ma.nft.NewTransaction()
// Add+Delete is idempotent and won't return an error if the table doesn't already
// exist.
tx.Add(&knftables.Table{})
tx.Delete(&knftables.Table{})

// Send all non-LOCAL destination traffic to our custom KIND-MASQ-AGENT chain
return ma.iptables.AppendUnique("nat", "POSTROUTING", "-m", "addrtype", "!", "--dst-type", "LOCAL", "-j", ma.masqChain, "-m", "comment", "--comment", "kind-masq-agent: ensure nat POSTROUTING directs all non-LOCAL destination traffic to our custom KIND-MASQ-AGENT chain")
if err := ma.nft.Run(context.TODO(), tx); err != nil {
klog.Infof("error deleting nftables rules %v", err)
}
}
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ module sigs.k8s.io/kind/images/kindnetd
go 1.22.0

require (
github.com/coreos/go-iptables v0.7.0
github.com/pkg/errors v0.9.1
github.com/vishvananda/netlink v1.3.0
k8s.io/api v0.30.3
k8s.io/apimachinery v0.30.3
k8s.io/client-go v0.30.3
k8s.io/klog/v2 v2.130.1
sigs.k8s.io/knftables v0.0.16
sigs.k8s.io/kube-network-policies v0.5.1
)

Expand All @@ -30,7 +30,6 @@ require (
github.com/prometheus/common v0.53.0 // indirect
github.com/prometheus/procfs v0.14.0 // indirect
golang.org/x/sync v0.7.0 // indirect
sigs.k8s.io/knftables v0.0.16 // indirect
sigs.k8s.io/network-policy-api v0.1.5 // indirect
)

Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-iptables v0.7.0 h1:XWM3V+MPRr5/q51NuWSgU0fqMad64Zyxs8ZUoMsamr8=
github.com/coreos/go-iptables v0.7.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down

0 comments on commit 020431f

Please sign in to comment.