Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-36735: Wait for carrier before announcing IPs via GARP/NA #112

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 31 additions & 21 deletions cmd/sriov/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"runtime"
"strings"
"time"

"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
Expand Down Expand Up @@ -126,6 +127,8 @@ func cmdAdd(args *skel.CmdArgs) error {
result.Interfaces[0].Mtu = *netConf.MTU
}

doAnnounce := false

// run the IPAM plugin
if netConf.IPAM.Type != "" {
var r types.Result
Expand Down Expand Up @@ -161,31 +164,12 @@ func cmdAdd(args *skel.CmdArgs) error {

if !netConf.DPDKMode {
err = netns.Do(func(_ ns.NetNS) error {
err := ipam.ConfigureIface(args.IfName, newResult)
if err != nil {
return err
}

/* After IPAM configuration is done, the following needs to handle the case of an IP address being reused by a different pods.
* This is achieved by sending Gratuitous ARPs and/or Unsolicited Neighbor Advertisements unconditionally.
* Although we set arp_notify and ndisc_notify unconditionally on the interface (please see EnableArpAndNdiscNotify()), the kernel
* only sends GARPs/Unsolicited NA when the interface goes from down to up, or when the link-layer address changes on the interfaces.
* These scenarios are perfectly valid and recommended to be enabled for optimal network performance.
* However for our specific case, which the kernel is unaware of, is the reuse of IP addresses across pods where each pod has a different
* link-layer address for it's SRIOV interface. The ARP/Neighbor cache residing in neighbors would be invalid if an IP address is reused.
* In order to update the cache, the GARP/Unsolicited NA packets should be sent for performance reasons. Otherwise, the neighbors
* may be sending packets with the incorrect link-layer address. Eventually, most network stacks would send ARPs and/or Neighbor
* Solicitation packets when the connection is unreachable. This would correct the invalid cache; however this may take a significant
* amount of time to complete.
*
* The error is ignored here because enabling this feature is only a performance enhancement.
*/
_ = utils.AnnounceIPs(args.IfName, newResult.IPs)
return nil
return ipam.ConfigureIface(args.IfName, newResult)
})
if err != nil {
return err
}
doAnnounce = true
}
result = newResult
}
Expand All @@ -209,6 +193,32 @@ func cmdAdd(args *skel.CmdArgs) error {
return fmt.Errorf("error saving the pci allocation for vf pci address %s: %v", netConf.DeviceID, err)
}

if doAnnounce {
_ = netns.Do(func(_ ns.NetNS) error {
/* After IPAM configuration is done, the following needs to handle the case of an IP address being reused by a different pods.
* This is achieved by sending Gratuitous ARPs and/or Unsolicited Neighbor Advertisements unconditionally.
* Although we set arp_notify and ndisc_notify unconditionally on the interface (please see EnableArpAndNdiscNotify()), the kernel
* only sends GARPs/Unsolicited NA when the interface goes from down to up, or when the link-layer address changes on the interfaces.
* These scenarios are perfectly valid and recommended to be enabled for optimal network performance.
* However for our specific case, which the kernel is unaware of, is the reuse of IP addresses across pods where each pod has a different
* link-layer address for it's SRIOV interface. The ARP/Neighbor cache residing in neighbors would be invalid if an IP address is reused.
* In order to update the cache, the GARP/Unsolicited NA packets should be sent for performance reasons. Otherwise, the neighbors
* may be sending packets with the incorrect link-layer address. Eventually, most network stacks would send ARPs and/or Neighbor
* Solicitation packets when the connection is unreachable. This would correct the invalid cache; however this may take a significant
* amount of time to complete.
*/

/* The interface might not yet have carrier. Wait for it for a short time. */
hasCarrier := utils.WaitForCarrier(args.IfName, 200*time.Millisecond)

/* The error is ignored here because enabling this feature is only a performance enhancement. */
err := utils.AnnounceIPs(args.IfName, result.IPs)

logging.Debug("announcing IPs", "hasCarrier", hasCarrier, "IPs", result.IPs, "announceError", err)
return nil
})
}

return types.PrintResult(result, netConf.CNIVersion)
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/utils/netlink_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type MyNetlink struct {
NetlinkManager
}

var netLinkLib NetlinkManager = &MyNetlink{}

// LinkByName implements NetlinkManager
func (n *MyNetlink) LinkByName(name string) (netlink.Link, error) {
return netlink.LinkByName(name)
Expand Down
39 changes: 36 additions & 3 deletions pkg/utils/packet.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import (
"fmt"
"net"
"syscall"
"time"

current "github.com/containernetworking/cni/pkg/types/100"
"github.com/vishvananda/netlink"
"golang.org/x/net/icmp"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
)

var (
Expand Down Expand Up @@ -164,10 +166,8 @@ func SendUnsolicitedNeighborAdvertisement(srcIP net.IP, linkObj netlink.Link) er

// AnnounceIPs sends either a GARP or Unsolicited NA depending on the IP address type (IPv4 vs. IPv6 respectively) configured on the interface.
func AnnounceIPs(ifName string, ipConfigs []*current.IPConfig) error {
myNetLink := MyNetlink{}

// Retrieve the interface name in the container.
linkObj, err := myNetLink.LinkByName(ifName)
linkObj, err := netLinkLib.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to get netlink device with name %q: %v", ifName, err)
}
Expand Down Expand Up @@ -196,3 +196,36 @@ func AnnounceIPs(ifName string, ipConfigs []*current.IPConfig) error {
}
return nil
}

// Blocking wait for interface ifName to have carrier (!NO_CARRIER flag).
func WaitForCarrier(ifName string, waitTime time.Duration) bool {
var nextSleepDuration time.Duration

start := time.Now()

for nextSleepDuration == 0 || time.Since(start) < waitTime {
if nextSleepDuration == 0 {
nextSleepDuration = 2 * time.Millisecond
} else {
time.Sleep(nextSleepDuration)
/* Grow wait time exponentionally (factor 1.5). */
nextSleepDuration += nextSleepDuration / 2
}

linkObj, err := netLinkLib.LinkByName(ifName)
if err != nil {
return false
}

/* Wait for carrier, i.e. IFF_UP|IFF_RUNNING. Note that there is also
* IFF_LOWER_UP, but we follow iproute2 ([1]).
*
* [1] https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/ip/ipaddress.c?id=f9601b10c21145f76c3d46c163bac39515ed2061#n86
*/
if linkObj.Attrs().RawFlags&(unix.IFF_UP|unix.IFF_RUNNING) == (unix.IFF_UP | unix.IFF_RUNNING) {
return true
}
}

return false
}
52 changes: 52 additions & 0 deletions pkg/utils/packet_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package utils

import (
"sync/atomic"
"time"

mocks_utils "github.com/k8snetworkplumbingwg/sriov-cni/pkg/utils/mocks"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/mock"

"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)

var _ = Describe("Packets", func() {

Context("WaitForCarrier", func() {
It("should wait until the link has IFF_UP flag", func() {
DeferCleanup(func(old NetlinkManager) { netLinkLib = old }, netLinkLib)

mockedNetLink := &mocks_utils.NetlinkManager{}
netLinkLib = mockedNetLink

rawFlagsAtomic := new(uint32)
*rawFlagsAtomic = unix.IFF_UP

fakeLink := &FakeLink{LinkAttrs: netlink.LinkAttrs{
Index: 1000,
Name: "dummylink",
RawFlags: atomic.LoadUint32(rawFlagsAtomic),
}}

mockedNetLink.On("LinkByName", "dummylink").Return(fakeLink, nil).Run(func(args mock.Arguments) {
fakeLink.RawFlags = atomic.LoadUint32(rawFlagsAtomic)
})

hasCarrier := make(chan bool)
go func() {
hasCarrier <- WaitForCarrier("dummylink", 5*time.Second)
}()

Consistently(hasCarrier, "100ms").ShouldNot(Receive())

go func() {
atomic.StoreUint32(rawFlagsAtomic, unix.IFF_UP|unix.IFF_RUNNING)
}()

Eventually(hasCarrier, "300ms").Should(Receive())
})
})
})