Skip to content

Commit

Permalink
Fix a flaky failure in VM Agent e2e test "testExternalNode" (#6154)
Browse files Browse the repository at this point in the history
The failure occurs when the check is performed before the default route on the
uplink is removed, then two interfaces are found, which leads to a mismatch with
the expectation.

The fix is to introduce a retry when checking the VM network configurations, and
add a check on the count of the default route before return.

Signed-off-by: Wenying Dong <[email protected]>
  • Loading branch information
wenyingd authored Apr 2, 2024
1 parent 62bfd6b commit a8d78bc
Showing 1 changed file with 72 additions and 36 deletions.
108 changes: 72 additions & 36 deletions test/e2e/vmagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,17 @@ func setupVMAgentTest(t *testing.T, data *TestData) ([]vmInfo, error) {
vms := strings.Split(testOptions.linuxVMs, " ")
for _, vm := range vms {
t.Logf("Get info for Linux VM: %s", vm)
tempVM := getVMInfo(t, data, vm)
tempVM, err := getVMInfo(t, data, vm)
require.NoError(t, err)
vmList = append(vmList, tempVM)
}
}
if testOptions.windowsVMs != "" {
vms := strings.Split(testOptions.windowsVMs, " ")
for _, vm := range vms {
t.Logf("Get info for Windows VM: %s", vm)
tempVM := getWindowsVMInfo(t, data, vm)
tempVM, err := getWindowsVMInfo(t, data, vm)
require.NoError(t, err)
vmList = append(vmList, tempVM)
}
}
Expand All @@ -267,11 +269,13 @@ func teardownVMAgentTest(t *testing.T, data *TestData, vmList []vmInfo) {
verifyUpLinkAfterCleanup := func(vm vmInfo) {
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 1*time.Minute, true, func(ctx context.Context) (done bool, err error) {
var tempVM vmInfo
var getVMErr error
if vm.osType == linuxOS {
tempVM = getVMInfo(t, data, vm.nodeName)
tempVM, getVMErr = getVMInfo(t, data, vm.nodeName)
} else {
tempVM = getWindowsVMInfo(t, data, vm.nodeName)
tempVM, getVMErr = getWindowsVMInfo(t, data, vm.nodeName)
}
require.NoError(t, getVMErr)
if vm.ifName != tempVM.ifName {
t.Logf("Retry, unexpected uplink interface name, expected %s, got %s", vm.ifName, tempVM.ifName)
return false, nil
Expand Down Expand Up @@ -330,14 +334,26 @@ func testExternalNode(t *testing.T, data *TestData, vmList []vmInfo) {
exists, err := verifyInterfaceIsInOVS(t, data, vm)
return exists, err
})
assert.NoError(t, err, "Failed to verify host interface in OVS, vmInfo %+v", vm)
require.NoError(t, err, "Failed to verify host interface in OVS, vmInfo %+v", vm)

var tempVM vmInfo
if vm.osType == windowsOS {
tempVM = getWindowsVMInfo(t, data, vm.nodeName)
} else {
tempVM = getVMInfo(t, data, vm.nodeName)
}
err = wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 20*time.Second, true, func(ctx context.Context) (done bool, err error) {
var getVMErr error
if vm.osType == windowsOS {
tempVM, getVMErr = getWindowsVMInfo(t, data, vm.nodeName)
} else {
tempVM, getVMErr = getVMInfo(t, data, vm.nodeName)
}
if getVMErr != nil {
return false, getVMErr
}
vmIFs := strings.Split(tempVM.ifName, "\n")
if len(vmIFs) > 1 {
return false, nil
}
return true, nil
})
require.NoError(t, err)
assert.Equal(t, vm.ifName, tempVM.ifName, "Failed to verify uplink interface")
assert.Equal(t, vm.ip, tempVM.ip, "Failed to verify uplink IP")
}
Expand All @@ -349,50 +365,70 @@ func testExternalNode(t *testing.T, data *TestData, vmList []vmInfo) {
}
}

func getVMInfo(t *testing.T, data *TestData, nodeName string) (info vmInfo) {
var vm vmInfo
vm.nodeName = nodeName
var cmd string
cmd = "ip -o -4 route show to default | awk '{print $5}'"
vm.osType = linuxOS
func getVMInfo(t *testing.T, data *TestData, nodeName string) (vmInfo, error) {
vm := vmInfo{nodeName: nodeName, osType: linuxOS}
cmd := "ip -o -4 route show to default | awk '{print $5}'"
rc, ifName, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)
}
vm.ifName = strings.TrimSpace(ifName)

cmd = fmt.Sprintf("ifconfig %s | awk '/inet / {print $2}'| sed 's/addr://'", vm.ifName)
rc, ifIP, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)
}
vm.ip = strings.TrimSpace(ifIP)
return vm

return vm, nil
}

func getWindowsVMInfo(t *testing.T, data *TestData, nodeName string) (vm vmInfo) {
func getWindowsVMInfo(t *testing.T, data *TestData, nodeName string) (vmInfo, error) {
var err error
vm.nodeName = nodeName
vm.osType = windowsOS
vm := vmInfo{nodeName: nodeName, osType: windowsOS}
cmd := fmt.Sprintf("powershell 'Get-WmiObject -Class Win32_IP4RouteTable | Where { $_.destination -eq \"0.0.0.0\" -and $_.mask -eq \"0.0.0.0\"} | Sort-Object metric1 | select interfaceindex | ft -HideTableHeaders'")
rc, ifIndex, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIndex, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIndex, stderr)
}
vm.ifIndex = strings.TrimSpace(ifIndex)

cmd = fmt.Sprintf("powershell 'Get-NetAdapter -IfIndex %s | select name | ft -HideTableHeaders'", vm.ifIndex)
rc, ifName, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)
}
vm.ifName = strings.TrimSpace(ifName)

cmd = fmt.Sprintf("powershell 'Get-NetIPAddress -AddressFamily IPv4 -ifIndex %s| select IPAddress| ft -HideTableHeaders'", vm.ifIndex)
rc, ifIP, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)
}
vm.ip = strings.TrimSpace(ifIP)
return vm

return vm, nil
}

func startAntreaAgent(t *testing.T, data *TestData, vm vmInfo) {
Expand Down

0 comments on commit a8d78bc

Please sign in to comment.