Skip to content

Commit

Permalink
Merge pull request #4359 from ty-dc/fix/ip-allocate
Browse files Browse the repository at this point in the history
Fix: statefulSet applications failed to create in multi-NIC scenarios.
  • Loading branch information
weizhoublue authored Dec 11, 2024
2 parents f9feb6b + 07e5b60 commit 1a52011
Show file tree
Hide file tree
Showing 10 changed files with 418 additions and 58 deletions.
109 changes: 85 additions & 24 deletions pkg/ipam/allocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (i *ipam) Allocate(ctx context.Context, addArgs *models.IpamAddArgs) (*mode
releaseStsOutdatedIPFlag := false
if i.config.EnableStatefulSet && podTopController.APIVersion == appsv1.SchemeGroupVersion.String() && podTopController.Kind == constant.KindStatefulSet {
if endpoint != nil {
releaseStsOutdatedIPFlag, err = i.releaseStsOutdatedIPIfNeed(ctx, addArgs, pod, endpoint, podTopController)
releaseStsOutdatedIPFlag, err = i.releaseStsOutdatedIPIfNeed(ctx, addArgs, pod, endpoint, podTopController, IsMultipleNicWithNoName(pod.Annotations))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -114,13 +114,15 @@ func (i *ipam) Allocate(ctx context.Context, addArgs *models.IpamAddArgs) (*mode
}

func (i *ipam) releaseStsOutdatedIPIfNeed(ctx context.Context, addArgs *models.IpamAddArgs,
pod *corev1.Pod, endpoint *spiderpoolv2beta1.SpiderEndpoint, podTopController types.PodTopController) (bool, error) {
pod *corev1.Pod, endpoint *spiderpoolv2beta1.SpiderEndpoint, podTopController types.PodTopController, isMultipleNicWithNoName bool) (bool, error) {
logger := logutils.FromContext(ctx)

preliminary, err := i.getPoolCandidates(ctx, addArgs, pod, podTopController)
if err != nil {
return false, err
}
logger.Sugar().Infof("Preliminary IPPool candidates: %s", preliminary)

poolMap := make(map[string]map[string]struct{})
for _, candidates := range preliminary {
if _, ok := poolMap[candidates.NIC]; !ok {
Expand All @@ -131,38 +133,97 @@ func (i *ipam) releaseStsOutdatedIPIfNeed(ctx context.Context, addArgs *models.I
poolMap[candidates.NIC][pool] = struct{}{}
}
}
endpointMap := make(map[string]map[string]struct{})
for _, ip := range endpoint.Status.Current.IPs {
if _, ok := endpointMap[ip.NIC]; !ok {
endpointMap[ip.NIC] = make(map[string]struct{})
}
logger.Sugar().Debugf("The current mapping between the Pod's IPPool candidates and NICs: %v", poolMap)

// Spiderpool assigns IP addresses to NICs one by one.
// Some NICs may have their IP pools changed, while others may remain unchanged.
// Record these changes and differences to handle specific NICs accordingly.
releaseEndpointIPsFlag := false
needReleaseEndpointIPs := []spiderpoolv2beta1.IPAllocationDetail{}
noReleaseEndpointIPs := []spiderpoolv2beta1.IPAllocationDetail{}
for index, ip := range endpoint.Status.Current.IPs {
if ip.IPv4Pool != nil && *ip.IPv4Pool != "" {
endpointMap[ip.NIC][*ip.IPv4Pool] = struct{}{}
if isMultipleNicWithNoName {
if _, ok := poolMap[strconv.Itoa(index)][*ip.IPv4Pool]; !ok {
// If using the multi-NIC feature through ipam.spidernet.io/ippools without specifying interface names,
// and if the IP pool of one NIC changes, only reclaiming the corresponding endpoint IP could cause the IPAM allocation method to lose allocation records.
// When the interface name is not specified, the allocated NIC name might be "", which cannot be handled properly.
// If isMultipleNicWithNoName is true, all NIC IP addresses will be reclaimed and reallocated.
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv4Pool, poolMap[strconv.Itoa(index)])
releaseEndpointIPsFlag = true
break
}
}
// All other cases determine here whether an IP address needs to be reclaimed.
if _, ok := poolMap[ip.NIC][*ip.IPv4Pool]; !ok && ip.NIC == *addArgs.IfName {
// The multi-NIC feature can be used in the following two ways:
// 1. By specifying additional NICs through k8s.v1.cni.cncf.io/networks and configure the default pool.
// 2. By using ipam.spidernet.io/ippools (excluding cases where the interface name is empty).
// When a change is detected in the corresponding NIC's IP pool,
// the IP information for that NIC will be automatically reclaimed and reallocated.
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv4Pool, poolMap[ip.NIC])
releaseEndpointIPsFlag = true
needReleaseEndpointIPs = append(needReleaseEndpointIPs, ip)
continue
}
}
if ip.IPv6Pool != nil && *ip.IPv6Pool != "" {
endpointMap[ip.NIC][*ip.IPv6Pool] = struct{}{}
if isMultipleNicWithNoName {
if _, ok := poolMap[strconv.Itoa(index)][*ip.IPv6Pool]; !ok {
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv6Pool, poolMap[strconv.Itoa(index)])
releaseEndpointIPsFlag = true
break
}
}
if _, ok := poolMap[ip.NIC][*ip.IPv6Pool]; !ok && ip.NIC == *addArgs.IfName {
logger.Sugar().Infof("StatefulSet Pod need to release IP, owned pool: %v, expected pools: %v", *ip.IPv6Pool, poolMap[ip.NIC])
releaseEndpointIPsFlag = true
needReleaseEndpointIPs = append(needReleaseEndpointIPs, ip)
continue
}
}

// According to the NIC allocation mechanism, we check whether the pool information for each NIC has changed.
// If there is no change, we do not need to reclaim the corresponding endpoint and IP for that NIC.
noReleaseEndpointIPs = append(noReleaseEndpointIPs, ip)
}
if !checkNicPoolExistence(endpointMap, poolMap) {
logger.Sugar().Info("StatefulSet Pod need to release IP: owned pool %v, expected pools: %v", endpointMap, poolMap)
if endpoint.DeletionTimestamp == nil {
logger.Sugar().Infof("delete outdated endpoint of statefulset pod: %v/%v", endpoint.Namespace, endpoint.Name)
if err := i.endpointManager.DeleteEndpoint(ctx, endpoint); err != nil {

if releaseEndpointIPsFlag {
if isMultipleNicWithNoName || len(needReleaseEndpointIPs) == len(endpoint.Status.Current.IPs) {
// The endpoint should be deleted in the following cases:
// 1. If the multi-NIC feature is used through ipam.spidernet.io/ippools without specifying the interface name, and the IPPool has changed.
// 2. In other multi-NIC or single-NIC scenarios, if the IPPool of all NICs has changed.
logger.Sugar().Infof("remove outdated of StatefulSet pod %s/%s: %v", endpoint.Namespace, endpoint.Name, endpoint.Status.Current.IPs)
if endpoint.DeletionTimestamp == nil {
logger.Sugar().Infof("delete outdated endpoint of statefulset pod: %v/%v", endpoint.Namespace, endpoint.Name)
if err := i.endpointManager.DeleteEndpoint(ctx, endpoint); err != nil {
return false, err
}
}
if err := i.endpointManager.RemoveFinalizer(ctx, endpoint); err != nil {
return false, fmt.Errorf("failed to clean statefulset pod's endpoint when expected ippool was changed: %v", err)
}
err := i.release(ctx, endpoint.Status.Current.UID, endpoint.Status.Current.IPs)
if err != nil {
return false, err
}
logger.Sugar().Infof("remove outdated of StatefulSet Pod IPs: %v in Pool", endpoint.Status.Current.IPs)
} else {
// Only update the endpoint and IP corresponding to the changed NIC.
logger.Sugar().Infof("try to update the endpoint IPs of the StatefulSet Pod. Old: %+v, New: %+v.", endpoint.Status.Current.IPs, noReleaseEndpointIPs)
if err := i.endpointManager.PatchEndpointAllocationIPs(ctx, endpoint, noReleaseEndpointIPs); err != nil {
return false, err
}
err := i.release(ctx, endpoint.Status.Current.UID, needReleaseEndpointIPs)
if err != nil {
return false, err
}
logger.Sugar().Infof("remove outdated of StatefulSet Pod IPs: %v in Pool", needReleaseEndpointIPs)
}
err := i.release(ctx, endpoint.Status.Current.UID, endpoint.Status.Current.IPs)
if err != nil {
return false, err
}
logger.Sugar().Info("remove outdated of StatefulSet pod %s/%s: %v", endpoint.Namespace, endpoint.Name, endpoint.Status.Current.IPs)
if err := i.endpointManager.RemoveFinalizer(ctx, endpoint); err != nil {
return false, fmt.Errorf("failed to clean statefulset pod's Endpoint when expected ippool was changed: %v", err)
}
endpoint = nil

return true, nil
} else {
logger.Sugar().Debugf("StatefulSet Pod does not need to release IP: owned pool %v, expected pools: %v", endpointMap, poolMap)
logger.Sugar().Debugf("StatefulSet Pod does not need to release IP: %v", endpoint.Status.Current.IPs)
}
return false, nil
}
Expand Down
27 changes: 6 additions & 21 deletions pkg/ipam/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ import (
"context"
"encoding/json"
"fmt"
"net"
"strconv"

appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/utils/strings/slices"
"net"
"strconv"

"github.com/spidernet-io/spiderpool/api/v1/agent/models"
subnetmanagercontrollers "github.com/spidernet-io/spiderpool/pkg/applicationcontroller/applicationinformers"
Expand Down Expand Up @@ -177,10 +178,10 @@ func IsMultipleNicWithNoName(anno map[string]string) bool {
return false
}

result := true
result := false
for _, v := range annoPodIPPools {
if v.NIC != "" {
result = false
if v.NIC == "" {
result = true
}
}

Expand Down Expand Up @@ -224,19 +225,3 @@ func validateAndMutateMultipleNICAnnotations(annoIPPoolsValue types.AnnoPodIPPoo

return nil
}

func checkNicPoolExistence(endpointMap, poolMap map[string]map[string]struct{}) bool {
for outerKey, innerMap := range endpointMap {
poolInnerMap, exists := poolMap[outerKey]
if !exists {
return false
}

for innerKey := range innerMap {
if _, exists := poolInnerMap[innerKey]; !exists {
return false
}
}
}
return true
}
3 changes: 2 additions & 1 deletion pkg/ippoolmanager/ippool_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,12 @@ func (im *ipPoolManager) genRandomIP(ctx context.Context, ipPool *spiderpoolv2be
// Check if there is a duplicate Pod UID in IPPool.allocatedRecords.
// If so, we skip this allocation and assume that this Pod has already obtained an IP address in the pool.
if record.PodUID == string(pod.UID) {
logger.Sugar().Warnf("The Pod %s/%s UID %s already exists in the assigned IP %s", pod.Namespace, pod.Name, ip, string(pod.UID))
logger.Sugar().Infof("The Pod %s/%s UID %s already exists in the assigned IP %s", pod.Namespace, pod.Name, ip, string(pod.UID))
return net.ParseIP(ip), nil
}
used = append(used, ip)
}

usedIPs, err := spiderpoolip.ParseIPRanges(*ipPool.Spec.IPVersion, used)
if err != nil {
return nil, err
Expand Down
32 changes: 30 additions & 2 deletions pkg/workloadendpointmanager/workloadendpoint_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type WorkloadEndpointManager interface {
UpdateAllocationNICName(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, nic string) (*spiderpoolv2beta1.PodIPAllocation, error)
ReleaseEndpointIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, uid string) ([]spiderpoolv2beta1.IPAllocationDetail, error)
ReleaseEndpointAndFinalizer(ctx context.Context, namespace, podName string, cached bool) error
PatchEndpointAllocationIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, endpointIPs []spiderpoolv2beta1.IPAllocationDetail) error
}

type workloadEndpointManager struct {
Expand Down Expand Up @@ -164,7 +165,21 @@ func (em *workloadEndpointManager) PatchIPAllocationResults(ctx context.Context,
return nil
}

endpoint.Status.Current.IPs = append(endpoint.Status.Current.IPs, convert.ConvertResultsToIPDetails(results, isMultipleNicWithNoName)...)
// Using ipam.spidernet.io/ippools to specify multiple NICs,
// if only one NIC's IP pool changes, only the changed NIC needs to have its IP address reassigned, while the other NICs remain unaffected.
convertResults := convert.ConvertResultsToIPDetails(results, isMultipleNicWithNoName)
for _, result := range convertResults {
exists := false
for _, existingIP := range endpoint.Status.Current.IPs {
if existingIP.NIC == result.NIC {
exists = true
break
}
}
if !exists {
endpoint.Status.Current.IPs = append(endpoint.Status.Current.IPs, result)
}
}
logger.Sugar().Infof("try to update SpiderEndpoint %s", endpoint)
return em.client.Update(ctx, endpoint)
}
Expand Down Expand Up @@ -218,7 +233,6 @@ func (em *workloadEndpointManager) UpdateAllocationNICName(ctx context.Context,
break
}
}

err := em.client.Update(ctx, endpoint)
if nil != err {
return nil, err
Expand All @@ -227,6 +241,20 @@ func (em *workloadEndpointManager) UpdateAllocationNICName(ctx context.Context,
return &endpoint.Status.Current, nil
}

// PatchEndpointAllocationIPs will patch the SpiderEndpoint status recorded IPs.
func (em *workloadEndpointManager) PatchEndpointAllocationIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, newEndpointIPs []spiderpoolv2beta1.IPAllocationDetail) error {
log := logutils.FromContext(ctx)

endpoint.Status.Current.IPs = newEndpointIPs
log.Sugar().Debugf("try to update SpiderEndpoint recorded IPs: %s", endpoint)
err := em.client.Update(ctx, endpoint)
if nil != err {
return err
}

return nil
}

// ReleaseEndpointIPs will release the SpiderEndpoint status recorded IPs.
func (em *workloadEndpointManager) ReleaseEndpointIPs(ctx context.Context, endpoint *spiderpoolv2beta1.SpiderEndpoint, podUID string) ([]spiderpoolv2beta1.IPAllocationDetail, error) {
log := logutils.FromContext(ctx)
Expand Down
47 changes: 47 additions & 0 deletions pkg/workloadendpointmanager/workloadendpoint_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -718,5 +718,52 @@ var _ = Describe("WorkloadEndpointManager", Label("workloadendpoint_manager_test
Expect(err).To(MatchError(constant.ErrUnknown))
})
})

Describe("PatchEndpointAllocationIPs", func() {
var endpointT *spiderpoolv2beta1.SpiderEndpoint
var newEndpointIPs []spiderpoolv2beta1.IPAllocationDetail

BeforeEach(func() {
endpointT = &spiderpoolv2beta1.SpiderEndpoint{
ObjectMeta: metav1.ObjectMeta{
Name: "test-endpoint",
Namespace: "default",
},
Status: spiderpoolv2beta1.WorkloadEndpointStatus{
Current: spiderpoolv2beta1.PodIPAllocation{
IPs: []spiderpoolv2beta1.IPAllocationDetail{
{NIC: "eth0", IPv4: ptr.To("192.168.1.1/24")},
},
},
},
}

newEndpointIPs = []spiderpoolv2beta1.IPAllocationDetail{
{NIC: "eth0", IPv4: ptr.To("192.168.1.2/24")},
{NIC: "eth1", IPv4: ptr.To("192.168.1.3/24")},
}
})

It("successfully patches the SpiderEndpoint with new IPs", func() {
err := fakeClient.Create(ctx, endpointT)
Expect(err).NotTo(HaveOccurred())

err = endpointManager.PatchEndpointAllocationIPs(ctx, endpointT, newEndpointIPs)
Expect(err).NotTo(HaveOccurred())

var updatedEndpoint spiderpoolv2beta1.SpiderEndpoint
err = fakeClient.Get(ctx, types.NamespacedName{Namespace: endpointT.Namespace, Name: endpointT.Name}, &updatedEndpoint)
Expect(err).NotTo(HaveOccurred())
Expect(updatedEndpoint.Status.Current.IPs).To(Equal(newEndpointIPs))
})

It("fails to patch the SpiderEndpoint due to update error", func() {
patches := gomonkey.ApplyMethodReturn(fakeClient, "Update", constant.ErrUnknown)
defer patches.Reset()

err := endpointManager.PatchEndpointAllocationIPs(ctx, endpointT, newEndpointIPs)
Expect(err).To(MatchError(constant.ErrUnknown))
})
})
})
})
4 changes: 3 additions & 1 deletion test/doc/annotation.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@
| A00013 | It's invalid to specify one NIC corresponding IPPool in IPPools annotation with multiple NICs | p2 | | done | |
| A00014 | It's invalid to specify same NIC name for IPPools annotation with multiple NICs | p2 | | done | |
| A00015 | Use wildcard for 'ipam.spidernet.io/ippools' annotation to specify IPPools | p2 | | done | |
| A00016 | In the annotation ipam.spidernet.io/ippools for multi-NICs, when the IP pool for one NIC runs out of IPs, it should not exhaust IPs from other pools. | p2 | | done | |
| A00016 | In the annotation ipam.spidernet.io/ippools for multi-NICs, when the IP pool for one NIC runs out of IPs, it should not exhaust IPs from other pools. | p2 | | done | |
| A00017 | Stateful applications can use multiple NICs via k8s.v1.cni.cncf.io/networks, enabling creation, restart, and IP address changes. | p3 | | done | |
| A00018 | Stateful applications using the annotation ipam.spidernet.io/ippools without specifying a NIC name can still create Pods, restart them, and update their IP addresses. | p3 | | done | |
1 change: 0 additions & 1 deletion test/doc/ippoolcr.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@
| D00015 | The namespace where the pod resides does not match the namespaceName, and the IP cannot be assigned | p2 | | done | |
| D00016 | namespaceName has higher priority than namespaceAffinity | p3 | | done | |
| D00017 | Large IPv6 pool, correct statistics of IP number. | p3 | | done | |

Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

| Case ID | Title | Priority | Smoke | Status | Other |
| ------- | ------| -------- | ----- | ------ | ----- |
| Y00001 | test create spiderclaimparameter | p3 | | done |
| Y00002 | test create spiderclaimparameter for empty staticNics | p3 | | done |
| Y00003 | verify spidermultusconfig of the secondaryNics if is exist | p3 | | |
| Y00004 | if defaultNic is empty, webhook set default cluster network | p3 | | |
| Y00005 | the cniType of all the secondaryNics should be same | p3 | | |
| Y00001 | test create spiderclaimparameter | p3 | | done | |
| Y00002 | test create spiderclaimparameter for empty staticNics | p3 | | done | |
| Y00003 | verify spidermultusconfig of the secondaryNics if is exist | p3 | | | |
| Y00004 | if defaultNic is empty, webhook set default cluster network | p3 | | | |
| Y00005 | the cniType of all the secondaryNics should be same | p3 | | | |
6 changes: 3 additions & 3 deletions test/e2e/affinity/affinity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ var _ = Describe("test Affinity", Label("affinity"), func() {
})
})

It("Successfully restarted statefulSet/pod with matching podSelector, ip remains the same", Label("L00008", "A00009"), func() {
It("After the statefulset changes the IP pool and restarts, the IP is changed correctly and the UID recorded in the endpoint is synchronized correctly.", Label("L00008", "A00009"), func() {
// A00009:Modify the annotated IPPool for a specified StatefulSet pod
// Generate ippool annotation string
podIppoolAnnoStr := common.GeneratePodIPPoolAnnotations(frame, common.NIC1, defaultV4PoolNameList, defaultV6PoolNameList)
Expand Down Expand Up @@ -496,7 +496,7 @@ var _ = Describe("test Affinity", Label("affinity"), func() {
object, err := common.GetWorkloadByName(frame, pod.Namespace, pod.Name)
Expect(err).NotTo(HaveOccurred())
Expect(object).NotTo(BeNil())
uidMap[string(object.UID)] = pod.Name
uidMap[string(object.Status.Current.UID)] = pod.Name
}
GinkgoWriter.Printf("StatefulSet %s/%s corresponding Pod IP allocations: %v \n", stsObject.Namespace, stsObject.Name, ipMap)

Expand Down Expand Up @@ -569,7 +569,7 @@ var _ = Describe("test Affinity", Label("affinity"), func() {
// WorkloadEndpoint UID remains the same
object, err := common.GetWorkloadByName(frame, pod.Namespace, pod.Name)
Expect(err).NotTo(HaveOccurred(), "Failed to get the same uid")
d, ok := uidMap[string(object.UID)]
d, ok := uidMap[string(object.Status.Current.UID)]
Expect(ok).To(BeFalse(), "Unexpectedly got the same uid")
GinkgoWriter.Printf("Pod %v workloadendpoint UID %v remains the same \n", d, object.UID)
}
Expand Down
Loading

0 comments on commit 1a52011

Please sign in to comment.