Skip to content

Commit

Permalink
Release cce-network-v2/2.12.13
Browse files Browse the repository at this point in the history
  • Loading branch information
gola committed Jan 22, 2025
1 parent aef37ce commit 926b8fb
Show file tree
Hide file tree
Showing 20 changed files with 306 additions and 74 deletions.
2 changes: 1 addition & 1 deletion cce-network-v2/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.12.12
2.12.13
Binary file modified cce-network-v2/deploy/cce-network-v2-2.12.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions cce-network-v2/deploy/cce-network-v2/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 2.12.12
version: 2.12.13

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "2.12.12"
appVersion: "2.12.13"
10 changes: 8 additions & 2 deletions cce-network-v2/docs/release.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如
3. 增加节点网络配置集功能 NetResourceConfigSet,支持指定节点独立配置网络资源。
4. 增加对 HPAS 实例的支持

#### 2.12.13 [20250122]
1. [Bug] 修复 ReuseIP CEP 跨 ENI 重用时,ENI 对象未更新导致 ReuseIP 在 ENI 对象上残留的问题,解决使用固定 IP 的 Pod IP 非预期变更的问题
2. [Optimize] 优化 ubuntu22.04 中的 /lib/systemd/network/99-default.link 为 none,解决该参数被意外修改后导致 veth 的 mac 地址被非预期变更的问题
3. [Bug] 修复 RDMA 场景下,请求 HPC 接口返回结果为 nil 时的空指针问题
4. [Bug] 修复 VPC-ENI 模式下开启 RDMA 时的 Remote ENI Syncer 逻辑,纳管 CCE 历史创建的 ENI 时避免将 ENI 网卡错误识别为 RDMA ENI 导致 RDMA 网卡无法正常使用的问题

#### 2.12.12 [20250121]
1. [Bug] 修复 NetResourceConfigSet 对应的 ExtCniPlugins 字段修改不生效问题,支持节点维度的自定义插件 ExtCniPlugins 字段配置
2. [Feature] 增加对 HPAS 实例的支持
Expand All @@ -17,7 +23,7 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如
2. [Bug] 修改开启 RDMA 场景下 RDMA ENI 对象本地缓存过期状态相关逻辑,解决因 resync nrs timeout 而导致的新增 RDMA 节点初始化慢,大规模集群扩容速度慢的问题
3. [Optimize] 修改开启 RDMA 场景下 RDMA NetResourceSet 对象拼装规则,以及 ENI 对象的 LabelSelectorValue 的拼装规则,防止 RDMA NetResourceSet 名字超过限定值 253,防止 ENI 对象的 LabelSelectorValue 超过限定值 63,解决因 Node Name 超长而导致的 cce-network-agent panic 问题
4. [Optimize] 修改 RDMA ENI 对象更新逻辑,解决因 NodeName 变更时 ENI 对象未正常销毁而导致的 RDMA ENI 对象无法被更新而导致的节点无法就绪的问题
5. [Optimize] 优化开启 RDMA 模式时的 RDMA ENI 状态机处理逻辑,支持非终态 RDMA ENI 的处理流程,避免非终态状态 RDMA ENI 卡住节点NotReady 无法恢复的问题
5. [Optimize] 优化开启 RDMA 模式时的 RDMA ENI 状态机处理逻辑,支持非终态 RDMA ENI 的处理流程,避免非终态状态 RDMA ENI 卡住节点 NotReady 无法恢复的问题
6. [Optimize] 优化开启 RDMA 模式时,对 HPC OpenAPI 的请求逻辑,大幅降低请求频率,降低大规模集群下的 OpenAPI 请求压力
7. [Bug] 修复创建 Node Interface 对象时因初始值未判断而导致的出现 Instance is out of interfaces 导致节点就绪慢的问题

Expand Down Expand Up @@ -87,7 +93,7 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如
3. [Feature] 增加 eni 安全组同步功能, 保持CCE ENI 和节点安全组同步
4. [Feature] 优化Pod调度算法,增加节点 ip capacity 自动适配,避免节点 IP 地址资源浪费
5. [Feature] 增加节点网络配置集功能 NetResourceConfigSet,支持指定节点独立配置网络资源
6. [Optimize] 修复 psts 对象在使用enableReuseIPAddress时可能更新 cep 时Addressing为空,不能记录错误信息的问题
6. [Optimize] 修复 psts 对象在使用 enableReuseIPAddress 时可能更新 cep 时 Addressing 为空,不能记录错误信息的问题
7. [Optimize] 优化 operator 事件积压问题,避免事件长期超时积压
8. [Optimize] agent 优化 IP 地址 gc 算法,在达到 gc 周期后,支持按照 IP 地址清理已变更 cep 遗留地址的能力
9. [Optimize] 将动态 cep 与 nrs 生命周期绑定,减少 agent 被杀死的缩容时,遗留的 cep 对象数
Expand Down
7 changes: 4 additions & 3 deletions cce-network-v2/operator/watchers/cce_eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ package watchers
import (
"context"

kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

operatorOption "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/operator/option"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s"
ccev2 "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/apis/cce.baidubce.com/v2"
ccev2lister "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/client/listers/cce.baidubce.com/v2"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/watchers/cm"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/syncer"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var ENIClient = &eniUpdaterImpl{}
Expand All @@ -46,7 +47,7 @@ func StartSynchronizingENI(ctx context.Context, eniManager syncer.ENIEventHandle
obj, err := enisLister.Get(key)

// Delete handling
if errors.IsNotFound(err) {
if kerrors.IsNotFound(err) {
return eniManager.Delete(key)
}
if err != nil {
Expand Down
14 changes: 13 additions & 1 deletion cce-network-v2/pkg/bce/bcesync/remote_eni_syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/operator/watchers"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/api/cloud"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/api/eni"
bceutils "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/utils"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/defaults"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s"
ccev2 "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/apis/cce.baidubce.com/v2"
Expand Down Expand Up @@ -60,6 +61,9 @@ func (es *remoteVpcEniSyncher) syncENI(ctx context.Context) (result []eni.Eni, e
}
scopedLog := log.WithField(taskLogField, eniControllerName).
WithField("request", logfields.Json(listArgs))
// List ENIs from BCE cloud. The result does not contain underlay RDMA (HPC) or overlay RDMA (ERI) ENIs.
// Underlay RDMA ENIs can be fetched with GetHPCEniID(ctx context.Context, instanceID string) (*hpc.EniList, error).
// Overlay RDMA ENIs can be fetched with ListERIs(ctx context.Context, args eni.ListEniArgs) ([]eni.Eni, error).
enis, err := es.bceclient.ListENIs(context.TODO(), listArgs)
if err != nil {
scopedLog.WithError(err).Errorf("sync eni failed")
Expand Down Expand Up @@ -120,8 +124,16 @@ func (es *remoteVpcEniSyncher) createExternalENI(eni *enisdk.Eni) (isExisted boo
if err != nil || len(nrsList) == 0 {
return
}
var resource *ccev2.NetResourceSet
for _, nrs := range nrsList {
// es.bceclient.ListENIs(context.TODO(), listArgs) only returns ENIs that are not RDMA ENIs (HPC underlay RDMA / ERI overlay RDMA),
// so we check nrs.Spec.ENI.InstanceType to make sure the ENI is attached to a non-RDMA NetResourceSet.
if nrs.Spec.ENI.InstanceType != bceutils.UnderlayRDMA && nrs.Spec.ENI.InstanceType != bceutils.OverlayRDMA {
resource = nrs
break
}
}

resource := nrsList[0]
scopeLog = scopeLog.WithField("nodeName", resource.Name)
scopeLog.Debugf("find node by instanceID success")
scopeLog.Infof("start to create external eni")
Expand Down
16 changes: 9 additions & 7 deletions cce-network-v2/pkg/bce/rdma/client/hpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,17 @@ func (c *HpcClient) BatchAddPrivateIP(ctx context.Context, eniID string, private
PrivateIPAddressCount: count,
}
hpcEni, err := c.cloud.BatchAddHpcEniPrivateIP(ctx, args)
if err != nil {
return hpcEni.PrivateIPAddresses, err
}
if len(hpcEni.PrivateIPAddresses) == 0 {
return hpcEni.PrivateIPAddresses, fmt.Errorf("failed to batch add private ips in %s hpcEni", eniID)
if hpcEni == nil || len(hpcEni.PrivateIPAddresses) == 0 {
log.WithError(err).WithField("hpcENI", eniID).Warnf("failed to batch add private ips in hpc ENI")
if err == nil {
err = fmt.Errorf("failed to batch add private ips in hpc ENI %s", eniID)
}
return []string{}, err
}
log.Infof("batch add HpcEni private ips are %v", hpcEni.PrivateIPAddresses)

return hpcEni.PrivateIPAddresses, nil
log.Infof("batch add HpcEni private ips are %v in hpcENI %s", hpcEni.PrivateIPAddresses, eniID)

return hpcEni.PrivateIPAddresses, err
}

func (c *HpcClient) BatchDeletePrivateIP(ctx context.Context, eniID string, privateIPs []string) error {
Expand Down
31 changes: 20 additions & 11 deletions cce-network-v2/pkg/bce/vpceni/node_bbc.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,14 +329,14 @@ func (n *bbcNetworkResourceSet) allocateIPCrossSubnet(ctx context.Context, sbnID
}

// ReuseIPs implements realNodeInf
func (n *bbcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (string, error) {
func (n *bbcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (eniID string, ipDeletedFromoldEni bool, ipsReleased []string, err error) {
if n.tryRefreshBBCENI() == nil {
return "", fmt.Errorf("bbc eni %s is not ready", n.instanceID)
return "", false, ipsReleased, fmt.Errorf("bbc eni %s is not ready", n.instanceID)
}

namespace, name, err := cache.SplitMetaNamespaceKey(owner)
if err != nil {
return "", fmt.Errorf("invalid owner %s: %v", owner, err)
return "", false, ipsReleased, fmt.Errorf("invalid owner %s: %v", owner, err)
}

scopeLog := n.log.WithFields(logrus.Fields{
Expand All @@ -346,9 +346,10 @@ func (n *bbcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.Priv
"ips": logfields.Repr(ips),
})

// release ip from old bcc/ebc/bbc eni before reuse ip if necessary
// check ip conflict
// the ip should be deleted from the old eni
isLocalIP, err := n.rleaseOldIP(ctx, scopeLog, ips, namespace, name, func(ctx context.Context, scopedLog *logrus.Entry, eniID string, toReleaseIPs []string) error {
isLocalIP, ipsReleased, err := n.rleaseOldIP(ctx, scopeLog, ips, namespace, name, func(ctx context.Context, scopedLog *logrus.Entry, eniID string, toReleaseIPs []string) error {
eni, err := n.manager.enilister.Get(eniID)
if err != nil {
return fmt.Errorf("fail to release old ip. get eni %s failed: %v", eniID, err)
Expand All @@ -371,14 +372,22 @@ func (n *bbcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.Priv
return err
})
if err != nil {
return "", err
return "", false, ipsReleased, err
}
if isLocalIP {
scopeLog.Info("ip is local, no need to release ip from bbc eni")
return n.bbceni.Name, nil
return n.bbceni.Name, false, ipsReleased, nil
} else {
if len(ipsReleased) > 0 {
scopeLog.Info("ip is not local, need to release ip from bbc eni")
ipDeletedFromoldEni = true
}
}
// check if all ips are released from old bcc/ebc/bbc eni before reuse ip
if ipDeletedFromoldEni && (len(ips) != len(ipsReleased)) {
scopeLog.Warnf("ip is not local, but only some ips (%v) are released from bbc eni", ipsReleased)
return "", ipDeletedFromoldEni, ipsReleased, fmt.Errorf("ip is not local, but only some ips (%v) are released from bbc eni", ipsReleased)
}

// TODO: release ip from bbc/ebc/vpc eni before reuse ip
var ipAndSubnets []bbc.IpAndSubnet
for _, pip := range ips {
ipAndSubnets = append(ipAndSubnets, bbc.IpAndSubnet{
Expand All @@ -398,14 +407,14 @@ func (n *bbcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.Priv

if err != nil {
scopeLog.WithError(err).Error("failed to reuse ip cross subnet")
return "", err
return "", ipDeletedFromoldEni, ipsReleased, err
} else if len(resp.PrivateIps) == 0 {
scopeLog.Error("failed to reuse ip cross subnet without any error")
return "", fmt.Errorf("failed to reuse ip cross subnet without any error")
return "", ipDeletedFromoldEni, ipsReleased, fmt.Errorf("failed to reuse ip cross subnet without any error")
}
scopeLog.WithField("ips", logfields.Repr(ips)).Info("failed to reuse ip cross subnet")

return n.bbceni.Name, nil
return n.bbceni.Name, ipDeletedFromoldEni, ipsReleased, nil
}

var _ realNodeInf = &bbcNetworkResourceSet{}
25 changes: 18 additions & 7 deletions cce-network-v2/pkg/bce/vpceni/node_bcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,13 +435,13 @@ func (n *bccNetworkResourceSet) allocateIPCrossSubnet(ctx context.Context, sbnID
}

// ReuseIPs implements realNodeInf
func (n *bccNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (eniID string, err error) {
func (n *bccNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (eniID string, ipDeletedFromoldEni bool, ipsReleased []string, err error) {
if n.k8sObj.Spec.ENI.UseMode == string(ccev2.ENIUseModePrimaryIP) {
return "", fmt.Errorf("allocate ip cross subnet not support primary ip mode")
return "", false, ipsReleased, fmt.Errorf("allocate ip cross subnet not support primary ip mode")
}
scopedLog := n.log.WithField("action", "reuseIPs")
if len(ips) == 0 {
return "", fmt.Errorf("no ip to reuse")
return "", false, ipsReleased, fmt.Errorf("no ip to reuse")
}

namespace, name, err := cache.SplitMetaNamespaceKey(owner)
Expand Down Expand Up @@ -478,21 +478,22 @@ func (n *bccNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.Priv
}
if action.AvailableForAllocationIPv4 == 0 && action.AvailableForAllocationIPv6 == 0 {
if action.AvailableInterfaces == 0 {
return "", fmt.Errorf("no available ip for allocation on node %s", n.k8sObj.Name)
return "", false, ipsReleased, fmt.Errorf("no available ip for allocation on node %s", n.k8sObj.Name)
}
_, eniID, err = n.CreateInterface(ctx, action, scopedLog)
if err != nil {
return "", fmt.Errorf("create interface failed: %v", err)
return "", false, ipsReleased, fmt.Errorf("create interface failed: %v", err)
}
return "", fmt.Errorf("no available eni for allocation on node %s, try to create new eni %s", n.k8sObj.Name, eniID)
return "", false, ipsReleased, fmt.Errorf("no available eni for allocation on node %s, try to create new eni %s", n.k8sObj.Name, eniID)
}
eniID = action.InterfaceID
scopedLog = scopedLog.WithField("eni", eniID)
scopedLog.Debug("prepare allocate ip cross subnet for eni")

// release ip from old bcc/ebc/bbc eni before reuse ip if necessary
// check ip conflict
// the ip should be deleted from the old eni
isLocalIP, err := n.rleaseOldIP(ctx, scopedLog, ips, namespace, name, func(ctx context.Context, scopedLog *logrus.Entry, eniID string, toReleaseIPs []string) error {
isLocalIP, ipsReleased, err := n.rleaseOldIP(ctx, scopedLog, ips, namespace, name, func(ctx context.Context, scopedLog *logrus.Entry, eniID string, toReleaseIPs []string) error {
scopedLog.WithField("oldENI", eniID).WithField("toReleaseIPs", toReleaseIPs)
err = n.manager.bceclient.BatchDeletePrivateIP(ctx, toReleaseIPs, eniID, false)
if err != nil {
Expand All @@ -510,6 +511,16 @@ func (n *bccNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.Priv
eniID = enis[0].Name
scopedLog.Infof("ip %s is local ip, directly reusable", ips[0].PrivateIPAddress)
return
} else {
if len(ipsReleased) > 0 {
scopedLog.WithField("ipsReleased", ipsReleased).Debugf("ips %v is not local ip, release from old eni success", ips)
ipDeletedFromoldEni = true
}
}
// check if all ips are released from old bcc/ebc/bbc eni before reuse ip
if ipDeletedFromoldEni && (len(ips) != len(ipsReleased)) {
scopedLog.Warnf("ip is not local, but only some ips (%v) are released from bcc/ebc eni", ipsReleased)
return "", ipDeletedFromoldEni, ipsReleased, fmt.Errorf("ip is not local, but only some ips (%v) are released from bcc/ebc eni", ipsReleased)
}

defer func() {
Expand Down
4 changes: 2 additions & 2 deletions cce-network-v2/pkg/bce/vpceni/node_ebc.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,9 @@ func (n *ebcNetworkResourceSet) allocateIPCrossSubnet(ctx context.Context, sbnID
return nil, "", fmt.Errorf("ebc primary interface with secondary IP mode not support allocate ip cross subnet")
}

func (n *ebcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (eniID string, err error) {
func (n *ebcNetworkResourceSet) reuseIPs(ctx context.Context, ips []*models.PrivateIP, owner string) (eniID string, ipDeletedFromoldEni bool, ipsReleased []string, err error) {
if !n.usePrimaryENIWithSecondaryMode {
return n.bccNetworkResourceSet.reuseIPs(ctx, ips, owner)
}
return "", fmt.Errorf("ebc primary interface with secondary IP mode not support allocate ip cross subnet")
return "", false, ipsReleased, fmt.Errorf("ebc primary interface with secondary IP mode not support allocate ip cross subnet")
}
Loading

0 comments on commit 926b8fb

Please sign in to comment.