From 5c79c4fbfb0064ce6ac5601e836b5c97e7c52afb Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 8 Jul 2024 18:42:27 +0800 Subject: [PATCH 1/2] netif_addr: fix hw multicast address sync problems It fixes the following problems in the previous implementation. 1. Neglected the fact that multiple multicast IPv4/IPv6 addresses can be mapped to one multicast hw address, and a lower dpvs port may have multiple upper ports (such as vlan). 2. Multicast hw addresses could sync from kni multiple times or be deleted by mistake. 3. Interference from link-down kni devices. Signed-off-by: ywc689 --- include/inetaddr.h | 4 +- include/netif.h | 28 +------- include/netif_addr.h | 51 ++++++++++--- src/inetaddr.c | 57 ++++++++++----- src/kni.c | 16 +++-- src/netif.c | 37 +++------- src/netif_addr.c | 166 +++++++++++++++++++++++-------------------- 7 files changed, 193 insertions(+), 166 deletions(-) diff --git a/include/inetaddr.h b/include/inetaddr.h index 9fb56269e..2a85e7611 100644 --- a/include/inetaddr.h +++ b/include/inetaddr.h @@ -46,7 +46,7 @@ struct inet_ifmcaddr { int af; union inet_addr addr; uint32_t flags; /* not used yet */ - rte_atomic32_t refcnt; + uint32_t refcnt; }; /* @@ -117,7 +117,7 @@ bool inet_chk_mcast_addr(int af, struct netif_port *dev, void inet_ifaddr_dad_failure(struct inet_ifaddr *ifa); -int idev_add_mcast_init(void *args); +int idev_add_mcast_init(struct netif_port *dev); int inet_addr_init(void); int inet_addr_term(void); diff --git a/include/netif.h b/include/netif.h index 951f50bb7..4347eb999 100644 --- a/include/netif.h +++ b/include/netif.h @@ -22,6 +22,7 @@ #include "list.h" #include "dpdk.h" #include "inetaddr.h" +#include "netif_addr.h" #include "global_data.h" #include "timer.h" #include "tc/tc.h" @@ -205,31 +206,6 @@ struct netif_ops { int (*op_get_xstats)(struct netif_port *dev, netif_nic_xstats_get_t **xstats); }; -struct netif_hw_addr { - struct list_head list; - struct rte_ether_addr addr; - rte_atomic32_t refcnt; - /* - * - sync only once! 
- * - * for HA in upper dev, no matter how many times it's added, - * only sync once to lower (when sync_cnt is zero). - * - * and HA (upper)'s refcnt++, to mark lower dev own's it. - * - * - when to unsync? - * - * when del if HA (upper dev)'s refcnt is 1 and syn_cnt is not zero. - * means lower dev is the only owner and need be unsync. - */ - int sync_cnt; -}; - -struct netif_hw_addr_list { - struct list_head addrs; - int count; -}; - struct netif_port { char name[IFNAMSIZ]; /* device name */ portid_t id; /* device id */ @@ -296,8 +272,6 @@ int netif_port_conf_get(struct netif_port *port, struct rte_eth_conf *eth_conf); int netif_port_conf_set(struct netif_port *port, const struct rte_eth_conf *conf); int netif_port_start(struct netif_port *port); // start nic and wait until up int netif_port_stop(struct netif_port *port); // stop nic -int netif_set_mc_list(struct netif_port *port); -int __netif_set_mc_list(struct netif_port *port); int netif_get_queue(struct netif_port *port, lcoreid_t id, queueid_t *qid); int netif_get_link(struct netif_port *dev, struct rte_eth_link *link); int netif_get_promisc(struct netif_port *dev, bool *promisc); diff --git a/include/netif_addr.h b/include/netif_addr.h index 1a6b97d71..c90da57cc 100644 --- a/include/netif_addr.h +++ b/include/netif_addr.h @@ -23,18 +23,42 @@ */ #ifndef __DPVS_NETIF_ADDR_H__ #define __DPVS_NETIF_ADDR_H__ -#include "netif.h" -int __netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr); -int __netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr); +enum { + HW_ADDR_F_FROM_KNI = 1, // from linux kni device in local layer +}; + +struct netif_hw_addr { + struct list_head list; + struct rte_ether_addr addr; + rte_atomic32_t refcnt; + uint16_t flags; + uint16_t sync_cnt; +}; + +struct netif_hw_addr_list { + struct list_head addrs; + int count; +}; + +struct netif_port; + +int __netif_hw_addr_add(struct netif_hw_addr_list *list, + const struct rte_ether_addr *addr, uint16_t 
flags); +int __netif_hw_addr_del(struct netif_hw_addr_list *list, + const struct rte_ether_addr *addr, uint16_t flags); + +int netif_set_mc_list(struct netif_port *dev); +int __netif_set_mc_list(struct netif_port *dev); + int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr); int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr); void netif_mc_flush(struct netif_port *dev); void netif_mc_init(struct netif_port *dev); -int __netif_mc_dump(struct netif_port *dev, - struct rte_ether_addr *addrs, size_t *naddr); -int netif_mc_dump(struct netif_port *dev, - struct rte_ether_addr *addrs, size_t *naddr); +int __netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, + struct rte_ether_addr *addrs, size_t *naddr); +int netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, + struct rte_ether_addr *addrs, size_t *naddr); int __netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr); int netif_mc_print(struct netif_port *dev, @@ -45,10 +69,10 @@ int netif_mc_sync(struct netif_port *to, struct netif_port *from); int __netif_mc_unsync(struct netif_port *to, struct netif_port *from); int netif_mc_unsync(struct netif_port *to, struct netif_port *from); -int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from); -int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from); -int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from); -int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from); +int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); +int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); +int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); +int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); static inline int eth_addr_equal(const struct rte_ether_addr *addr1, const struct 
rte_ether_addr *addr2) @@ -69,4 +93,9 @@ static inline char *eth_addr_dump(const struct rte_ether_addr *ea, return buf; } +static bool inline hw_addr_from_kni(const struct netif_hw_addr *hwa) +{ + return !!(hwa->flags & HW_ADDR_F_FROM_KNI); +} + #endif /* __DPVS_NETIF_ADDR_H__ */ diff --git a/src/inetaddr.c b/src/inetaddr.c index d878ce06f..c5f13e0b6 100644 --- a/src/inetaddr.c +++ b/src/inetaddr.c @@ -90,15 +90,15 @@ static inline void idev_put(struct inet_device *idev) static inline void imc_hash(struct inet_ifmcaddr *imc, struct inet_device *idev) { list_add(&imc->d_list, &idev->this_ifm_list); - rte_atomic32_inc(&imc->refcnt); + ++imc->refcnt; } static inline void imc_unhash(struct inet_ifmcaddr *imc) { - assert(rte_atomic32_read(&imc->refcnt) > 1); + assert(imc->refcnt> 1); list_del(&imc->d_list); - rte_atomic32_dec(&imc->refcnt); + --imc->refcnt; } static struct inet_ifmcaddr *imc_lookup(int af, const struct inet_device *idev, @@ -109,7 +109,7 @@ static struct inet_ifmcaddr *imc_lookup(int af, const struct inet_device *idev, list_for_each_entry(imc, &idev->ifm_list[cid], d_list) { if (inet_addr_equal(af, &imc->addr, maddr)) { - rte_atomic32_inc(&imc->refcnt); + ++imc->refcnt; return imc; } } @@ -121,7 +121,7 @@ static void imc_put(struct inet_ifmcaddr *imc) { char ipstr[64]; - if (rte_atomic32_dec_and_test(&imc->refcnt)) { + if (--imc->refcnt == 0) { RTE_LOG(DEBUG, IFA, "[%02d] %s: del mcaddr %s\n", rte_lcore_id(), __func__, inet_ntop(imc->af, &imc->addr, ipstr, sizeof(ipstr))); @@ -136,20 +136,27 @@ static int idev_mc_add(int af, struct inet_device *idev, struct inet_ifmcaddr *imc; char ipstr[64]; - imc = imc_lookup(af, idev, maddr); - if (imc) { - imc_put(imc); - return EDPVS_EXIST; + if (imc_lookup(af, idev, maddr)) { + /* + * Hold the imc and return. + * + * Multiple IPv6 unicast addresses may be mapped to one IPv6 solicited-node + * multicast address. So increase the imc refcnt each time idev_mc_add is called. + * + * Possibly imc added repeated? 
No, at least for now. The imc is set within the + * rigid program, not allowing user to configure it. + * */ + return EDPVS_OK; } imc = rte_calloc(NULL, 1, sizeof(struct inet_ifmcaddr), RTE_CACHE_LINE_SIZE); if (!imc) return EDPVS_NOMEM; - imc->af = af; - imc->idev = idev; - imc->addr = *maddr; - rte_atomic32_init(&imc->refcnt); + imc->af = af; + imc->idev = idev; + imc->addr = *maddr; + imc->refcnt = 1; imc_hash(imc, idev); @@ -169,7 +176,10 @@ static int idev_mc_del(int af, struct inet_device *idev, if (!imc) return EDPVS_NOTEXIST; - imc_unhash(imc); + if (--imc->refcnt == 2) { + imc_unhash(imc); + } + imc_put(imc); return EDPVS_OK; @@ -192,8 +202,6 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add, bool is_master) if (add) { err = idev_mc_add(ifa->af, ifa->idev, &iaddr); - if (EDPVS_EXIST == err) - return EDPVS_OK; if (err) return err; if (is_master) { @@ -222,7 +230,7 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add, bool is_master) } /* add ipv6 multicast address after port start */ -int idev_add_mcast_init(void *args) +static int __idev_add_mcast_init(void *args) { int err; struct inet_device *idev; @@ -278,6 +286,21 @@ int idev_add_mcast_init(void *args) return err; } +int idev_add_mcast_init(struct netif_port *dev) +{ + int err; + lcoreid_t cid; + + rte_eal_mp_remote_launch(__idev_add_mcast_init, dev, CALL_MAIN); + RTE_LCORE_FOREACH_WORKER(cid) { + err = rte_eal_wait_lcore(cid); + if (unlikely(err < 0)) + return err; + } + + return EDPVS_OK; +} + /* refer to linux:ipv6_chk_mcast_addr */ bool inet_chk_mcast_addr(int af, struct netif_port *dev, const union inet_addr *group, diff --git a/src/kni.c b/src/kni.c index 6da4799e5..b6af78ceb 100644 --- a/src/kni.c +++ b/src/kni.c @@ -108,7 +108,7 @@ static int kni_mc_list_cmp_set(struct netif_port *dev, rte_rwlock_write_lock(&dev->dev_lock); naddr_old = NELEMS(addrs_old); - err = __netif_mc_dump(dev, addrs_old, &naddr_old); + err = __netif_mc_dump(dev, HW_ADDR_F_FROM_KNI, addrs_old, 
&naddr_old); if (err != EDPVS_OK) { RTE_LOG(ERR, Kni, "%s: fail to get current mc list\n", __func__); goto out; @@ -162,14 +162,14 @@ static int kni_mc_list_cmp_set(struct netif_port *dev, /* nothing */ break; case 1: - err = __netif_mc_add(dev, &chg_lst.addrs[i]); + err = __netif_hw_addr_add(&dev->mc, &chg_lst.addrs[i], HW_ADDR_F_FROM_KNI); RTE_LOG(INFO, Kni, "%s: add mc addr: %s %s %s\n", __func__, eth_addr_dump(&chg_lst.addrs[i], mac, sizeof(mac)), dev->name, dpvs_strerror(err)); break; case 2: - err = __netif_mc_del(dev, &chg_lst.addrs[i]); + err = __netif_hw_addr_del(&dev->mc, &chg_lst.addrs[i], HW_ADDR_F_FROM_KNI); RTE_LOG(INFO, Kni, "%s: del mc addr: %s %s %s\n", __func__, eth_addr_dump(&chg_lst.addrs[i], mac, sizeof(mac)), @@ -246,7 +246,7 @@ static int kni_rtnl_check(void *arg) { struct netif_port *dev = arg; int fd = dev->kni.kni_rtnl_fd; - int n, i; + int n, i, link_flags = 0; char buf[4096]; struct nlmsghdr *nlh = (struct nlmsghdr *)buf; bool update = false; @@ -284,6 +284,14 @@ static int kni_rtnl_check(void *arg) /* note we should not update kni mac list for every event ! 
*/ if (update) { RTE_LOG(DEBUG, Kni, "%d events received!\n", i); + if (EDPVS_OK != linux_get_link_status(dev->kni.name, &link_flags, NULL, 0)) { + RTE_LOG(ERR, Kni, "%s:undetermined kni link status\n", dev->kni.name); + return DTIMER_OK; + } + if (!(link_flags & IFF_UP)) { + RTE_LOG(DEBUG, Kni, "skip link down kni device %s\n", dev->kni.name); + return DTIMER_OK; + } if (kni_update_maddr(dev) == EDPVS_OK) RTE_LOG(DEBUG, Kni, "update maddr of %s OK!\n", dev->name); else diff --git a/src/netif.c b/src/netif.c index d2ae6cf91..4bc01a1bb 100644 --- a/src/netif.c +++ b/src/netif.c @@ -3298,7 +3298,7 @@ static int bond_set_mc_list(struct netif_port *dev) slave = dev->bond->master.slaves[i]; rte_rwlock_write_lock(&slave->dev_lock); - err = __netif_mc_sync_multiple(slave, dev); + err = __netif_mc_sync_multiple(slave, dev, dev->bond->master.slave_nb); rte_rwlock_write_unlock(&slave->dev_lock); if (err != EDPVS_OK) { @@ -3320,10 +3320,11 @@ static int dpdk_set_mc_list(struct netif_port *dev) if (rte_eth_allmulticast_get(dev->id) == 1) return EDPVS_OK; - err = __netif_mc_dump(dev, addrs, &naddr); + err = __netif_mc_dump(dev, 0, addrs, &naddr); if (err != EDPVS_OK) return err; + RTE_LOG(DEBUG, NETIF, "%s: configuring %lu multicast hw-addrs\n", dev->name, naddr); err = rte_eth_dev_set_mc_addr_list(dev->id, addrs, naddr); if (err) { RTE_LOG(WARNING, NETIF, "%s: rte_eth_dev_set_mc_addr_list failed -- %s," @@ -3506,6 +3507,7 @@ static struct netif_port* netif_rte_port_alloc(portid_t id, int nrxq, return NULL; } port->in_ptr->dev = port; + for (ii = 0; ii < DPVS_MAX_LCORE; ii++) { INIT_LIST_HEAD(&port->in_ptr->ifa_list[ii]); INIT_LIST_HEAD(&port->in_ptr->ifm_list[ii]); @@ -3916,7 +3918,6 @@ static int config_fdir_conf(struct rte_fdir_conf *fdir_conf) int netif_port_start(struct netif_port *port) { int ii, ret; - lcoreid_t cid; queueid_t qid; char promisc_on, allmulticast; char buf[512]; @@ -4058,13 +4059,10 @@ int netif_port_start(struct netif_port *port) 
port->netif_ops->op_update_addr(port); /* add in6_addr multicast address */ - rte_eal_mp_remote_launch(idev_add_mcast_init, port, CALL_MAIN); - RTE_LCORE_FOREACH_WORKER(cid) { - if ((ret = rte_eal_wait_lcore(cid)) < 0) { - RTE_LOG(WARNING, NETIF, "%s: lcore %d: multicast address add failed for device %s\n", - __func__, cid, port->name); - return ret; - } + if ((ret = idev_add_mcast_init(port)) != EDPVS_OK) { + RTE_LOG(WARNING, NETIF, "%s: idev_add_mcast_init failed -- %d(%s)\n", + __func__, ret, dpvs_strerror(ret)); + return ret; } /* update rss reta */ @@ -4096,25 +4094,6 @@ int netif_port_stop(struct netif_port *port) return EDPVS_OK; } -int __netif_set_mc_list(struct netif_port *dev) -{ - if (!dev->netif_ops->op_set_mc_list) - return EDPVS_NOTSUPP; - - return dev->netif_ops->op_set_mc_list(dev); -} - -int netif_set_mc_list(struct netif_port *dev) -{ - int err; - - rte_rwlock_write_lock(&dev->dev_lock); - err = __netif_set_mc_list(dev); - rte_rwlock_write_unlock(&dev->dev_lock); - - return err; -} - int netif_port_register(struct netif_port *port) { struct netif_port *cur; diff --git a/src/netif_addr.c b/src/netif_addr.c index 9859e7810..425041ead 100644 --- a/src/netif_addr.c +++ b/src/netif_addr.c @@ -25,14 +25,15 @@ #include "netif_addr.h" #include "kni.h" -static int __netif_hw_addr_add(struct netif_hw_addr_list *list, - const struct rte_ether_addr *addr) +int __netif_hw_addr_add(struct netif_hw_addr_list *list, + const struct rte_ether_addr *addr, uint16_t flags) { struct netif_hw_addr *ha; list_for_each_entry(ha, &list->addrs, list) { if (eth_addr_equal(&ha->addr, addr)) { rte_atomic32_inc(&ha->refcnt); + ha->flags |= flags; return EDPVS_OK; } } @@ -43,15 +44,15 @@ static int __netif_hw_addr_add(struct netif_hw_addr_list *list, rte_ether_addr_copy(addr, &ha->addr); rte_atomic32_set(&ha->refcnt, 1); - ha->sync_cnt = 0; + ha->flags = flags; list_add_tail(&ha->list, &list->addrs); list->count++; return EDPVS_OK; } -static int __netif_hw_addr_del(struct 
netif_hw_addr_list *list, - const struct rte_ether_addr *addr) +int __netif_hw_addr_del(struct netif_hw_addr_list *list, + const struct rte_ether_addr *addr, uint16_t flags) { struct netif_hw_addr *ha, *n; @@ -61,6 +62,8 @@ static int __netif_hw_addr_del(struct netif_hw_addr_list *list, list_del(&ha->list); list->count--; rte_free(ha); + } else { + ha->flags &= ~flags; } return EDPVS_OK; } @@ -81,25 +84,10 @@ static int __netif_hw_addr_sync(struct netif_hw_addr_list *to, eth_addr_dump(&ha->addr, mac, sizeof(mac)); /* for debug */ if (!ha->sync_cnt) { /* not synced to lower device */ - err = __netif_hw_addr_add(to, &ha->addr); + err = __netif_hw_addr_add(to, &ha->addr, 0); if (err == EDPVS_OK) { ha->sync_cnt++; rte_atomic32_inc(&ha->refcnt); - - /* - * when sync ha from upper to lower, - * we also need sync-back to lower's Linux kni device. - * if not, when lower's kni device mc-list changed, - * it may delete "synced" ha here by mistake. - * - * note on Linux two kni devices has no relationship. - * - * the whole logic should be: - * upper.kni -> uppper -> lower -> lower.kni - */ - if (kni_dev_exist(todev)) - linux_hw_mc_add(todev->kni.name, (uint8_t *)&ha->addr); - RTE_LOG(DEBUG, NETIF, "%s: sync %s to %s OK!\n", __func__, mac, todev->name); } else { @@ -111,14 +99,10 @@ static int __netif_hw_addr_sync(struct netif_hw_addr_list *to, /* both "ha->sync_cnt != 0" and "refcnt == 1" means * lower device is the only reference of this ha. * we can "unsync" from lower dev and remove it for upper. 
*/ - err = __netif_hw_addr_del(to, &ha->addr); + err = __netif_hw_addr_del(to, &ha->addr, 0); if (err == EDPVS_OK) { - if (kni_dev_exist(todev)) - linux_hw_mc_del(todev->kni.name, (uint8_t *)&ha->addr); - RTE_LOG(DEBUG, NETIF, "%s: unsync %s to %s OK!\n", __func__, mac, todev->name); - list_del(&ha->list); rte_free(ha); from->count--; @@ -143,7 +127,8 @@ static int __netif_hw_addr_unsync(struct netif_hw_addr_list *to, static int __netif_hw_addr_sync_multiple(struct netif_hw_addr_list *to, struct netif_hw_addr_list *from, - struct netif_port *todev) + struct netif_port *todev, + int sync_cnt) { struct netif_hw_addr *ha, *n; int err = EDPVS_OK; @@ -153,14 +138,12 @@ static int __netif_hw_addr_sync_multiple(struct netif_hw_addr_list *to, eth_addr_dump(&ha->addr, mac, sizeof(mac)); /* for debug */ if (rte_atomic32_read(&ha->refcnt) == ha->sync_cnt) { - err = __netif_hw_addr_del(to, &ha->addr); + /* 'ha->refcnt == ha->sync_cnt' means the 'ha' has been removed from current device + * and all references of this ha are from lower devices, so it's time to unsync. */ + err = __netif_hw_addr_del(to, &ha->addr, 0); if (err == EDPVS_OK) { - if (kni_dev_exist(todev)) - linux_hw_mc_del(todev->kni.name, (uint8_t *)&ha->addr); - RTE_LOG(DEBUG, NETIF, "%s: unsync %s to %s OK!\n", __func__, mac, todev->name); - ha->sync_cnt--; if (rte_atomic32_dec_and_test(&ha->refcnt)) { list_del(&ha->list); @@ -172,26 +155,12 @@ static int __netif_hw_addr_sync_multiple(struct netif_hw_addr_list *to, __func__, mac, todev->name); break; } - } else { - err = __netif_hw_addr_add(to, &ha->addr); + } else if (ha->sync_cnt < sync_cnt) { + /* sync to lower devices only once */ + err = __netif_hw_addr_add(to, &ha->addr, 0); if (err == EDPVS_OK) { ha->sync_cnt++; rte_atomic32_inc(&ha->refcnt); - - /* - * when sync ha from upper to lower, - * we also need sync-back to lower's Linux kni device. - * if not, when lower's kni device mc-list changed, - * it may delete "synced" ha here by mistake. 
- * - * note on Linux two kni devices has no relationship. - * - * the whole logic should be: - * upper.kni -> uppper -> lower -> lower.kni - */ - if (kni_dev_exist(todev)) - linux_hw_mc_add(todev->kni.name, (uint8_t *)&ha->addr); - RTE_LOG(DEBUG, NETIF, "%s: sync %s to %s OK!\n", __func__, mac, todev->name); } else { @@ -204,20 +173,30 @@ static int __netif_hw_addr_sync_multiple(struct netif_hw_addr_list *to, } static int __netif_hw_addr_unsync_multiple(struct netif_hw_addr_list *to, - struct netif_hw_addr_list *from) + struct netif_hw_addr_list *from, + int sync_cnt) { /* TODO: */ return EDPVS_INVAL; } -int __netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr) +int __netif_set_mc_list(struct netif_port *dev) { - return __netif_hw_addr_add(&dev->mc, addr); + if (!dev->netif_ops->op_set_mc_list) + return EDPVS_NOTSUPP; + + return dev->netif_ops->op_set_mc_list(dev); } -int __netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr) +int netif_set_mc_list(struct netif_port *dev) { - return __netif_hw_addr_del(&dev->mc, addr); + int err; + + rte_rwlock_write_lock(&dev->dev_lock); + err = __netif_set_mc_list(dev); + rte_rwlock_write_unlock(&dev->dev_lock); + + return err; } int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr) @@ -225,7 +204,7 @@ int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr) int err; rte_rwlock_write_lock(&dev->dev_lock); - err = __netif_mc_add(dev, addr); + err = __netif_hw_addr_add(&dev->mc, addr, 0); if (err == EDPVS_OK) err = __netif_set_mc_list(dev); rte_rwlock_write_unlock(&dev->dev_lock); @@ -238,7 +217,7 @@ int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr) int err; rte_rwlock_write_lock(&dev->dev_lock); - err = __netif_mc_del(dev, addr); + err = __netif_hw_addr_del(&dev->mc, addr, 0); if (err == EDPVS_OK) err = __netif_set_mc_list(dev); rte_rwlock_write_unlock(&dev->dev_lock); @@ -271,8 +250,8 @@ void netif_mc_init(struct netif_port 
*dev) rte_rwlock_write_unlock(&dev->dev_lock); } -int __netif_mc_dump(struct netif_port *dev, - struct rte_ether_addr *addrs, size_t *naddr) +int __netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, + struct rte_ether_addr *addrs, size_t *naddr) { struct netif_hw_addr *ha; int off = 0; @@ -281,42 +260,74 @@ int __netif_mc_dump(struct netif_port *dev, return EDPVS_NOROOM; list_for_each_entry(ha, &dev->mc.addrs, list) - rte_ether_addr_copy(&ha->addr, &addrs[off++]); + if (!filter_flags || ha->flags & filter_flags) + rte_ether_addr_copy(&ha->addr, &addrs[off++]); *naddr = off; return EDPVS_OK; } -int netif_mc_dump(struct netif_port *dev, - struct rte_ether_addr *addrs, size_t *naddr) +int netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, + struct rte_ether_addr *addrs, size_t *naddr) { int err; rte_rwlock_read_lock(&dev->dev_lock); - err = __netif_mc_dump(dev, addrs, naddr); + err = __netif_mc_dump(dev, filter_flags, addrs, naddr); rte_rwlock_read_unlock(&dev->dev_lock); return err; } +/* only used in __netif_mc_dump_all */ +struct netif_hw_addr_entry { + struct rte_ether_addr addr; + uint32_t refcnt; + uint16_t flags; + uint16_t sync_cnt; +}; + +static int __netif_mc_dump_all(struct netif_port *dev, uint16_t filter_flags, + struct netif_hw_addr_entry *addrs, size_t *naddr) +{ + struct netif_hw_addr *ha; + int off = 0; + + if (*naddr < dev->mc.count) + return EDPVS_NOROOM; + + list_for_each_entry(ha, &dev->mc.addrs, list) { + rte_ether_addr_copy(&ha->addr, &addrs[off].addr); + addrs[off].refcnt = rte_atomic32_read(&ha->refcnt); + addrs[off].flags = ha->flags; + addrs[off].sync_cnt = ha->sync_cnt; + off++; + } + + *naddr = off; + return EDPVS_OK; +} + int __netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr) { - struct rte_ether_addr addrs[NETIF_MAX_HWADDR]; + struct netif_hw_addr_entry addrs[NETIF_MAX_HWADDR]; size_t naddr = NELEMS(addrs); int err, i; int strlen = 0; - err = __netif_mc_dump(dev, addrs, &naddr); + err = 
__netif_mc_dump_all(dev, 0, addrs, &naddr); if (err != EDPVS_OK) goto errout; for (i = 0; i < naddr && *len > strlen; i++) { err = snprintf(buf + strlen, *len - strlen, - " link %02x:%02x:%02x:%02x:%02x:%02x\n", - addrs[i].addr_bytes[0], addrs[i].addr_bytes[1], - addrs[i].addr_bytes[2], addrs[i].addr_bytes[3], - addrs[i].addr_bytes[4], addrs[i].addr_bytes[5]); + " link %02x:%02x:%02x:%02x:%02x:%02x %srefcnt %d, synced %d\n", + addrs[i].addr.addr_bytes[0], addrs[i].addr.addr_bytes[1], + addrs[i].addr.addr_bytes[2], addrs[i].addr.addr_bytes[3], + addrs[i].addr.addr_bytes[4], addrs[i].addr.addr_bytes[5], + addrs[i].flags & HW_ADDR_F_FROM_KNI ? "(kni) ": "", + addrs[i].refcnt, addrs[i].sync_cnt); if (err < 0) { err = EDPVS_NOROOM; goto errout; @@ -400,11 +411,11 @@ int netif_mc_unsync(struct netif_port *to, struct netif_port *from) return err; } -int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from) +int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt) { int err; - err = __netif_hw_addr_sync_multiple(&to->mc, &from->mc, to); + err = __netif_hw_addr_sync_multiple(&to->mc, &from->mc, to, sync_cnt); if (err == EDPVS_OK) err = __netif_set_mc_list(to); @@ -412,14 +423,15 @@ int __netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from) } int netif_mc_sync_multiple(struct netif_port *to, - struct netif_port *from) + struct netif_port *from, + int sync_cnt) { int err; rte_rwlock_write_lock(&to->dev_lock); rte_rwlock_write_lock(&from->dev_lock); - err = __netif_mc_sync_multiple(to, from); + err = __netif_mc_sync_multiple(to, from, sync_cnt); rte_rwlock_write_unlock(&from->dev_lock); rte_rwlock_write_unlock(&to->dev_lock); @@ -427,25 +439,27 @@ int netif_mc_sync_multiple(struct netif_port *to, return err; } -int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from) +int __netif_mc_unsync_multiple(struct netif_port *to, + struct netif_port *from, + int sync_cnt) { int err; - err = 
__netif_hw_addr_unsync_multiple(&to->mc, &from->mc); + err = __netif_hw_addr_unsync_multiple(&to->mc, &from->mc, sync_cnt); if (err == EDPVS_OK) err = __netif_set_mc_list(to); return err; } -int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from) +int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt) { int err; rte_rwlock_write_lock(&to->dev_lock); rte_rwlock_write_lock(&from->dev_lock); - err = __netif_mc_unsync_multiple(to, from); + err = __netif_mc_unsync_multiple(to, from, sync_cnt); rte_rwlock_write_unlock(&from->dev_lock); rte_rwlock_write_unlock(&to->dev_lock); From 5b2fc59574489c6909efd45b856ff853e787d85d Mon Sep 17 00:00:00 2001 From: ywc689 Date: Wed, 10 Jul 2024 11:32:10 +0800 Subject: [PATCH 2/2] dpip: add 'maddr' subcommand to show multicast addresses Signed-off-by: ywc689 --- include/conf/inetaddr.h | 13 +++ include/conf/netif_addr.h | 37 +++++++++ include/conf/sockopts.h | 2 + include/inetaddr.h | 2 + include/netif_addr.h | 7 +- src/inetaddr.c | 139 +++++++++++++++++++++++-------- src/kni.c | 2 +- src/netif.c | 10 +++ src/netif_addr.c | 48 +++++++---- tools/dpip/Makefile | 2 +- tools/dpip/dpip.c | 2 +- tools/dpip/link.c | 6 +- tools/dpip/maddr.c | 168 ++++++++++++++++++++++++++++++++++++++ 13 files changed, 380 insertions(+), 58 deletions(-) create mode 100644 include/conf/netif_addr.h create mode 100644 tools/dpip/maddr.c diff --git a/include/conf/inetaddr.h b/include/conf/inetaddr.h index f97994f86..7a82b0d94 100644 --- a/include/conf/inetaddr.h +++ b/include/conf/inetaddr.h @@ -100,4 +100,17 @@ struct inet_addr_front { }; #endif /* CONFIG_DPVS_AGENT */ +struct inet_maddr_entry { + char ifname[IFNAMSIZ]; + union inet_addr maddr; + int af; + uint32_t flags; + uint32_t refcnt; +} __attribute__((__packed__)); + +struct inet_maddr_array { + int nmaddr; + struct inet_maddr_entry maddrs[0]; +} __attribute__((__packed__)); + #endif /* __DPVS_INETADDR_CONF_H__ */ diff --git 
a/include/conf/netif_addr.h b/include/conf/netif_addr.h new file mode 100644 index 000000000..1a127e871 --- /dev/null +++ b/include/conf/netif_addr.h @@ -0,0 +1,37 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __DPVS_NETIF_ADDR_CONF_H__ +#define __DPVS_NETIF_ADDR_CONF_H__ + +enum { + HW_ADDR_F_FROM_KNI = 1, // from linux kni device in local layer +}; + +struct netif_hw_addr_entry { + char addr[18]; + uint32_t refcnt; + uint16_t flags; + uint16_t sync_cnt; +} __attribute__((__packed__)); + +struct netif_hw_addr_array { + int count; + struct netif_hw_addr_entry entries[0]; +} __attribute__((__packed__)); + +#endif diff --git a/include/conf/sockopts.h b/include/conf/sockopts.h index 2e3cd7595..83d02ccc9 100644 --- a/include/conf/sockopts.h +++ b/include/conf/sockopts.h @@ -84,6 +84,7 @@ DPVSMSG(SOCKOPT_SET_IFADDR_SET) \ DPVSMSG(SOCKOPT_SET_IFADDR_FLUSH) \ DPVSMSG(SOCKOPT_GET_IFADDR_SHOW) \ + DPVSMSG(SOCKOPT_GET_IFMADDR_SHOW) \ \ DPVSMSG(SOCKOPT_NETIF_SET_LCORE) \ DPVSMSG(SOCKOPT_NETIF_SET_PORT) \ @@ -98,6 +99,7 @@ DPVSMSG(SOCKOPT_NETIF_GET_PORT_XSTATS) \ DPVSMSG(SOCKOPT_NETIF_GET_PORT_EXT_INFO) \ DPVSMSG(SOCKOPT_NETIF_GET_BOND_STATUS) \ + DPVSMSG(SOCKOPT_NETIF_GET_MADDR)\ DPVSMSG(SOCKOPT_NETIF_GET_MAX) \ \ DPVSMSG(SOCKOPT_SET_NEIGH_ADD) \ diff --git a/include/inetaddr.h b/include/inetaddr.h index 2a85e7611..44276659f 100644 --- 
a/include/inetaddr.h +++ b/include/inetaddr.h @@ -31,10 +31,12 @@ struct inet_device { struct list_head ifa_list[DPVS_MAX_LCORE]; /* inet_ifaddr list */ struct list_head ifm_list[DPVS_MAX_LCORE]; /* inet_ifmcaddr list*/ uint32_t ifa_cnt[DPVS_MAX_LCORE]; + uint32_t ifm_cnt[DPVS_MAX_LCORE]; rte_atomic32_t refcnt; /* not used yet */ #define this_ifa_list ifa_list[rte_lcore_id()] #define this_ifm_list ifm_list[rte_lcore_id()] #define this_ifa_cnt ifa_cnt[rte_lcore_id()] +#define this_ifm_cnt ifm_cnt[rte_lcore_id()] }; /* diff --git a/include/netif_addr.h b/include/netif_addr.h index c90da57cc..e1d98aa8e 100644 --- a/include/netif_addr.h +++ b/include/netif_addr.h @@ -24,9 +24,7 @@ #ifndef __DPVS_NETIF_ADDR_H__ #define __DPVS_NETIF_ADDR_H__ -enum { - HW_ADDR_F_FROM_KNI = 1, // from linux kni device in local layer -}; +#include "conf/netif_addr.h" struct netif_hw_addr { struct list_head list; @@ -55,6 +53,7 @@ int netif_mc_add(struct netif_port *dev, const struct rte_ether_addr *addr); int netif_mc_del(struct netif_port *dev, const struct rte_ether_addr *addr); void netif_mc_flush(struct netif_port *dev); void netif_mc_init(struct netif_port *dev); + int __netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, struct rte_ether_addr *addrs, size_t *naddr); int netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, @@ -74,6 +73,8 @@ int netif_mc_sync_multiple(struct netif_port *to, struct netif_port *from, int s int __netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); int netif_mc_unsync_multiple(struct netif_port *to, struct netif_port *from, int sync_cnt); +int netif_get_multicast_addrs(struct netif_port *dev, void **out, size_t *outlen); + static inline int eth_addr_equal(const struct rte_ether_addr *addr1, const struct rte_ether_addr *addr2) { diff --git a/src/inetaddr.c b/src/inetaddr.c index c5f13e0b6..077f9d844 100644 --- a/src/inetaddr.c +++ b/src/inetaddr.c @@ -91,6 +91,7 @@ static inline void imc_hash(struct 
inet_ifmcaddr *imc, struct inet_device *idev) { list_add(&imc->d_list, &idev->this_ifm_list); ++imc->refcnt; + ++idev->this_ifm_cnt; } static inline void imc_unhash(struct inet_ifmcaddr *imc) @@ -99,6 +100,7 @@ static inline void imc_unhash(struct inet_ifmcaddr *imc) list_del(&imc->d_list); --imc->refcnt; + --imc->idev->this_ifm_cnt; } static struct inet_ifmcaddr *imc_lookup(int af, const struct inet_device *idev, @@ -1833,6 +1835,39 @@ static int ifaddr_get_verbose(struct inet_device *idev, struct inet_addr_data_ar return err; } +static int ifmaddr_fill_entries(struct inet_device *idev, struct inet_maddr_array **parray, int *plen) +{ + lcoreid_t cid; + int ifm_cnt, len, off; + struct inet_ifmcaddr *ifm; + struct inet_maddr_array *array; + + cid = rte_lcore_id(); + ifm_cnt = idev->ifm_cnt[cid]; + + len = sizeof(struct inet_maddr_array) + ifm_cnt * sizeof(struct inet_maddr_entry); + array = rte_calloc(NULL, 1, len, RTE_CACHE_LINE_SIZE); + if (unlikely(!array)) + return EDPVS_NOMEM; + + off = 0; + list_for_each_entry(ifm, &idev->ifm_list[cid], d_list) { + strncpy(array->maddrs[off].ifname, ifm->idev->dev->name, + sizeof(array->maddrs[off].ifname) - 1); + array->maddrs[off].maddr = ifm->addr; + array->maddrs[off].af = ifm->af; + array->maddrs[off].flags = ifm->flags; + array->maddrs[off].refcnt = ifm->refcnt; + if (++off >= ifm_cnt) + break; + } + array->nmaddr = off; + + *parray = array; + *plen = len; + return EDPVS_OK; +} + static int ifa_sockopt_set(sockoptid_t opt, const void *conf, size_t size) { struct netif_port *dev; @@ -1967,60 +2002,92 @@ static int ifa_sockopt_get(sockoptid_t opt, const void *conf, size_t size, int err, len = 0; struct netif_port *dev; struct inet_device *idev = NULL; + struct inet_addr_data_array *array = NULL; const struct inet_addr_param *param = conf; - if (!conf || size < sizeof(struct inet_addr_param) || !out || !outsize) + struct inet_maddr_array *marray = NULL; + const char *ifname = conf; + + if (!conf || !out || !outsize) return 
EDPVS_INVAL; - if (opt != SOCKOPT_GET_IFADDR_SHOW) - return EDPVS_NOTSUPP; + switch (opt) { + case SOCKOPT_GET_IFADDR_SHOW: + if (size < sizeof(struct inet_addr_param) || param->ifa_ops != INET_ADDR_GET) + return EDPVS_INVAL; - if (param->ifa_ops != INET_ADDR_GET) - return EDPVS_INVAL; + if (param->ifa_entry.af != AF_INET && + param->ifa_entry.af != AF_INET6 && + param->ifa_entry.af != AF_UNSPEC) + return EDPVS_NOTSUPP; - if (param->ifa_entry.af != AF_INET && - param->ifa_entry.af != AF_INET6 && - param->ifa_entry.af != AF_UNSPEC) - return EDPVS_NOTSUPP; + if (strlen(param->ifa_entry.ifname)) { + dev = netif_port_get_by_name(param->ifa_entry.ifname); + if (!dev) { + RTE_LOG(WARNING, IFA, "%s: no such device: %s\n", + __func__, param->ifa_entry.ifname); + return EDPVS_NOTEXIST; + } + idev = dev_get_idev(dev); + if (!idev) + return EDPVS_RESOURCE; + } + + if (param->ifa_ops_flags & IFA_F_OPS_VERBOSE) + err = ifaddr_get_verbose(idev, &array, &len); + else if (param->ifa_ops_flags & IFA_F_OPS_STATS) + err = ifaddr_get_stats(idev, &array, &len); + else + err = ifaddr_get_basic(idev, &array, &len); - if (strlen(param->ifa_entry.ifname)) { - dev = netif_port_get_by_name(param->ifa_entry.ifname); + if (err != EDPVS_OK) { + RTE_LOG(WARNING, IFA, "%s: fail to get inet addresses -- %s!\n", + __func__, dpvs_strerror(err)); + return err; + } + + if (idev) + idev_put(idev); + + if (array) { + array->ops = INET_ADDR_GET; + array->ops_flags = param->ifa_ops_flags; + } + + *out = array; + *outsize = len; + break; + case SOCKOPT_GET_IFMADDR_SHOW: + if (!size || strlen(ifname) == 0) + return EDPVS_INVAL; + + dev = netif_port_get_by_name(ifname); if (!dev) { - RTE_LOG(WARNING, IFA, "%s: no such device: %s\n", - __func__, param->ifa_entry.ifname); + RTE_LOG(WARNING, IFA, "%s: no such device: %s\n", __func__, ifname); return EDPVS_NOTEXIST; } - idev = dev_get_idev(dev); if (!idev) return EDPVS_RESOURCE; - } - - if (param->ifa_ops_flags & IFA_F_OPS_VERBOSE) - err = 
ifaddr_get_verbose(idev, &array, &len); - else if (param->ifa_ops_flags & IFA_F_OPS_STATS) - err = ifaddr_get_stats(idev, &array, &len); - else - err = ifaddr_get_basic(idev, &array, &len); - if (err != EDPVS_OK) { - RTE_LOG(WARNING, IFA, "%s: fail to get inet addresses -- %s!\n", - __func__, dpvs_strerror(err)); - return err; - } + err = ifmaddr_fill_entries(idev, &marray, &len); + if (err != EDPVS_OK) { + RTE_LOG(WARNING, IFA, "%s: fail to get inet maddresses -- %s!\n", + __func__, dpvs_strerror(err)); + return err; + } - if (idev) idev_put(idev); - if (array) { - array->ops = INET_ADDR_GET; - array->ops_flags = param->ifa_ops_flags; + *out = marray; + *outsize = len; + break; + default: + *out = NULL; + *outsize = 0; + return EDPVS_NOTSUPP; } - - *out = array; - *outsize = len; - return EDPVS_OK; } @@ -2067,7 +2134,7 @@ static struct dpvs_sockopts ifa_sockopts = { .set_opt_max = SOCKOPT_SET_IFADDR_FLUSH, .set = ifa_sockopt_set, .get_opt_min = SOCKOPT_GET_IFADDR_SHOW, - .get_opt_max = SOCKOPT_GET_IFADDR_SHOW, + .get_opt_max = SOCKOPT_GET_IFMADDR_SHOW, .get = ifa_sockopt_get, }; diff --git a/src/kni.c b/src/kni.c index b6af78ceb..185371d42 100644 --- a/src/kni.c +++ b/src/kni.c @@ -34,7 +34,7 @@ #include "conf/common.h" #include "dpdk.h" #include "netif.h" -#include "netif_addr.h" +#include "conf/netif_addr.h" #include "ctrl.h" #include "kni.h" #include "vlan.h" diff --git a/src/netif.c b/src/netif.c index 4bc01a1bb..a40252f21 100644 --- a/src/netif.c +++ b/src/netif.c @@ -28,6 +28,7 @@ #include "conf/common.h" #include "netif.h" #include "netif_addr.h" +#include "conf/netif_addr.h" #include "vlan.h" #include "ctrl.h" #include "list.h" @@ -5180,6 +5181,15 @@ static int netif_sockopt_get(sockoptid_t opt, const void *in, size_t inlen, return EDPVS_NOTEXIST; ret = get_bond_status(port, out, outlen); break; + case SOCKOPT_NETIF_GET_MADDR: + if (!in) + return EDPVS_INVAL; + name = (char *)in; + port = netif_port_get_by_name(name); + if (!port) + return EDPVS_NOTEXIST; 
+ ret = netif_get_multicast_addrs(port, out, outlen); + break; default: RTE_LOG(WARNING, NETIF, "[%s] invalid netif get cmd: %d\n", __func__, opt); diff --git a/src/netif_addr.c b/src/netif_addr.c index 425041ead..41bd0080f 100644 --- a/src/netif_addr.c +++ b/src/netif_addr.c @@ -23,6 +23,7 @@ */ #include "netif.h" #include "netif_addr.h" +#include "conf/netif_addr.h" #include "kni.h" int __netif_hw_addr_add(struct netif_hw_addr_list *list, @@ -279,16 +280,8 @@ int netif_mc_dump(struct netif_port *dev, uint16_t filter_flags, return err; } -/* only used in __netif_mc_dump_all */ -struct netif_hw_addr_entry { - struct rte_ether_addr addr; - uint32_t refcnt; - uint16_t flags; - uint16_t sync_cnt; -}; - static int __netif_mc_dump_all(struct netif_port *dev, uint16_t filter_flags, - struct netif_hw_addr_entry *addrs, size_t *naddr) + struct netif_hw_addr_entry *addrs, int *naddr) { struct netif_hw_addr *ha; int off = 0; @@ -297,7 +290,7 @@ static int __netif_mc_dump_all(struct netif_port *dev, uint16_t filter_flags, return EDPVS_NOROOM; list_for_each_entry(ha, &dev->mc.addrs, list) { - rte_ether_addr_copy(&ha->addr, &addrs[off].addr); + eth_addr_dump(&ha->addr, addrs[off].addr, sizeof(addrs[off].addr)); addrs[off].refcnt = rte_atomic32_read(&ha->refcnt); addrs[off].flags = ha->flags; addrs[off].sync_cnt = ha->sync_cnt; @@ -312,7 +305,7 @@ int __netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr) { struct netif_hw_addr_entry addrs[NETIF_MAX_HWADDR]; - size_t naddr = NELEMS(addrs); + int naddr = NELEMS(addrs); int err, i; int strlen = 0; @@ -322,10 +315,8 @@ int __netif_mc_print(struct netif_port *dev, for (i = 0; i < naddr && *len > strlen; i++) { err = snprintf(buf + strlen, *len - strlen, - " link %02x:%02x:%02x:%02x:%02x:%02x %srefcnt %d, synced %d\n", - addrs[i].addr.addr_bytes[0], addrs[i].addr.addr_bytes[1], - addrs[i].addr.addr_bytes[2], addrs[i].addr.addr_bytes[3], - addrs[i].addr.addr_bytes[4], addrs[i].addr.addr_bytes[5], + " link %s 
%srefcnt %d, synced %d\n", + addrs[i].addr, addrs[i].flags & HW_ADDR_F_FROM_KNI ? "(kni) ": "", addrs[i].refcnt, addrs[i].sync_cnt); if (err < 0) { @@ -346,6 +337,33 @@ int __netif_mc_print(struct netif_port *dev, return err; } +int netif_get_multicast_addrs(struct netif_port *dev, void **out, size_t *outlen) +{ + int err; + size_t len; + struct netif_hw_addr_array *array; + + rte_rwlock_read_lock(&dev->dev_lock); + len = sizeof(*array) + dev->mc.count * sizeof(struct netif_hw_addr_entry); + array = rte_zmalloc(NULL, len, RTE_CACHE_LINE_SIZE); + if (unlikely(!array)) { + err = EDPVS_NOMEM; + } else { + array->count = dev->mc.count; + err = __netif_mc_dump_all(dev, 0, array->entries, &array->count); + } + rte_rwlock_read_unlock(&dev->dev_lock); + + if (err != EDPVS_OK) { + *out = NULL; + *outlen = 0; + } else { + *out = array; + *outlen = len; + } + return err; +} + int netif_mc_print(struct netif_port *dev, char *buf, int *len, int *pnaddr) { diff --git a/tools/dpip/Makefile b/tools/dpip/Makefile index e1bbe21e4..4dc648e38 100644 --- a/tools/dpip/Makefile +++ b/tools/dpip/Makefile @@ -39,7 +39,7 @@ DEFS = -D DPVS_MAX_LCORE=64 -D DPIP_VERSION=\"$(VERSION_STRING)\" CFLAGS += $(DEFS) -OBJS = ipset.o dpip.o utils.o route.o addr.o neigh.o link.o vlan.o \ +OBJS = ipset.o dpip.o utils.o route.o addr.o neigh.o link.o vlan.o maddr.o \ qsch.o cls.o tunnel.o ipset.o ipv6.o iftraf.o eal_mem.o flow.o \ ../../src/common.o ../keepalived/keepalived/check/sockopt.o diff --git a/tools/dpip/dpip.c b/tools/dpip/dpip.c index 596a534f5..a3b31f1c6 100644 --- a/tools/dpip/dpip.c +++ b/tools/dpip/dpip.c @@ -35,7 +35,7 @@ static void usage(void) " "DPIP_NAME" [OPTIONS] OBJECT { COMMAND | help }\n" "Parameters:\n" " OBJECT := { link | addr | route | neigh | vlan | tunnel | qsch | cls |\n" - " ipv6 | iftraf | eal-mem | ipset | flow }\n" + " ipv6 | iftraf | eal-mem | ipset | flow | maddr }\n" " COMMAND := { create | destroy | add | del | show (list) | set (change) |\n" " replace | flush | test 
| enable | disable }\n" "Options:\n" diff --git a/tools/dpip/link.c b/tools/dpip/link.c index f2d3f7798..fd3d5bd84 100644 --- a/tools/dpip/link.c +++ b/tools/dpip/link.c @@ -101,8 +101,10 @@ static inline int get_netif_port_list(void) if (g_nic_list) free(g_nic_list); g_nic_list = calloc(1, len); - if (!g_nic_list) + if (!g_nic_list) { + dpvs_sockopt_msg_free((void *)p_port_list); return EDPVS_NOMEM; + } memcpy(g_nic_list, p_port_list, len); @@ -501,6 +503,7 @@ static int dump_bond_status(char *name, int namelen) p_get->link_up_prop_delay); if (p_get->slave_nb > NETIF_MAX_BOND_SLAVES) { printf("too many slaves: %d\n", p_get->slave_nb); + dpvs_sockopt_msg_free(p_get); return EDPVS_INVAL; } for (i = 0; i < p_get->slave_nb; i++) { @@ -514,6 +517,7 @@ static int dump_bond_status(char *name, int namelen) } printf("\n"); + dpvs_sockopt_msg_free(p_get); return EDPVS_OK; } diff --git a/tools/dpip/maddr.c b/tools/dpip/maddr.c new file mode 100644 index 000000000..7bb82a544 --- /dev/null +++ b/tools/dpip/maddr.c @@ -0,0 +1,168 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "dpip.h" +#include "conf/sockopts.h" +#include "conf/common.h" +#include "conf/netif.h" +#include "conf/netif_addr.h" +#include "conf/inetaddr.h" +#include "sockopt.h" + +static void maddr_help(void) +{ + fprintf(stderr, "Usage: dpip maddr show [dev STRING]\n"); +} + +static int maddr_parse_args(struct dpip_conf *conf, char *ifname, size_t len) +{ + while (conf->argc > 0) { + if (strcmp(conf->argv[0], "dev") == 0) { + NEXTARG_CHECK(conf, "dev"); + snprintf(ifname, len, "%s", conf->argv[0]); + } + NEXTARG(conf); + } + + if (conf->argc > 0) { + fprintf(stderr, "too many arguments\n"); + return -1; + } + + return 0; +} + +static int hwm_get_and_dump(const char *ifname, size_t len, bool verbose) +{ + int i, err; + size_t outlen; + struct netif_hw_addr_array *out; + struct netif_hw_addr_entry *entry; + + err = dpvs_getsockopt(SOCKOPT_NETIF_GET_MADDR, ifname, len, (void **)&out, &outlen); + if (err != EDPVS_OK || !out || !outlen) + return err; + + for (i = 0; i < out->count; i++) { + entry = &out->entries[i]; + if (verbose) { + printf("\tlink %s%s\t\trefcnt %u\t\tsync %d\n", + entry->addr, entry->flags & HW_ADDR_F_FROM_KNI ? " (+kni)" : "", + entry->refcnt, entry->sync_cnt); + } else { + printf("\tlink %s\n", entry->addr); + } + } + + dpvs_sockopt_msg_free(out); + return EDPVS_OK; +} + +static int ifm_get_and_dump(const char *ifname, size_t len, bool verbose) +{ + int i, err; + size_t outlen; + struct inet_maddr_array *out; + struct inet_maddr_entry *entry; + char ipbuf[64]; + + err = dpvs_getsockopt(SOCKOPT_GET_IFMADDR_SHOW, ifname, len, (void **)&out, &outlen); + if (err != EDPVS_OK || !out || !outlen) + return err; + + for (i = 0; i < out->nmaddr; i++) { + entry = &out->maddrs[i]; + if (verbose) { + printf("\t%5s %s\t\tflags 0x%x\t\trefcnt %u\n", entry->af == AF_INET6 ? "inet6" : "inet", + inet_ntop(entry->af, &entry->maddr, ipbuf, sizeof(ipbuf)) ? 
ipbuf : "unknown", + entry->flags, entry->refcnt); + } else { + printf("\t%5s %s\n", entry->af == AF_INET6 ? "inet6" : "inet", + inet_ntop(entry->af, &entry->maddr, ipbuf, sizeof(ipbuf)) ? ipbuf : "unknown"); + } + } + + dpvs_sockopt_msg_free(out); + return EDPVS_OK; +} + +static int maddr_get_and_dump(const char *ifname, size_t len, bool verbose) +{ + int err; + + err = hwm_get_and_dump(ifname, len, verbose); + if (err != EDPVS_OK) + return err; + + return ifm_get_and_dump(ifname, len, verbose); +} + +static int maddr_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, + struct dpip_conf *conf) +{ + int i, err; + size_t len; + char ifname[IFNAMSIZ] = { 0 }; + netif_nic_list_get_t *ports; + + if (maddr_parse_args(conf, ifname, sizeof(ifname)) != 0) + return EDPVS_INVAL; + + switch (conf->cmd) { + case DPIP_CMD_SHOW: + if (strlen(ifname) > 0) { + printf("%s:\n", ifname); + return maddr_get_and_dump(ifname, sizeof(ifname), conf->verbose); + } + + /* list all devices */ + err = dpvs_getsockopt(SOCKOPT_NETIF_GET_PORT_LIST, NULL, 0, (void **)&ports, &len); + if (err != EDPVS_OK || !ports || !len) + return err; + for (i = 0; i < ports->nic_num && i < NETIF_MAX_PORTS; i++) { + printf("%d:\t%s\n", ports->idname[i].id + 1, ports->idname[i].name); + err = maddr_get_and_dump(ports->idname[i].name, + sizeof(ports->idname[i].name), conf->verbose); + if (err != EDPVS_OK) { + dpvs_sockopt_msg_free(ports); + return err; + } + } + dpvs_sockopt_msg_free(ports); + break; + default: + return EDPVS_NOTSUPP; + } + + return EDPVS_OK; +} + +struct dpip_obj dpip_maddr = { + .name = "maddr", + .help = maddr_help, + .do_cmd = maddr_do_cmd, +}; + +static void __init maddr_init(void) +{ + dpip_register_obj(&dpip_maddr); +} + +static void __exit maddr_exit(void) +{ + dpip_unregister_obj(&dpip_maddr); +}