Skip to content

Commit

Permalink
zebra: Bring up 514 BGP neighbor sessions
Browse files Browse the repository at this point in the history
Issue:
When 514 interfaces/neighbors are configured, the socket returns the error
"Cannot allocate memory" when back-to-back IPv6 RA messages are sent
over it. This prevents an interface from learning its peer's
link-local address. The socket error occurs when we 1) try to join the ICMPv6
all-routers multicast group back to back for all interfaces, or 2) send
back-to-back RAs for all interfaces.

Fix:
1) For the ICMPv6 join case, we check whether the interface has already joined
the all-routers group and, if not, try to join. On failure, we retry joining after
a random delay between 1 ms and ICMPV6_JOIN_TIMER_EXP_MS (100 ms).
2) For the RA case, batch the sending of RA messages using a wheel timer.

Testing:
Monitor BGP session running sh bgp summary command

Before fix:
r1# sh bgp summary

IPv4 Unicast Summary:
BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0
BGP table version 0
RIB entries 0, using 0 bytes of memory
Peers 515, using 12 MiB of memory

Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt Desc
r1-eth0         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth1         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth2         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth3         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth4         4       1002        89        90        0    0    0 00:07:10            0        0 N/A
r1-eth5         4       1002        89        90        0    0    0 00:07:10            0        0 N/A

…..<snip>...
r1-eth252       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth253       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth254       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth255       4       1002        31        29        0    0    0 00:02:08            0        0 N/A
r1-eth256       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth257       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth258       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth259       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth260       4          0         0         0        0    0    0    never         Idle        0 N/A
……..<snip>…..
r1-eth511       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth512       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth513       4          0         0         0        0    0    0    never         Idle        0 N/A
r1-eth514       4          0         0         0        0    0    0    never         Idle        0 N/A
After fix:
r1# show bgp summary

IPv4 Unicast Summary:
BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0
BGP table version 0
RIB entries 0, using 0 bytes of memory
Peers 515, using 12 MiB of memory

Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt Desc
r1-eth0         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth1         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth2         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth3         4       1002        64        67        0    0    0 00:05:09            0        0 N/A
r1-eth4         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth5         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth6         4       1002        67        70        0    0    0 00:05:22            0        0 N/A
r1-eth7         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
r1-eth8         4       1002        87        87        0    0    0 00:07:04            0        0 N/A
....
r1-eth499       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth500       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth501       4       1002        19        22        0    0    0 00:01:21            0        0 N/A
r1-eth502       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth503       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth504       4       1002        20        23        0    0    0 00:01:30            0        0 N/A
r1-eth505       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth506       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth507       4       1002        22        25        0    0    0 00:01:39            0        0 N/A
r1-eth508       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth509       4       1002        17        20        0    0    0 00:01:13            0        0 N/A
r1-eth510       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth511       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth512       4       1002        19        22        0    0    0 00:01:22            0        0 N/A
r1-eth513       4       1002        43        43        0    0    0 00:03:22            0        0 N/A
r1-eth514       4       1002        43        43        0    0    0 00:03:22            0        0 N/A

Signed-off-by: Soumya Roy <[email protected]>
  • Loading branch information
Soumya Roy committed Feb 26, 2025
1 parent 6c3e1e4 commit 846a22d
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 4 deletions.
3 changes: 3 additions & 0 deletions zebra/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ struct zebra_if {
/* back pointer to the interface */
struct interface *ifp;

/* Event timer to batch ICMPv6 join requests */
struct event *icmpv6_join_timer;

enum zebra_if_flags flags;

/* Shutdown configuration. */
Expand Down
188 changes: 184 additions & 4 deletions zebra/rtadv.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include "vrf.h"
#include "ns.h"
#include "lib_errors.h"
#include "wheel.h"
#include "network.h"

#include "zebra/interface.h"
#include "zebra/rtadv.h"
Expand All @@ -36,6 +38,19 @@ extern struct zebra_privs_t zserv_privs;
static uint32_t interfaces_configured_for_ra_from_bgp;
#define RTADV_ADATA_SIZE 1024

/* procfs file opened by is_interface_in_group() to look up group membership */
#define PROC_IGMP6 "/proc/net/igmp6"

/* Length of an IPv6 address written as a bare hex string: 32 hex chars.
 * For example, 2001:db8:85a3::8a2e:370:7334 as a hex string is
 * 20010db885a3000000008a2e03707334,
 * which is 32 chars long.
 */
#define MAX_V6ADDR_LEN 32

/* Size of the buffer that receives an interface name parsed from PROC_IGMP6 */
#define MAX_INTERFACE_NAME_LEN 25

/* Size of the buffer that receives one line read from PROC_IGMP6 */
#define MAX_CHARS_PER_LINE 1024

#if defined(HAVE_RTADV)

#include "zebra/rtadv_clippy.c"
Expand All @@ -58,6 +73,10 @@ DEFINE_MTYPE_STATIC(ZEBRA, ADV_IF, "Advertised Interface");
#define ALLNODE "ff02::1"
#define ALLROUTER "ff02::2"

static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in);
static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr);


/* adv list node */
struct adv_if {
char name[IFNAMSIZ];
Expand Down Expand Up @@ -462,6 +481,63 @@ static void rtadv_send_packet(int sock, struct interface *ifp,
zif->ra_sent++;
}

/*
 * Timer callback that (re)tries joining the all-routers multicast group on
 * the interface carried as the event argument.
 *
 * If if_join_all_router() reports failure (non-zero), the callback re-arms
 * itself on zif->icmpv6_join_timer after a random delay in the range
 * 1..ICMPV6_JOIN_TIMER_EXP_MS milliseconds. The debug line is emitted on
 * every invocation, regardless of the join result.
 */
static void start_icmpv6_join_timer(struct event *thread)
{
	struct interface *iface = EVENT_ARG(thread);
	struct zebra_if *zinfo = iface->info;
	struct zebra_vrf *vrf_ctx = rtadv_interface_get_zvrf(iface);
	int join_rc = if_join_all_router(vrf_ctx->rtadv.sock, iface);

	if (join_rc != 0) {
		/* Back off for a random 1..ICMPV6_JOIN_TIMER_EXP_MS ms before retrying */
		int delay_ms = 1 + (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS);

		event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, iface, delay_ms,
				     &zinfo->icmpv6_join_timer);
	}

	if (!IS_ZEBRA_DEBUG_EVENT)
		return;

	zlog_debug("Processing ICMPv6 join on interface %s(%s:%u)", iface->name,
		   iface->vrf->name, iface->ifindex);
}

/*
 * Per-interface RA processing; this is the callback handed to wheel_init()
 * for zrouter.ra_wheel.
 *
 * arg - the struct interface to process.
 *
 * Two modes:
 *  - Fast retransmit (inFastRexmit and UseFastRexmit both set): consume one
 *    of the remaining fast retransmits, leave fast mode once none remain,
 *    and send an RA.
 *  - Regular: subtract one tick (RTADV_TIMER_WHEEL_PERIOD_MS /
 *    RTADV_TIMER_WHEEL_SLOTS_NO) from AdvIntervalTimer and send an RA once
 *    it has reached zero or below, then reload it from MaxRtrAdvInterval.
 *
 * NOTE(review): the per-call decrement presumes this callback runs once per
 * tick for each interface - confirm against the timer wheel implementation.
 */
void process_rtadv(void *arg)
{
	struct interface *iface = arg;
	struct zebra_if *zinfo = iface->info;
	struct zebra_vrf *vrf_ctx = rtadv_interface_get_zvrf(iface);
	const int tick_ms = RTADV_TIMER_WHEEL_PERIOD_MS / RTADV_TIMER_WHEEL_SLOTS_NO;

	if (zinfo->rtadv.inFastRexmit && zinfo->rtadv.UseFastRexmit) {
		zinfo->rtadv.NumFastReXmitsRemain--;
		if (zinfo->rtadv.NumFastReXmitsRemain <= 0)
			zinfo->rtadv.inFastRexmit = 0;

		if (IS_ZEBRA_DEBUG_SEND)
			zlog_debug("Doing fast RA Rexmit on interface %s(%s:%u)", iface->name,
				   iface->vrf->name, iface->ifindex);

		rtadv_send_packet(vrf_ctx->rtadv.sock, iface, RA_ENABLE);
		return;
	}

	zinfo->rtadv.AdvIntervalTimer -= tick_ms;

	/*
	 * The timer is allowed to go negative: when the interval is not a
	 * multiple of the tick (e.g. a 10 ms tick with a 1005 ms interval),
	 * testing for <= 0 guarantees that at least the full interval has
	 * elapsed before the next RA goes out.
	 */
	if (zinfo->rtadv.AdvIntervalTimer > 0)
		return;

	zinfo->rtadv.AdvIntervalTimer = zinfo->rtadv.MaxRtrAdvInterval;

	if (IS_ZEBRA_DEBUG_SEND)
		zlog_debug("Doing regular RA Rexmit on interface %s(%s:%u)",
			   iface->name, iface->vrf->name, iface->ifindex);

	rtadv_send_packet(vrf_ctx->rtadv.sock, iface, RA_ENABLE);
}

static void rtadv_timer(struct event *thread)
{
struct zebra_vrf *zvrf = EVENT_ARG(thread);
Expand Down Expand Up @@ -1261,7 +1337,14 @@ static void rtadv_start_interface_events(struct zebra_vrf *zvrf,
if (adv_if != NULL)
return; /* Already added */

if_join_all_router(zvrf->rtadv.sock, zif->ifp);
if (if_join_all_router(zvrf->rtadv.sock, zif->ifp)) {

/*Failed to join on 1st attempt, wait random amount of time between 1 ms
to ICMPV6_JOIN_TIMER_EXP_MS ms*/
int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1;
event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, zif->ifp, random_ms,
&zif->icmpv6_join_timer);
}

if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1)
rtadv_event(zvrf, RTADV_START, 0);
Expand All @@ -1281,6 +1364,9 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
if (status == RA_SUPPRESS) {
/* RA is currently enabled */
if (zif->rtadv.AdvSendAdvertisements) {

/* Try to delete from the ra wheel */
wheel_remove_item(zrouter.ra_wheel, ifp);
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
zif->rtadv.AdvSendAdvertisements = 0;
zif->rtadv.AdvIntervalTimer = 0;
Expand Down Expand Up @@ -1311,6 +1397,7 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
RTADV_NUM_FAST_REXMITS;
}

wheel_add_item(zrouter.ra_wheel, ifp);
rtadv_start_interface_events(zvrf, zif);
}
}
Expand Down Expand Up @@ -1438,6 +1525,12 @@ void rtadv_stop_ra(struct interface *ifp)
zif = ifp->info;
zvrf = rtadv_interface_get_zvrf(ifp);

/*Try to delete from ra wheels */
wheel_remove_item(zrouter.ra_wheel, ifp);

/*Turn off event for ICMPv6 join*/
EVENT_OFF(zif->icmpv6_join_timer);

if (zif->rtadv.AdvSendAdvertisements)
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
}
Expand Down Expand Up @@ -1730,8 +1823,7 @@ static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val)
case RTADV_START:
event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock,
&rtadv->ra_read);
event_add_event(zrouter.master, rtadv_timer, zvrf, 0,
&rtadv->ra_timer);

break;
case RTADV_STOP:
EVENT_OFF(rtadv->ra_timer);
Expand Down Expand Up @@ -1862,24 +1954,112 @@ void rtadv_cmd_init(void)
install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd);
}

/* Returns the value (0-15) of a single hex digit, or -1 if c is not one. */
static int v6_hex_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Converts an IPv6 address given as a bare string of exactly MAX_V6ADDR_LEN
 * hex digits (no colons, no "0x" prefix) into binary form.
 *
 * hex_str - NUL-terminated hex string; must not be NULL
 * addr    - output address; written only when the whole string is valid
 *
 * Returns true on success. Returns false (and logs) when the string has the
 * wrong length or contains a character that is not a hex digit.
 */
static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr)
{
	struct in6_addr parsed;
	size_t str_len = strlen(hex_str);

	if (str_len != MAX_V6ADDR_LEN) {
		flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid V6 addr hex len %zu", str_len);
		return false;
	}

	for (size_t i = 0; i < sizeof(parsed.s6_addr); i++) {
		int hi = v6_hex_digit_value(hex_str[i * 2]);
		int lo = v6_hex_digit_value(hex_str[i * 2 + 1]);

		/*
		 * Reject anything that is not a hex digit instead of letting it
		 * silently decode to some byte value.
		 */
		if (hi < 0 || lo < 0) {
			flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid V6 addr hex string %s", hex_str);
			return false;
		}

		parsed.s6_addr[i] = (uint8_t)((hi << 4) | lo);
	}

	/* Commit only after full validation so *addr is never half-written */
	*addr = parsed;

	return true;
}

/*
 * Checks whether an interface is already a member of an IPv6 multicast group
 * by scanning PROC_IGMP6. The lookup is only compiled in on Linux; elsewhere
 * the function always reports "not joined".
 *
 * ifname_in     - interface name to look for; must not be NULL
 * mcast_addr_in - group address in presentation format, as accepted by
 *                 inet_pton(AF_INET6, ...); must not be NULL
 *
 * Returns true when a line for ifname_in carrying the given group address is
 * found. Returns false otherwise, including when PROC_IGMP6 cannot be opened
 * or mcast_addr_in cannot be parsed.
 */
static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in)
{
	bool joined = false;

#ifdef __linux__
	char line[MAX_CHARS_PER_LINE];
	char ifname_found[MAX_INTERFACE_NAME_LEN];
	char mcast_addr_found_hex_str[MAX_V6ADDR_LEN + 5];
	struct in6_addr mcast_addr_in_bin;
	struct in6_addr mcast_addr_found_bin;
	const char *clean_mcast_addr_hex_str;
	FILE *fp;

	fp = fopen(PROC_IGMP6, "r");
	if (!fp) {
		flog_err_sys(EC_LIB_SYSTEM_CALL, "Failed to open %s", PROC_IGMP6);
		return false;
	}

	/* Convert input IPv6 address to binary */
	if (inet_pton(AF_INET6, mcast_addr_in, &mcast_addr_in_bin) != 1) {
		flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid IPv6 address format %s", mcast_addr_in);
		fclose(fp);
		return false;
	}

	/* Each line starts with: <ifindex> <ifname> <group address as hex> ... */
	while (fgets(line, sizeof(line), fp)) {
		/*
		 * The field widths (24 and 36) are one less than the sizes of
		 * ifname_found and mcast_addr_found_hex_str so that sscanf can
		 * never write past either buffer; keep them in sync with
		 * MAX_INTERFACE_NAME_LEN and MAX_V6ADDR_LEN + 5. The leading
		 * interface index is not needed and is skipped.
		 */
		if (sscanf(line, "%*d %24s %36s", ifname_found, mcast_addr_found_hex_str) != 2)
			continue; /* short or malformed line: nothing usable was parsed */

		if (strcmp(ifname_in, ifname_found) != 0)
			continue;

		/* Skip past an "0x" style prefix if one is present */
		clean_mcast_addr_hex_str = strchr(mcast_addr_found_hex_str, 'x');
		if (clean_mcast_addr_hex_str)
			clean_mcast_addr_hex_str++;
		else
			clean_mcast_addr_hex_str = mcast_addr_found_hex_str;

		if (!v6_addr_hex_str_to_in6_addr(clean_mcast_addr_hex_str, &mcast_addr_found_bin))
			continue;

		if (!IPV6_ADDR_CMP(&mcast_addr_in_bin, &mcast_addr_found_bin)) {
			/* Already joined; leave the loop so the file gets closed */
			joined = true;
			break;
		}
	}

	fclose(fp);

#endif

	return joined;
}

static int if_join_all_router(int sock, struct interface *ifp)
{
int ret;

struct ipv6_mreq mreq;

if (is_interface_in_group(ifp->name, ALLROUTER))

/* Interface is already part of the group, so return sucess */
return 0;

memset(&mreq, 0, sizeof(mreq));
inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr);
mreq.ipv6mr_interface = ifp->ifindex;

ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq,
sizeof(mreq));
if (ret < 0)

if (ret < 0) {
flog_err_sys(EC_LIB_SOCKET,
"%s(%u): Failed to join group, socket %u error %s",
ifp->name, ifp->ifindex, sock,
safe_strerror(errno));

return ret;
}

if (IS_ZEBRA_DEBUG_EVENT)
zlog_debug(
"%s(%s:%u): Join All-Routers multicast group, socket %u",
Expand Down
1 change: 1 addition & 0 deletions zebra/rtadv.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS);
extern uint32_t rtadv_get_interfaces_configured_from_bgp(void);
extern bool rtadv_compiled_in(void);
extern void rtadv_init(void);
extern void process_rtadv(void *arg);

#ifdef __cplusplus
}
Expand Down
18 changes: 18 additions & 0 deletions zebra/zebra_router.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "zebra/zebra_tc.h"
#include "debug.h"
#include "zebra_script.h"
#include "wheel.h"

DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info");
DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_RT_TABLE, "Zebra VRF table");
Expand Down Expand Up @@ -220,10 +221,22 @@ uint32_t zebra_router_get_next_sequence(void)
memory_order_relaxed);
}

/*
 * Key callback supplied to wheel_init() for the RA wheel: an interface is
 * keyed by its ifindex.
 *
 * arg - pointer to a struct interface.
 */
static inline unsigned int interface_hash_key(const void *arg)
{
	return ((const struct interface *)arg)->ifindex;
}

void zebra_router_terminate(void)
{
struct zebra_router_table *zrt, *tmp;

if (zrouter.ra_wheel) {
wheel_delete(zrouter.ra_wheel);
zrouter.ra_wheel = NULL;
}

EVENT_OFF(zrouter.t_rib_sweep);

RB_FOREACH_SAFE (zrt, zebra_router_table_head, &zrouter.tables, tmp)
Expand Down Expand Up @@ -278,6 +291,11 @@ void zebra_router_init(bool asic_offload, bool notify_on_ack,

zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER;

/*Init V6 RA batching stuffs*/
zrouter.ra_wheel = wheel_init(zrouter.master, RTADV_TIMER_WHEEL_PERIOD_MS,
RTADV_TIMER_WHEEL_SLOTS_NO, interface_hash_key, process_rtadv,
NULL);

zebra_vxlan_init();
zebra_mlag_init();
zebra_neigh_init();
Expand Down
7 changes: 7 additions & 0 deletions zebra/zebra_router.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,19 @@ struct zebra_mlag_info {
struct event *t_write;
};

/*
 * Period (in milliseconds) and slot count handed to wheel_init() when the
 * RA wheel (zrouter.ra_wheel) is created.
 */
#define RTADV_TIMER_WHEEL_PERIOD_MS 1000
#define RTADV_TIMER_WHEEL_SLOTS_NO 100
/*
 * Upper bound, in milliseconds, of the random delay used before retrying a
 * failed ICMPv6 all-routers group join (the delay is 1..this value).
 */
#define ICMPV6_JOIN_TIMER_EXP_MS 100

struct zebra_router {
atomic_bool in_shutdown;

/* Thread master */
struct event_loop *master;

/* Wheel to process V6 RA update */
struct timer_wheel *ra_wheel;

/* Lists of clients who have connected to us */
struct list *client_list;

Expand Down

0 comments on commit 846a22d

Please sign in to comment.