Skip to content

Commit

Permalink
smc_run.bpf: An eBPF implemented smc_run with IPPROTO_SMC
Browse files Browse the repository at this point in the history
Usage: smc_run.bpf COMMAND
Usage: smc_run.bpf [-h] [-v] [-s [load|unload]] [-p pid] [-n 0|1]
Usage: export SMC_RUN_BPF=1; smc_run COMMAND

Signed-off-by: D. Wythe <[email protected]>
  • Loading branch information
D. Wythe committed Jun 18, 2024
1 parent 75e767d commit 25e7ac4
Show file tree
Hide file tree
Showing 5 changed files with 678 additions and 2 deletions.
24 changes: 22 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
# http://www.eclipse.org/legal/epl-v10.html
#

SMC_TOOLS_RELEASE = 1.8.3
SMC_TOOLS_RELEASE = 1.8.4
VER_MAJOR = $(shell echo $(SMC_TOOLS_RELEASE) | cut -d '.' -f 1)

ARCHTYPE = $(shell uname -m)
ARCH := $(shell getconf LONG_BIT)
DISTRO := $(shell lsb_release -si 2>/dev/null)
VMLINUX = $(wildcard /sys/kernel/btf/vmlinux)

ifneq ("${V}","1")
MAKEFLAGS += --quiet
Expand All @@ -23,8 +24,10 @@ else
cmd =
endif
CCC = $(call cmd," CC ",$@)${CC}
CLANG ?= clang
LINK = $(call cmd," LINK ",$@)${CC}
GEN = $(call cmd," GEN ",$@)sed
BPFTOOL ?= bpftool
DESTDIR ?=
PREFIX = /usr
BINDIR = ${PREFIX}/bin
Expand Down Expand Up @@ -58,7 +61,7 @@ LIBDIR = ${PREFIX}/lib
endif
endif

all: libsmc-preload.so libsmc-preload32.so smcd smcr smcss smc_pnet
all: libsmc-preload.so libsmc-preload32.so smcd smcr smcss smc_pnet smc_run.bpf

CFLAGS ?= -Wall -O3 -g
ifneq ($(shell sh -c 'command -v pkg-config'),)
Expand All @@ -78,6 +81,18 @@ else
MACHINE_OPT32="-m32"
endif

# Generate the kernel type header from the running kernel's BTF blob
# (VMLINUX is the /sys/kernel/btf/vmlinux wildcard defined above).
vmlinux.h: ${VMLINUX}
${BPFTOOL} btf dump file ${VMLINUX} format c > $@

# Compile the kernel-side eBPF program for the bpf target; -g keeps the
# debug info in the object.
smc_run.bpf.bpf.o: smc_run.bpf.bpf.c vmlinux.h
${CLANG} -O2 -target bpf -g -c smc_run.bpf.bpf.c -o $@

# Generate the skeleton header from the compiled BPF object.
smc_run.bpf.skel.h: smc_run.bpf.bpf.o
${BPFTOOL} gen skeleton smc_run.bpf.bpf.o > $@

# Compile the user-space loader; it depends on the generated skeleton header.
smc_run.bpf.o: smc_run.bpf.c smc_run.bpf.skel.h
${CLANG} ${ALL_CFLAGS} -c smc_run.bpf.c

util.o: util.c util.h
${CCC} ${ALL_CFLAGS} -c util.c

Expand Down Expand Up @@ -140,6 +155,9 @@ smc_pnet: smc_pnet.c smctools_common.h
smcss: smcss.o libnetlink.o
${CCC} ${ALL_CFLAGS} $^ ${ALL_LDFLAGS} -o $@

# Link the user-space loader against libbpf.
smc_run.bpf: smc_run.bpf.o
${CLANG} $^ ${ALL_LDFLAGS} -lbpf -o $@

install: all
echo " INSTALL"
install -d -m755 $(DESTDIR)$(LIBDIR) $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man7 \
Expand All @@ -150,6 +168,7 @@ install: all
# install $(INSTALL_FLAGS_LIB) libsmc-preload32.so $(DESTDIR)$(LIBDIR32)/libsmc-preload.so
#endif
install $(INSTALL_FLAGS_BIN) smc_run $(DESTDIR)$(BINDIR)
install $(INSTALL_FLAGS_BIN) smc_run.bpf $(DESTDIR)$(BINDIR)
install $(INSTALL_FLAGS_BIN) smcd $(DESTDIR)$(BINDIR)
install $(INSTALL_FLAGS_BIN) smcr $(DESTDIR)$(BINDIR)
install $(INSTALL_FLAGS_BIN) smcss $(DESTDIR)$(BINDIR)
Expand Down Expand Up @@ -210,3 +229,4 @@ check:
clean:
echo " CLEAN"
rm -f *.o *.so *.a smc smcd smcr smcss smc_pnet
rm -f vmlinux.h smc_run.bpf.skel.h smc_run.bpf
54 changes: 54 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This package consists of the following tools:
- `smc_pnet` : C program for PNET Table handling
- `smc_rnics` : List available RDMA NICs
- `smc_run` : preload library environment setup script.
- `smc_run.bpf` : eBPF version of smc_run
- `smcss` : C program for displaying the information about active
SMC sockets.

Expand All @@ -25,6 +26,59 @@ table.

In addition the package contains the `AF_SMC` manpage (`man af_smc`).

Build
-------
You can build smc_run.bpf and all its dependencies by script:
```bash
sudo yum install clang libbpf libbpf-devel
make
make install
```

Usage
------

### smc_run.bpf

An eBPF implemented smc_run based on IPPROTO_SMC:

- Supports transparent replacement on a per-command basis (just like smc_run)
- Supports transparent replacement based on per-pid configuration; the setting is inherited by child processes
- Supports transparent replacement based on per-netns configuration

__smc_run.bpf COMMAND__
- Equivalent to smc_run but with IPPROTO_SMC via eBPF

__smc_run.bpf -p pid__
- To add the process with target pid to the map. Afterward, all socket() calls of the process and its descendant processes will be replaced from IPPROTO_TCP to IPPROTO_SMC.
- Mapping will be automatically deleted when process exits.
- Specifically, COMMAND mode actually works like the following:
```
smc_run.bpf -p $$
COMMAND
exit
```

__smc_run.bpf -n 1__
- To make all socket() calls of the current netns to be replaced from IPPROTO_TCP to IPPROTO_SMC.
- Turn it off with `smc_run.bpf -n 0`

For example :

```
ip netns add test
ip link set eth2 down
ip link set eth2 netns test
ip netns exec test ip link set eth2 up
# turn on
ip netns exec test smc_run.bpf -n 1
# smc
ip netns exec test curl http://smc_server
# turn off
ip netns exec test smc_run.bpf -n 0
# tcp
ip netns exec test curl http://smc_server
```

License
-------
Expand Down
10 changes: 10 additions & 0 deletions smc_run
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ export SMC_DEBUG;
#
# Execute the specified command.
#

# eBPF fast path: taken only when SMC_RUN_BPF is set and numerically non-zero.
if [ -n "$SMC_RUN_BPF" ] && [ "$SMC_RUN_BPF" -ne 0 ]; then
# Register this shell's pid with smc_run.bpf; its output is discarded and
# only the exit status is used.
smc_run.bpf -p $$ > /dev/null 2>&1
if [ $? -eq 0 ]; then
# exec replaces this shell, so the command keeps the registered pid.
exec "$@"
# Reached only if exec itself failed.
exit $?;
fi
# Registration failed: fall through to AF_SMC with LD_PRELOAD below.
fi

export LD_PRELOAD=$LD_PRELOAD:$LIB_NAME;

exec "$@"
Expand Down
134 changes: 134 additions & 0 deletions smc_run.bpf.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* eBPF version of smc_run
*
* Copyright (c) 2024, Alibaba Inc.
*
* Author: D. Wythe <[email protected]>
*/

#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "Dual BSD/GPL";

#ifndef AF_INET
#define AF_INET 2
#endif

#ifndef AF_INET6
#define AF_INET6 10
#endif

#ifndef IPPROTO_SMC
#define IPPROTO_SMC 256
#endif

#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)

/*
 * Policy record used as the value type of both the smc_run_pid and the
 * smc_run_netns maps.
 */
struct smc_run_strategy {
/* Non-zero: the socket hook returns IPPROTO_SMC for matching sockets. */
__u8 enable;
/* Non-zero (together with enable): the fork hook copies the policy to the child. */
__u8 inherit;
};

/*
 * Hash map: process id -> strategy.
 * Looked up by the socket hook, populated by the fork hook (and by the
 * socket hook as a cache of the netns decision), and cleaned up by the
 * process-exit hook.
 */
struct
{
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 65536);
__type(key, pid_t);
__type(value, struct smc_run_strategy);
} smc_run_pid SEC(".maps");

/*
 * Hash map: network-namespace inode number -> strategy.
 * Consulted by the socket hook when no per-process entry exists; entries
 * are removed by the proc_free_inum hook.
 */
struct
{
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 65536);
__type(key, int);
__type(value, struct smc_run_strategy);
} smc_run_netns SEC(".maps");

/*
 * fentry hook on proc_free_inum(): whenever an inode number is released,
 * drop any smc_run_netns entry stored under that number. Deleting a key
 * that is not present is harmless, so no filtering is done here.
 */
SEC("fentry/proc_free_inum")
int BPF_PROG(smc_run_on_net_cleanup, int ino)
{
	/* Best effort: the result is intentionally ignored. */
	(void)bpf_map_delete_elem(&smc_run_netns, &ino);

	return 0;
}

/*
 * Task-exit hook: remove the smc_run_pid entry that belongs to the exiting
 * task.
 *
 * sched_process_exit fires once per task, i.e. for every thread.
 * bpf_get_current_pid_tgid() returns (tgid << 32 | pid), where the low
 * 32 bits are the id of the exiting task itself. Keying the delete by the
 * TGID (upper half) would wipe the whole process's policy as soon as ANY of
 * its threads exits, and would never remove entries stored under a thread
 * id. Keying by the task's own id removes:
 *   - the process entry when the thread-group leader exits (pid == tgid);
 *   - a per-thread entry when a non-leader thread exits.
 *
 * Always returns 0.
 */
SEC("raw_tracepoint/sched_process_exit")
int smc_run_on_process_exit(void *ctx)
{
	/* Truncation keeps the low 32 bits: the exiting task's own id. */
	pid_t pid = (pid_t)(__u32)bpf_get_current_pid_tgid();

	/* Best effort: a missing key is not an error. */
	bpf_map_delete_elem(&smc_run_pid, &pid);
	return 0;
}

/*
 * Fork hook: propagate an enabled, inheritable strategy from the parent
 * process to the newly created child process.
 *
 * ctx->args[0] is the parent task, ctx->args[1] the child task.
 *
 * The socket hook looks entries up by TGID, so both the parent lookup and
 * the child insert use task->tgid rather than task->pid (the thread id):
 *   - a fork issued from a non-leader thread still finds the parent
 *     process's policy;
 *   - creating a thread (child->tgid == parent->tgid) does not add a
 *     per-thread entry, because BPF_NOEXIST turns the insert into a no-op.
 *
 * Always returns 0; failures to read or insert are silently ignored
 * (best effort).
 */
SEC("raw_tracepoint/sched_process_fork")
int smc_run_on_process_fork(struct bpf_raw_tracepoint_args *ctx)
{
	struct task_struct *parent = (struct task_struct *)ctx->args[0];
	struct task_struct *child = (struct task_struct *)ctx->args[1];
	struct smc_run_strategy init = { .enable = 1, .inherit = 1 };
	struct smc_run_strategy *match;
	pid_t tgid;

	if (CORE_READ(&tgid, &parent->tgid))
		return 0;

	/* Only an enabled AND inheritable parent policy is propagated. */
	match = bpf_map_lookup_elem(&smc_run_pid, &tgid);
	if (!match || !match->enable || !match->inherit)
		return 0;

	if (CORE_READ(&tgid, &child->tgid))
		return 0;

	/* BPF_NOEXIST: never overwrite an entry that is already present. */
	bpf_map_update_elem(&smc_run_pid, &tgid, &init, BPF_NOEXIST);

	return 0;
}

/*
 * fmod_ret hook on update_socket_protocol(): decide which protocol a new
 * socket should use.
 *
 * Only AF_INET/AF_INET6 stream sockets whose protocol is 0 or IPPROTO_TCP
 * are candidates; everything else is returned unchanged. For candidates
 * the decision order is:
 *   1. a per-process entry in smc_run_pid (keyed by the upper 32 bits of
 *      bpf_get_current_pid_tgid());
 *   2. otherwise a per-netns entry in smc_run_netns, keyed by the inode
 *      number of the current task's network namespace.
 *
 * Returns IPPROTO_SMC when the matching entry has enable set, otherwise
 * the original protocol.
 */
SEC("fmod_ret/update_socket_protocol")
int BPF_PROG(smc_run, int family, int type, int protocol)
{
	struct smc_run_strategy *strategy;
	struct smc_run_strategy cached;
	struct task_struct *cur;
	int ns_inum;
	pid_t tgid;

	/* Guard clauses: leave every non-TCP-like socket untouched. */
	if (family != AF_INET && family != AF_INET6)
		return protocol;
	if ((type & 0xf) != SOCK_STREAM)
		return protocol;
	if (protocol != 0 && protocol != IPPROTO_TCP)
		return protocol;

	/* 1. Per-process policy takes precedence over the netns policy. */
	tgid = bpf_get_current_pid_tgid() >> 32;
	strategy = bpf_map_lookup_elem(&smc_run_pid, &tgid);
	if (strategy)
		return strategy->enable ? IPPROTO_SMC : protocol;

	/* 2. Fall back to the policy of the current network namespace. */
	cur = bpf_get_current_task_btf();
	if (!cur)
		return protocol;

	ns_inum = cur->nsproxy->net_ns->ns.inum;
	strategy = bpf_map_lookup_elem(&smc_run_netns, &ns_inum);
	if (!strategy)
		return protocol;

	if (strategy->enable) {
		/*
		 * Cache the positive decision under the process id so later
		 * calls hit the first lookup. The cached entry is
		 * non-inheritable.
		 * NOTE(review): the cached entry looks like it outlives a
		 * later netns disable until the process exits - confirm this
		 * is intended.
		 */
		cached.enable = 1;
		cached.inherit = 0;
		bpf_map_update_elem(&smc_run_pid, &tgid, &cached, BPF_NOEXIST);
	}

	return strategy->enable ? IPPROTO_SMC : protocol;
}
Loading

0 comments on commit 25e7ac4

Please sign in to comment.