From 87e79892538f0b86798d65b754a967dd5bfd976f Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Thu, 20 Oct 2022 07:40:09 -0700 Subject: [PATCH 1/4] adding/updating ci files --- .github/workflows/test.yml | 177 ++++++++++++++++++ README | 18 -- ...ample_flags-for-bpf_perf_event_output.diff | 47 +++++ ...-iptables-iptables-legacy-in-the-bpf_.diff | 77 ++++++++ ci/vmtest/configs/DENYLIST | 6 + ci/vmtest/configs/DENYLIST.s390x | 5 + ci/vmtest/helpers.sh | 36 ++++ ci/vmtest/run_selftests.sh | 130 +++++++++++++ 8 files changed, 478 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 ci/diffs/0001-bpf-Fix-sample_flags-for-bpf_perf_event_output.diff create mode 100644 ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.diff create mode 100644 ci/vmtest/configs/DENYLIST create mode 100644 ci/vmtest/configs/DENYLIST.s390x create mode 100755 ci/vmtest/helpers.sh create mode 100755 ci/vmtest/run_selftests.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000000000..f8c0017341c44 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,177 @@ +name: bpf-ci + +on: + pull_request: + +concurrency: + group: ci-test-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + llvm-toolchain: + runs-on: ubuntu-latest + outputs: + llvm: ${{ steps.llvm-toolchain-impl.outputs.version }} + steps: + - id: llvm-version + uses: libbpf/ci/get-llvm-version@master + - id: llvm-toolchain-impl + shell: bash + run: echo "version=llvm-${{ steps.llvm-version.outputs.version }}" >> $GITHUB_OUTPUT + set-matrix: + needs: llvm-toolchain + runs-on: ubuntu-latest + outputs: + build-matrix: ${{ steps.set-matrix-impl.outputs.build_matrix }} + test-matrix: ${{ steps.set-matrix-impl.outputs.test_matrix }} + steps: + - id: set-matrix-impl + shell: python3 -I {0} + run: | + from json import dumps + import os + + def set_output(name, value): + """Write an output variable to the GitHub output file.""" + with open(os.getenv("GITHUB_OUTPUT"), "a") as f: + f.write(f"{name}={value}\n") + + matrix = [ + {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "gcc"}, + {"kernel": "LATEST", "runs_on": ["ubuntu-latest", "self-hosted"], "arch": "x86_64", "toolchain": "${{ needs.llvm-toolchain.outputs.llvm }}"}, + {"kernel": "LATEST", "runs_on": ["z15", "self-hosted"], "arch": "s390x", "toolchain": "gcc"}, + ] + self_hosted_repos = [ + "kernel-patches/bpf", + "kernel-patches/vmtest", + ] + + # Only a few repository within "kernel-patches" use self-hosted runners. + if "${{ github.repository_owner }}" != "kernel-patches" or "${{ github.repository }}" not in self_hosted_repos: + # Outside of those repositories, remove the self-hosted label and skip + # any testing on s390x, as no suitable runners will be available. + for idx in range(len(matrix) - 1, -1, -1): + if "z15" in matrix[idx]["runs_on"]: + del matrix[idx] + else: + matrix[idx]["runs_on"].remove("self-hosted") + + build_matrix = {"include": matrix} + set_output("build_matrix", dumps(build_matrix)) + + tests = ["test_progs", "test_progs_no_alu32", "test_maps", "test_verifier"] + test_matrix = {"include": [{**config, **{"test": test}} + for config in matrix + for test in tests]} + set_output("test_matrix", dumps(test_matrix)) + build: + name: build for ${{ matrix.arch }} with ${{ matrix.toolchain }} + needs: set-matrix + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.build-matrix) }} + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + steps: + - uses: actions/checkout@v3 + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Download bpf-next tree + uses: libbpf/ci/get-linux-source@master + with: + dest: '.kernel' + - if: ${{ github.repository == 'kernel-patches/vmtest' }} + name: Move linux source in place + shell: bash + run: | + rm -rf .kernel/.git + cp -rf .kernel/. . + rm -rf .kernel + - uses: libbpf/ci/patch-kernel@master + with: + patches-root: '${{ github.workspace }}/ci/diffs' + repo-root: '${{ github.workspace }}' + - name: Setup build environment + uses: libbpf/ci/setup-build-env@master + - name: Build kernel image + uses: libbpf/ci/build-linux@master + with: + arch: ${{ matrix.arch }} + toolchain: ${{ matrix.toolchain }} + - name: Build selftests + uses: libbpf/ci/build-selftests@master + with: + vmlinux_btf: ${{ github.workspace }}/vmlinux + toolchain: ${{ matrix.toolchain }} + - name: Build samples + uses: libbpf/ci/build-samples@master + with: + vmlinux_btf: ${{ github.workspace }}/vmlinux + toolchain: ${{ matrix.toolchain }} + - name: Tar artifacts + run: | + file_list="" + if [ "${{ github.repository }}" == "kernel-patches/vmtest" ]; then + # Package up a bunch of additional infrastructure to support running + # 'make kernelrelease' and bpf tool checks later on. + file_list="$(find . -iname Makefile | xargs) \ + scripts/ \ + tools/testing/selftests/bpf/ \ + tools/include/ \ + tools/bpf/bpftool/"; + fi + # zstd is installed by default in the runner images. + tar -cf - \ + .config \ + arch/*/boot/bzImage \ + include/config/auto.conf \ + include/generated/autoconf.h \ + ${file_list} \ + --exclude '*.h' \ + selftests/bpf/ \ + vmlinux | zstd -T0 -19 -o vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.zst + - uses: actions/upload-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }} + if-no-files-found: error + path: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.zst + test: + name: ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }} + needs: [set-matrix, build] + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.set-matrix.outputs.test-matrix) }} + runs-on: ${{ matrix.runs_on }} + timeout-minutes: 100 + env: + KERNEL: ${{ matrix.kernel }} + REPO_ROOT: ${{ github.workspace }} + REPO_PATH: "" + steps: + - uses: actions/checkout@v3 + - uses: actions/download-artifact@v3 + with: + name: vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }} + path: . + - name: Untar artifacts + # zstd is installed by default in the runner images. + run: zstd -d -T0 vmlinux-${{ matrix.arch }}-${{ matrix.toolchain }}.tar.zst --stdout | tar -xf - + - name: Prepare rootfs + uses: libbpf/ci/prepare-rootfs@master + with: + project-name: 'libbpf' + arch: ${{ matrix.arch }} + kernel: ${{ matrix.kernel }} + kernel-root: '.' + image-output: '/tmp/root.img' + test: ${{ matrix.test }} + - name: Run selftests + uses: libbpf/ci/run-qemu@master + with: + arch: ${{ matrix.arch}} + img: '/tmp/root.img' + vmlinuz: '${{ github.workspace }}/vmlinuz' + kernel-root: '.' diff --git a/README b/README index 669ac7c322927..e69de29bb2d1d 100644 --- a/README +++ b/README @@ -1,18 +0,0 @@ -Linux kernel -============ - -There are several guides for kernel developers and users. These guides can -be rendered in a number of formats, like HTML and PDF. Please read -Documentation/admin-guide/README.rst first. - -In order to build the documentation, use ``make htmldocs`` or -``make pdfdocs``. The formatted documentation can also be read online at: - - https://www.kernel.org/doc/html/latest/ - -There are various text files in the Documentation/ subdirectory, -several of them using the Restructured Text markup notation. - -Please read the Documentation/process/changes.rst file, as it contains the -requirements for building and running the kernel, and information about -the problems which may result by upgrading your kernel. diff --git a/ci/diffs/0001-bpf-Fix-sample_flags-for-bpf_perf_event_output.diff b/ci/diffs/0001-bpf-Fix-sample_flags-for-bpf_perf_event_output.diff new file mode 100644 index 0000000000000..5c1a2036135a1 --- /dev/null +++ b/ci/diffs/0001-bpf-Fix-sample_flags-for-bpf_perf_event_output.diff @@ -0,0 +1,47 @@ +From 01cc82fac426032c414258f2c322f63e75cf04c0 Mon Sep 17 00:00:00 2001 +From: Sumanth Korikkar +Date: Fri, 7 Oct 2022 10:13:27 +0200 +Subject: [PATCH] bpf: Fix sample_flags for bpf_perf_event_output + +* Raw data is also filled by bpf_perf_event_output. +* Add sample_flags to indicate raw data. +* This eliminates the segfaults as shown below: + Run ./samples/bpf/trace_output + BUG pid 9 cookie 1001000000004 sized 4 + BUG pid 9 cookie 1001000000004 sized 4 + BUG pid 9 cookie 1001000000004 sized 4 + Segmentation fault (core dumped) + +Fixes: 838d9bb62d13 ("perf: Use sample_flags for raw_data") +Signed-off-by: Sumanth Korikkar +Signed-off-by: Andrii Nakryiko +Acked-by: Namhyung Kim +Acked-by: Jiri Olsa +Link: https://lore.kernel.org/bpf/20221007081327.1047552-1-sumanthk@linux.ibm.com +--- + kernel/trace/bpf_trace.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 688552df95ca..596335fe62e2 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -687,6 +687,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, + + perf_sample_data_init(sd, 0, 0); + sd->raw = &raw; ++ sd->sample_flags |= PERF_SAMPLE_RAW; + + err = __bpf_perf_event_output(regs, map, flags, sd); + +@@ -745,6 +746,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + perf_fetch_caller_regs(regs); + perf_sample_data_init(sd, 0, 0); + sd->raw = &raw; ++ sd->sample_flags |= PERF_SAMPLE_RAW; + + ret = __bpf_perf_event_output(regs, map, flags, sd); + out: +-- +2.30.2 + diff --git a/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.diff b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.diff new file mode 100644 index 0000000000000..e1e5f01a59930 --- /dev/null +++ b/ci/diffs/0001-selftests-bpf-S-iptables-iptables-legacy-in-the-bpf_.diff @@ -0,0 +1,77 @@ +From de9c8d848d90cf2e53aced50b350827442ca5a4f Mon Sep 17 00:00:00 2001 +From: Martin KaFai Lau +Date: Wed, 12 Oct 2022 15:12:35 -0700 +Subject: [PATCH] selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and + xdp_synproxy test + +The recent vm image in CI has reported error in selftests that use +the iptables command. Manu Bretelle has pointed out the difference +in the recent vm image that the iptables is sym-linked to the iptables-nft. +With this knowledge, I can also reproduce the CI error by manually running +with the 'iptables-nft'. + +This patch is to replace the iptables command with iptables-legacy +to unblock the CI tests. + +Signed-off-by: Martin KaFai Lau +Signed-off-by: Andrii Nakryiko +Acked-by: David Vernet +Link: https://lore.kernel.org/bpf/20221012221235.3529719-1-martin.lau@linux.dev +--- + tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 6 +++--- + tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +index 8a838ea8bdf3..c8ba4009e4ab 100644 +--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +@@ -49,14 +49,14 @@ static int connect_to_server(int srv_fd) + + static void test_bpf_nf_ct(int mode) + { +- const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; ++ const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + int srv_fd = -1, client_fd = -1, srv_client_fd = -1; + struct sockaddr_in peer_addr = {}; + struct test_bpf_nf *skel; + int prog_fd, err; + socklen_t len; + u16 srv_port; +- char cmd[64]; ++ char cmd[128]; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), +@@ -69,7 +69,7 @@ static void test_bpf_nf_ct(int mode) + + /* Enable connection tracking */ + snprintf(cmd, sizeof(cmd), iptables, "-A"); +- if (!ASSERT_OK(system(cmd), "iptables")) ++ if (!ASSERT_OK(system(cmd), cmd)) + goto end; + + srv_port = (mode == TEST_XDP) ? 5005 : 5006; +diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +index 75550a40e029..c72083885b6d 100644 +--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +@@ -94,12 +94,12 @@ static void test_synproxy(bool xdp) + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); +- SYS("iptables -t raw -I PREROUTING \ ++ SYS("iptables-legacy -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); +- SYS("iptables -t filter -A INPUT \ ++ SYS("iptables-legacy -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ +-- +2.30.2 + diff --git a/ci/vmtest/configs/DENYLIST b/ci/vmtest/configs/DENYLIST new file mode 100644 index 0000000000000..d12cf9fae5eee --- /dev/null +++ b/ci/vmtest/configs/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +btf_dump/btf_dump: syntax +kprobe_multi_test/bench_attach +core_reloc/enum64val +core_reloc/size___diff_sz +core_reloc/type_based___diff_sz diff --git a/ci/vmtest/configs/DENYLIST.s390x b/ci/vmtest/configs/DENYLIST.s390x new file mode 100644 index 0000000000000..e6829c94bdaae --- /dev/null +++ b/ci/vmtest/configs/DENYLIST.s390x @@ -0,0 +1,5 @@ +deny_namespace # not yet in bpf denylist +tc_redirect/tc_redirect_dtime # very flaky +lru_bug # not yet in bpf-next denylist +usdt/basic # failing verifier due to bounds check after LLVM update +usdt/multispec # same as above diff --git a/ci/vmtest/helpers.sh b/ci/vmtest/helpers.sh new file mode 100755 index 0000000000000..3b2cda0153b71 --- /dev/null +++ b/ci/vmtest/helpers.sh @@ -0,0 +1,36 @@ +# $1 - start or end +# $2 - fold identifier, no spaces +# $3 - fold section description +foldable() { + local YELLOW='\033[1;33m' + local NOCOLOR='\033[0m' + if [ $1 = "start" ]; then + line="::group::$2" + if [ ! -z "${3:-}" ]; then + line="$line - ${YELLOW}$3${NOCOLOR}" + fi + else + line="::endgroup::" + fi + echo -e "$line" +} + +__print() { + local TITLE="" + if [[ -n $2 ]]; then + TITLE=" title=$2" + fi + echo "::$1${TITLE}::$3" +} + +# $1 - title +# $2 - message +print_error() { + __print error $1 $2 +} + +# $1 - title +# $2 - message +print_notice() { + __print notice $1 $2 +} diff --git a/ci/vmtest/run_selftests.sh b/ci/vmtest/run_selftests.sh new file mode 100755 index 0000000000000..c30c01a5ad6d8 --- /dev/null +++ b/ci/vmtest/run_selftests.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# run_selftest.sh will run the tests within /${PROJECT_NAME}/selftests/bpf +# If no specific test names are given, all test will be ran, otherwise, it will +# run the test passed as parameters. +# There is 2 ways to pass test names. +# 1) command-line arguments to this script +# 2) a comma-separated list of test names passed as `run_tests` boot parameters. +# test names passed as any of those methods will be ran. + +set -euo pipefail + +source $(cd $(dirname $0) && pwd)/helpers.sh + +ARCH=$(uname -m) + +STATUS_FILE=/exitstatus + +declare -a TEST_NAMES=() + +read_lists() { + (for path in "$@"; do + if [[ -s "$path" ]]; then + cat "$path" + fi; + done) | cut -d'#' -f1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | tr -s '\n' ',' +} + +TEST_PROGS_ARGS="" +# Disabled due to issue +# if [[ "$(nproc)" -gt 2 ]]; then +# TEST_PROGS_ARGS="-j" +# fi + +read_test_names() { + foldable start read_test_names "Reading test names from boot parameters and command line arguments" + # Check if test names were passed as boot parameter. + # We expect `run_tests` to be a comma-separated list of test names. + IFS=',' read -r -a test_names_from_boot <<< \ + "$(sed -n 's/.*run_tests=\([^ ]*\).*/\1/p' /proc/cmdline)" + + echo "${#test_names_from_boot[@]} tests extracted from boot parameters: ${test_names_from_boot[*]}" + # Sort and only keep unique test names from both boot params and arguments + # TEST_NAMES will contain a sorted list of uniq tests to be ran. + # Only do this if any of $test_names_from_boot[@] or $@ has elements as + # "printf '%s\0'" will otherwise generate an empty element. + if [[ ${#test_names_from_boot[@]} -gt 0 || $# -gt 0 ]] + then + readarray -t TEST_NAMES < \ + <(printf '%s\0' "${test_names_from_boot[@]}" "$@" | \ + sort --zero-terminated --unique | \ + xargs --null --max-args=1) + fi + foldable end read_test_names +} + +test_progs() { + foldable start test_progs "Testing test_progs" + # "&& true" does not change the return code (it is not executed + # if the Python script fails), but it prevents exiting on a + # failure due to the "set -e". + ./test_progs ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} ${TEST_PROGS_ARGS} && true + echo "test_progs:$?" >>"${STATUS_FILE}" + foldable end test_progs +} + +test_progs_no_alu32() { + foldable start test_progs-no_alu32 "Testing test_progs-no_alu32" + ./test_progs-no_alu32 ${DENYLIST:+-d"$DENYLIST"} ${ALLOWLIST:+-a"$ALLOWLIST"} ${TEST_PROGS_ARGS} && true + echo "test_progs-no_alu32:$?" >>"${STATUS_FILE}" + foldable end test_progs-no_alu32 +} + +test_maps() { + foldable start test_maps "Testing test_maps" + taskset 0xF ./test_maps && true + echo "test_maps:$?" >>"${STATUS_FILE}" + foldable end test_maps +} + +test_verifier() { + foldable start test_verifier "Testing test_verifier" + ./test_verifier && true + echo "test_verifier:$?" >>"${STATUS_FILE}" + foldable end test_verifier +} + +foldable end vm_init + +foldable start kernel_config "Kconfig" + +zcat /proc/config.gz + +foldable end kernel_config + +configs_path=${PROJECT_NAME}/selftests/bpf +local_configs_path=${PROJECT_NAME}/vmtest/configs +DENYLIST=$(read_lists \ + "$configs_path/DENYLIST" \ + "$configs_path/DENYLIST.${ARCH}" \ + "$local_configs_path/DENYLIST" \ + "$local_configs_path/DENYLIST.${ARCH}" \ +) +ALLOWLIST=$(read_lists \ + "$configs_path/ALLOWLIST" \ + "$configs_path/ALLOWLIST.${ARCH}" \ + "$local_configs_path/ALLOWLIST" \ + "$local_configs_path/ALLOWLIST.${ARCH}" \ +) + +echo "DENYLIST: ${DENYLIST}" +echo "ALLOWLIST: ${ALLOWLIST}" + +cd ${PROJECT_NAME}/selftests/bpf + +# populate TEST_NAMES +read_test_names "$@" +# if we don't have any test name provided to the script, we run all tests. +if [ ${#TEST_NAMES[@]} -eq 0 ]; then + test_progs + test_progs_no_alu32 + test_maps + test_verifier +else + # else we run the tests passed as command-line arguments and through boot + # parameter. + for test_name in "${TEST_NAMES[@]}"; do + "${test_name}" + done +fi From ea02d64f2b2bc9e4710d0e362fb22f78328c7435 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 17 Oct 2022 18:01:06 +0800 Subject: [PATCH 2/4] sched: Add helper kstat_cpu_softirqs_sum() Similar to kstat_cpu_irqs_sum(), it counts the sum of all software interrupts on a specified CPU. Signed-off-by: Zhen Lei --- include/linux/kernel_stat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ddb5a358fd829..61d427c1962bf 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,6 +67,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) return kstat_cpu(cpu).softirqs[irq]; } +static inline unsigned int kstat_cpu_softirqs_sum(int cpu) +{ + int i; + unsigned int sum = 0; + + for (i = 0; i < NR_SOFTIRQS; i++) + sum += kstat_softirqs_cpu(i, cpu); + + return sum; +} + /* * Number of interrupts per specific IRQ source, since bootup */ From 95c15db9be77d23523588a879381161478623760 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 17 Oct 2022 18:01:07 +0800 Subject: [PATCH 3/4] sched: Add helper nr_context_switches_cpu() Returns the number of context switches on the specified CPU, which can be used to diagnose rcu stall. Signed-off-by: Zhen Lei --- include/linux/kernel_stat.h | 1 + kernel/sched/core.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 61d427c1962bf..ac4f9302b559c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) +extern unsigned long long nr_context_switches_cpu(int cpu); extern unsigned long long nr_context_switches(void); extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5800b0623ff30..a0d19f67f2046 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5229,6 +5229,11 @@ bool single_task_running(void) } EXPORT_SYMBOL(single_task_running); +unsigned long long nr_context_switches_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + unsigned long long nr_context_switches(void) { int i; From 8b2b698264326015326acc8b4029084caa867be1 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 17 Oct 2022 18:01:08 +0800 Subject: [PATCH 4/4] rcu: Add RCU stall diagnosis information In some extreme cases, such as the I/O pressure test, the CPU usage may be 100%, causing RCU stall. In this case, the printed information about current is not useful. Displays the number and usage of hard interrupts, soft interrupts, and context switches that are generated within half of the CPU stall timeout, can help us make a general judgment. In other cases, we can preliminarily determine whether an infinite loop occurs when local_irq, local_bh or preempt is disabled. For example: rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-....: (1250 ticks this GP) rcu: hardirqs softirqs csw/system rcu: number: 624 45 0 rcu: cputime: 69 1 2425 ==> 2500(ms) The example above shows that the number of hard and soft interrupts is small, there is zero context switching, and the system takes up a lot of time. We can quickly conclude that the current task is infinitely looped with preempt_disable(). The impact on system performance is negligible because snapshot is recorded only one time after 1/2 CPU stall timeout. Signed-off-by: Zhen Lei --- kernel/rcu/tree.h | 11 +++++++++++ kernel/rcu/tree_stall.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index fcb5d696eb170..6ca06ad7b7842 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -158,6 +158,16 @@ union rcu_noqs { u16 s; /* Set of bits, aggregate OR here. */ }; +struct rcu_snap_record { + unsigned long gp_seq; + u64 cputime_irq; + u64 cputime_softirq; + u64 cputime_system; + unsigned int nr_hardirqs; + unsigned int nr_softirqs; + unsigned long long nr_csw; +}; + /* Per-CPU data for read-copy update. */ struct rcu_data { /* 1) quiescent-state and grace-period handling : */ @@ -262,6 +272,7 @@ struct rcu_data { short rcu_onl_gp_flags; /* ->gp_flags at last online. */ unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ + struct rcu_snap_record snap_record; long lazy_len; /* Length of buffered lazy callbacks. */ int cpu; diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 5653560573e22..08cfcf526f7d2 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -211,8 +211,10 @@ static void rcu_stall_kick_kthreads(void) */ static void rcu_iw_handler(struct irq_work *iwp) { + int cpu = smp_processor_id(); struct rcu_data *rdp; struct rcu_node *rnp; + struct rcu_snap_record *r; rdp = container_of(iwp, struct rcu_data, rcu_iw); rnp = rdp->mynode; @@ -222,6 +224,16 @@ static void rcu_iw_handler(struct irq_work *iwp) rdp->rcu_iw_pending = false; } raw_spin_unlock_rcu_node(rnp); + + r = &rdp->snap_record; + r->cputime_irq = kcpustat_this_cpu->cpustat[CPUTIME_IRQ]; + r->cputime_softirq = kcpustat_this_cpu->cpustat[CPUTIME_SOFTIRQ]; + r->cputime_system = kcpustat_this_cpu->cpustat[CPUTIME_SYSTEM]; + r->nr_hardirqs = kstat_cpu_irqs_sum(cpu); + r->nr_softirqs = kstat_cpu_softirqs_sum(cpu); + r->nr_csw = nr_context_switches_cpu(cpu); + barrier(); + r->gp_seq = rdp->gp_seq; } ////////////////////////////////////////////////////////////////////////////// @@ -428,6 +440,32 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp return j > 2 * HZ; } +static void print_cpu_stat_info(int cpu) +{ + u64 *cpustat; + unsigned long half_timeout; + struct rcu_snap_record *r; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + r = &rdp->snap_record; + if (r->gp_seq != rdp->gp_seq) + return; + + cpustat = kcpustat_this_cpu->cpustat; + half_timeout = rcu_jiffies_till_stall_check() / 2; + + pr_err(" hardirqs softirqs csw/system\n"); + pr_err(" number: %8d %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) - r->nr_hardirqs, + kstat_cpu_softirqs_sum(cpu) - r->nr_softirqs, + nr_context_switches_cpu(cpu) - r->nr_csw); + pr_err("cputime: %8lld %10lld %12lld ==> %lld(ms)\n", + div_u64(cpustat[CPUTIME_IRQ] - r->cputime_irq, NSEC_PER_MSEC), + div_u64(cpustat[CPUTIME_SOFTIRQ] - r->cputime_softirq, NSEC_PER_MSEC), + div_u64(cpustat[CPUTIME_SYSTEM] - r->cputime_system, NSEC_PER_MSEC), + jiffies64_to_msecs(half_timeout)); +} + /* * Print out diagnostic information for the specified stalled CPU. * @@ -484,6 +522,8 @@ static void print_cpu_stall_info(int cpu) data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, rcuc_starved ? buf : "", falsepositive ? " (false positive?)" : ""); + + print_cpu_stat_info(cpu); } /* Complain about starvation of grace-period kthread. */