diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbc8badd3..0eed44b8e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,10 @@ on: permissions: contents: read +defaults: + run: + shell: bash + jobs: unit-test: diff --git a/CHANGELOG.md b/CHANGELOG.md index e2b55763f..9b191a977 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Added Yarn version 4.3.1. - Added Yarn version 3.8.3. +- Support reading container memory limits from cgroups (v1 and v2) ## [v255] - 2024-06-21 diff --git a/etc/cgroups.sh b/etc/cgroups.sh new file mode 100755 index 000000000..63f779c90 --- /dev/null +++ b/etc/cgroups.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash + +# stdin is the output of e.g. /proc/self/cgroup +cgroup_util_find_controller_from_procfs_cgroup_contents() { + local usage="Usage (stdin is /proc/self/cgroup format): ${FUNCNAME[0]} CONTROLLER" + # there may be an entry for a v1 controller like: + # 7:memory:/someprefix + # if not, then there can be an entry for a v2 unified hierarchy, e.g.: + # 0::/ + # we look for the v1 first, as there may be hybrid setups where some controllers are still v1 + # so if there is an entry for "memory", a v1 controller is in charge, even if others are v2 + ( + set -o pipefail + grep -E -e '^[0-9]+:('"${1:?$usage}"')?:/.*' | sort -r -n -k 1 -t ":" | head -n1 + ) +} + +cgroup_util_get_controller_version_from_procfs_cgroup_line() { + local -a line; readarray -d':' -t line # -t removes trailing delimiter; array is local so it does not leak into the sourcing shell + # with e.g. 'docker run --cgroup-parent foo:bar', the third (relative path) section would contain a colon + if (( ${#line[@]} < 3 )); then + return 1 + fi + if [[ ${line[0]} == "0" ]]; then + echo "2" + else + echo "1" + fi +} + +cgroup_util_get_controller_path_from_procfs_cgroup_line() { + local -a line; readarray -d':' line # no -t, we want any trailing delims for concatenation via printf; array is local so it does not leak into the sourcing shell + if (( ${#line[@]} < 3 )); then + return 1 + fi + # with e.g. 
'docker run --cgroup-parent foo:bar', the third (relative path) section would contain a colon, so we have to output from 3 until the end + printf "%s" "${line[@]:2}" +} + +# stdin is the output of e.g. /proc/self/mountinfo +# $1 is a controller name, which is matched against the mount options using -O (so it could be a comma-separated list, too) +cgroup_util_find_v1_mount_from_procfs_mountinfo_contents() { + local usage="Usage (stdin is /proc/self/mountinfo format): ${FUNCNAME[0]} CONTROLLER" + # must specify --list explicitly or it might output tree parts after all... + findmnt --list --noheadings --first-only -t cgroup -O "${1:?$usage}" -o target -F <(cat) +} + +# stdin is the output of e.g. /proc/self/mountinfo +cgroup_util_find_v2_mount_from_procfs_mountinfo_contents() { + # must specify --list explicitly or it might output tree parts after all... + findmnt --list --noheadings --first-only -t cgroup2 -o target -F <(cat) +} + +# $1 is the controller name, $2 is the mount root from /proc/self/mountinfo, $3 is the mount relative dir from /proc/self/cgroup +cgroup_util_find_v1_path() { + local usage="Usage: ${FUNCNAME[0]} CONTROLLER MOUNT CGROUP" + local relpath=${3:?$usage} + # strip trailing slash if present (it would also be if it was just "/") + relpath=${relpath%/} + local cur="${2:?$usage}${relpath}" + while true; do + if [[ -d "$cur" ]] && compgen -G "${cur}/${1:?$usage}.*" > /dev/null; then + echo "$cur" + return 0 + elif [[ "$cur" == "$2" || "$cur" == "/" || "$cur" == "." ]]; then + break # we are at the mount (or dirname has run out of parents), and it does not exist + fi + cur=$(dirname "$cur") + done + return 1 +} + +# $1 is the controller name, $2 is the mount root from /proc/self/mountinfo, $3 is the mount relative dir from /proc/self/cgroup +cgroup_util_find_v2_path() { + local usage="Usage: ${FUNCNAME[0]} CONTROLLER MOUNT CGROUP" + local retval=${3:?$usage} + # strip trailing slash if present (it would also be if it was just "/") + retval=${2:?$usage}${retval%/} + if grep -Eqs '(^|\s)'"${1:?$usage}"'($|\s)' 
"${retval}/cgroup.controllers"; then + echo "$retval" + return 0 + else + # so it captures the exit status of grep, otherwise it is that of the if + return + fi +} + +# this ignores memory.soft_limit_in_bytes on purpose for the reasons outlined in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#id1 +cgroup_util_read_cgroupv1_memory_limit() { + local usage="Usage: ${FUNCNAME[0]} PATH" + local f="${1:?$usage}/memory.limit_in_bytes" + if [[ -r "$f" ]]; then + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Using limit from '${f}'" >&2 + cat "$f" + return + else + return 9 + fi +} + +# this reads memory.high first, then falls back to memory.max, memory.low, or memory.min +cgroup_util_read_cgroupv2_memory_limit() { + local usage="Usage: ${FUNCNAME[0]} PATH" + + local f + local limit + # memory.high is the best limit to read ("This is the main mechanism to control memory usage of a cgroup.", https://www.kernel.org/doc/html/v5.15/admin-guide/cgroup-v2.html) + # we fall back to memory.max first (the final "safety net" limit), then memory.low (best-effort memory protection, e.g. 
OCI memory.reservation or Docker --memory-reservation), then finally memory.min (hard guaranteed minimum) + for f in "${1:?$usage}/memory.high" "${1}/memory.max" "${1}/memory.low" "${1}/memory.min"; do + if [[ -r "$f" ]]; then + limit=$(cat "$f") + if [[ "$limit" != "max" && "$limit" != "0" ]]; then + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Using limit from '${f}'" >&2 + echo "$limit" + return + fi + fi + done + + return 9 +} + +# reads a cgroup v1 (memory.limit_in_bytes) or v2 (memory.high, fallback to memory.max, fallback to memory.low, fallback to memory.min) +# if env var CGROUP_UTIL_PROCFS_ROOT is passed, it will be used instead of '/proc' to find '/proc/self/cgroup', '/proc/self/mountinfo' etc (useful for testing, defaults to '/proc') +# if env var CGROUP_UTIL_CGROUPFS_PREFIX is passed, it will be prepended to any /sys/fs/cgroup or similar path used (useful for testing, defaults to '') +# pass a value for env var CGROUP_UTIL_VERBOSE to enable verbose mode +cgroup_util_read_cgroup_memory_limit() { + if [[ -z "${CGROUP_UTIL_PROCFS_ROOT-}" ]]; then + local CGROUP_UTIL_PROCFS_ROOT=/proc + fi + + # this value is used as a threshold for "silly" maximums returned e.g. 
by Docker on a cgroups v1 system + local maximum=$((8 * 1024 * 1024 * 1024 * 1024)) # 8 TB + + local controller=memory + + local procfs_cgroup_entry + procfs_cgroup_entry=$(cgroup_util_find_controller_from_procfs_cgroup_contents "$controller" < "${CGROUP_UTIL_PROCFS_ROOT}/self/cgroup") || { + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Could not find cgroup controller '${controller}' in '${CGROUP_UTIL_PROCFS_ROOT}/self/cgroup'" >&2 + return 3 + } + + local controller_version + controller_version=$(echo "$procfs_cgroup_entry" | cgroup_util_get_controller_version_from_procfs_cgroup_line) || { + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Could not determine version for cgroup controller '${controller}' from '${CGROUP_UTIL_PROCFS_ROOT}/self/cgroup'" >&2 + return 4 + } + + local controller_path + controller_path=$(echo "$procfs_cgroup_entry" | cgroup_util_get_controller_path_from_procfs_cgroup_line) || { + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Could not determine path for cgroup controller '${controller}' from '${CGROUP_UTIL_PROCFS_ROOT}/self/cgroup'" >&2 + return 5 + } + + local controller_mount + controller_mount=$(cgroup_util_find_v"$controller_version"_mount_from_procfs_mountinfo_contents "$controller" < "${CGROUP_UTIL_PROCFS_ROOT}/self/mountinfo") || { + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Could not determine mount point for cgroup controller '${controller}' from '${CGROUP_UTIL_PROCFS_ROOT}/self/mountinfo'" >&2 + return 6 + } + # for testing purposes, a prefix can be passed to "relocate" the /sys/fs/cgroup/... 
location we are reading from next + controller_mount="${CGROUP_UTIL_CGROUPFS_PREFIX-}${controller_mount}" + + local location + location=$(cgroup_util_find_v"$controller_version"_path "$controller" "$controller_mount" "$controller_path") || { + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Could not find a location for cgroup controller '${controller}'" >&2 + return 7 + } + + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Reading cgroup v${controller_version} limit from '${location}'" >&2 + + local limit + limit=$(cgroup_util_read_cgroupv"$controller_version"_memory_limit "$location") || return + + if (( maximum > 0 && limit <= maximum )); then + echo "$limit" + return + else + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Ignoring cgroup memory limit of ${limit} Bytes (exceeds maximum of ${maximum} Bytes)" >&2 + return 99 + fi +} + +# reads a cgroup v1 (memory.limit_in_bytes) or v2 (memory.high, fallback to memory.max, fallback to memory.low, fallback to memory.min) +# optional argument is a file path to fall back to for reading a default value, useful e.g. when reading on a system that has a "fake" limit info file (defaults to '/sys/fs/cgroup/memory/memory.limit_in_bytes') +# if env var CGROUP_UTIL_PROCFS_ROOT is passed, it will be used instead of '/proc' to find '/proc/self/cgroup', '/proc/self/mountinfo' etc (useful for testing, defaults to '/proc') +# if env var CGROUP_UTIL_CGROUPFS_PREFIX is passed, it will be prepended to any /sys/fs/cgroup or similar path used (useful for testing, defaults to '') +# pass a value for env var CGROUP_UTIL_VERBOSE to enable verbose mode +cgroup_util_read_cgroup_memory_limit_with_fallback() { + local fallback=${1-"${CGROUP_UTIL_CGROUPFS_PREFIX-}/sys/fs/cgroup/memory/memory.limit_in_bytes"} + + cgroup_util_read_cgroup_memory_limit || { + local retval=$? 
+ + if ((retval != 99)) && [[ -r "$fallback" ]]; then + [[ -n ${CGROUP_UTIL_VERBOSE-} ]] && echo "Reading fallback limit from '${fallback}'" >&2 + cat "$fallback" + return + fi + + return "$retval" + } +} diff --git a/lib/environment.sh b/lib/environment.sh old mode 100644 new mode 100755 index 2be01abfb..4b342531a --- a/lib/environment.sh +++ b/lib/environment.sh @@ -86,7 +86,14 @@ write_profile() { local bp_dir="$1" local build_dir="$2" mkdir -p "$build_dir/.profile.d" - cp "$bp_dir"/profile/* "$build_dir/.profile.d/" + cp "$bp_dir"/profile/nodejs.sh "$build_dir/.profile.d/" + write_web_concurrency "$bp_dir" "$build_dir/.profile.d/WEB_CONCURRENCY.sh" +} + +write_web_concurrency() { + local bp_dir="$1" + # concatenate these two together + cat "$bp_dir"/etc/cgroups.sh "$bp_dir"/profile/WEB_CONCURRENCY.sh > "$2" } write_ci_profile() { diff --git a/profile/WEB_CONCURRENCY.sh b/profile/WEB_CONCURRENCY.sh index 9e7d7eddf..a541dbd75 100755 --- a/profile/WEB_CONCURRENCY.sh +++ b/profile/WEB_CONCURRENCY.sh @@ -31,17 +31,28 @@ log_concurrency() { detect_memory() { local default=$1 - if [ -e /sys/fs/cgroup/memory/memory.limit_in_bytes ]; then - echo $(($(cat /sys/fs/cgroup/memory/memory.limit_in_bytes) / 1048576)) + local memory_limit + memory_limit=$(cgroup_util_read_cgroup_memory_limit_with_fallback) && { + echo $(( memory_limit / 1024 / 1024 )) + return + } + + if (($? 
== 99)); then + dne_memory else echo "$default" - fi + fi +} + +dne_memory() { + echo "129024" } bound_memory() { local detected=$1 # Memory is bound to the maximum memory of known dyno types: ~126 GB - local max_detected_memory=129024 + local max_detected_memory + max_detected_memory=$(dne_memory) if (( detected > max_detected_memory )); then echo "$max_detected_memory" else diff --git a/test/run b/test/run index 0438a323f..077357ae0 100755 --- a/test/run +++ b/test/run @@ -557,35 +557,55 @@ testBuildWithUserCacheDirectoriesCamel() { } testConcurrency1X() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=512 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh + # write_web_concurrency concatenates a vendored library file and our own logic into one file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" "$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=512 capture "$WEB_CONCURRENCY_SH" assertCaptured "Detected 512 MB available memory, 512 MB limit per process (WEB_MEMORY)" assertCaptured "Recommending WEB_CONCURRENCY=1" assertCapturedSuccess } testConcurrency2X() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=1024 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh + # write_web_concurrency concatenates a vendored library file and our own logic into one file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" "$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=1024 capture "$WEB_CONCURRENCY_SH" assertCaptured "Detected 1024 MB available memory, 512 MB limit per process (WEB_MEMORY)" assertCaptured "Recommending WEB_CONCURRENCY=2" assertCapturedSuccess } testConcurrencyPerformanceM() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=2560 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh + # write_web_concurrency concatenates a vendored library file and our own logic into one file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" 
"$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=2560 capture "$WEB_CONCURRENCY_SH" assertCaptured "Detected 2560 MB available memory, 512 MB limit per process (WEB_MEMORY)" assertCaptured "Recommending WEB_CONCURRENCY=5" assertCapturedSuccess } testConcurrencyPerformanceL() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=14336 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh - assertCaptured "Detected 14336 MB available memory, 512 MB limit per process (WEB_MEMORY)" - assertCaptured "Recommending WEB_CONCURRENCY=28" - assertCapturedSuccess + # write_web_concurrency concatenates a vendored library file and our own logic into one file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" "$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=14336 capture "$WEB_CONCURRENCY_SH" + assertCaptured "Detected 14336 MB available memory, 512 MB limit per process (WEB_MEMORY)" + assertCaptured "Recommending WEB_CONCURRENCY=28" + assertCapturedSuccess } testConcurrencyCustomLimit() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=1024 WEB_MEMORY=256 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh + # write_web_concurrency concatenates a vendored library file and our own logic into one file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" "$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=1024 WEB_MEMORY=256 capture "$WEB_CONCURRENCY_SH" assertCaptured "Detected 1024 MB available memory, 256 MB limit per process (WEB_MEMORY)" assertCaptured "Recommending WEB_CONCURRENCY=4" assertCapturedSuccess @@ -594,7 +614,11 @@ testConcurrencyCustomLimit() { # When /sys/fs/cgroup/memory/memory.limit_in_bytes lies and gives a ridiculous value # This happens on Dokku for example testConcurrencyTooHigh() { - LOG_CONCURRENCY=true MEMORY_AVAILABLE=10000000000 capture "$(pwd)"/profile/WEB_CONCURRENCY.sh + # write_web_concurrency concatenates a vendored library file and our own logic into one 
file + WEB_CONCURRENCY_SH=$(mktemp) + chmod u+x "$WEB_CONCURRENCY_SH" + write_web_concurrency "$(pwd)" "$WEB_CONCURRENCY_SH" + LOG_CONCURRENCY=true MEMORY_AVAILABLE=10000000000 capture "$WEB_CONCURRENCY_SH" assertCaptured "Could not determine a reasonable value for WEB_CONCURRENCY" assertCaptured "Recommending WEB_CONCURRENCY=1" assertCapturedSuccess