# Nightly CI https://github.com/marketplace/actions/deploy-nightly
# This is the nightly CI pipeline that runs against the performance environment.
# GitHub Actions limits each job to 6 hours.
name: Deploy Perf Env Nightly CI
on:
  # Run nightly
  schedule:
    - cron: '0 4 * * 2' # run Tuesday at 4 AM UTC / midnight EDT
  # Run on Monday, after the assisted-installer and operator weekly CI has completed
  workflow_run:
    workflows: ["Deploy Operator Perf Env Weekly CI"]
    types:
      - completed
  workflow_dispatch:
#  on:
#    push:
#      branches: [ main ]
# Ensure that only one deploy task per branch/environment runs at a time.
concurrency:
  group: performance-environment
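  # Without cancel-in-progress, a newly triggered run queues behind the one
  # already in progress rather than cancelling it (GitHub Actions' default
  # concurrency behavior).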
jobs:
  initialize_nightly:
    name: initialize perf nightly
    runs-on: ubuntu-latest
    outputs:
      start_time_output: ${{ steps.nightly_start_step.outputs.start_time }}
    steps:
      - uses: actions/checkout@v3
      - id: nightly_start_step
        run: echo "start_time=$(printf '%(%s)T' -1)" >> $GITHUB_OUTPUT
      - name: ⚙️ Set SSH key
        env:
          PROVISION_PRIVATE_KEY: ${{ secrets.PERF_PROVISION_PRIVATE_KEY }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          PROVISION_IP: ${{ secrets.PERF_PROVISION_IP }}
          PROVISION_USER: ${{ secrets.PERF_PROVISION_USER }}
        run: |
          mkdir -p "$RUNNER_PATH/.ssh/"
          echo "$PROVISION_PRIVATE_KEY" > "$RUNNER_PATH/private_key.txt"
          sudo chmod 600 "$RUNNER_PATH/private_key.txt"
          echo "PROVISION_PRIVATE_KEY_PATH=$RUNNER_PATH/private_key.txt" >> "$GITHUB_ENV"
          cat >> "$RUNNER_PATH/.ssh/config" <<END
          Host provision
          HostName $PROVISION_IP
          User $PROVISION_USER
          IdentityFile $RUNNER_PATH/private_key.txt
          StrictHostKeyChecking no
          ServerAliveInterval 30
          ServerAliveCountMax 5
          END
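          # The 'provision' host alias defined above lets later steps run plain
          # 'ssh provision' / 'scp ... provision:...' without repeating the IP,
          # user, and key (this assumes RUNNER_PATH is the runner's home
          # directory, so ssh picks up this config file).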
      - name: ⚙️ Set Kubeconfig and hosts
        env:
          KUBECONFIG_CONTENTS: ${{ secrets.PERF_KUBECONFIG }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          OCP_HOSTS: ${{ secrets.PERF_OCP_HOSTS }}
        run: |
          mkdir -p "$RUNNER_PATH/.kube/"
          echo "$KUBECONFIG_CONTENTS" > "$RUNNER_PATH/.kube/config"
          echo "KUBECONFIG_PATH=$RUNNER_PATH/.kube/config" >> "$GITHUB_ENV"
          sudo tee -a /etc/hosts <<< "$OCP_HOSTS" > /dev/null
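          # OCP_HOSTS presumably holds 'IP hostname' lines for the cluster, so
          # appending them to /etc/hosts lets the runner resolve the OpenShift
          # API and route hostnames without DNS.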
      - name: ⚙ Remove images on provision and copy config to provision
        env:
          CONTAINER_KUBECONFIG_PATH: ${{ secrets.CONTAINER_KUBECONFIG_PATH }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
        run: |
          echo '>>>>>>>>>>>>>>>>>>>>>>>>>> Remove image on provision >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
          echo "if [[ \"\$(sudo podman images -q quay.io/ebattat/benchmark-runner 2> /dev/null)\" != \"\" ]]; then sudo podman rmi -f \$(sudo podman images -q quay.io/ebattat/benchmark-runner 2> /dev/null); fi" > "$RUNNER_PATH/remove_image.sh"
          scp -r "$RUNNER_PATH/remove_image.sh" provision:"/tmp/remove_image.sh"
          ssh -t provision "chmod +x /tmp/remove_image.sh; /tmp/remove_image.sh; rm -f /tmp/remove_image.sh"
          echo '>>>>>>>>>>>>>>>>>>>>>>>>>> Copy config to provision >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
          scp -r "$RUNNER_PATH/.kube/config" provision:"$CONTAINER_KUBECONFIG_PATH"
  workload:
    name: workload
    runs-on: ubuntu-latest
    outputs:
      job_status: ${{ steps.job_step.outputs.status }}
    needs: initialize_nightly
    strategy:
      # run only one workload at a time
      max-parallel: 1
      # continue with the remaining workloads if one fails
      fail-fast: false
      matrix:
        workload:
          - 'uperf_pod'
          - 'uperf_kata'
          - 'uperf_vm'
          - 'hammerdb_pod_mariadb'
          - 'hammerdb_kata_mariadb'
          - 'hammerdb_vm_mariadb'
          - 'hammerdb_pod_postgres'
          - 'hammerdb_kata_postgres'
          - 'hammerdb_vm_postgres'
          - 'hammerdb_pod_postgres_lso'
          - 'hammerdb_kata_postgres_lso'
          - 'hammerdb_vm_postgres_lso'
          - 'hammerdb_pod_mssql'
          - 'hammerdb_kata_mssql'
          - 'hammerdb_vm_mssql'
          - 'vdbench_pod'
          - 'vdbench_kata'
          - 'vdbench_vm'
          - 'vdbench_pod_scale'
          - 'vdbench_kata_scale'
          - 'vdbench_vm_scale'
          - 'clusterbuster'
          - 'bootstorm_vm_scale'
          - 'windows_vm_scale'
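          # 24 workload variants in total; each becomes its own matrix job and,
          # with max-parallel: 1, they run strictly one after another against
          # the shared performance environment.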
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install latest benchmark-runner
        run: |
          python -m pip install --upgrade pip
          pip install benchmark-runner
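          # benchmark-runner is installed on the runner only so the run step
          # below can read the released version via 'pip freeze'; the workload
          # itself runs inside a container on the provision host.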
      - name: ⚙️ Set SSH key
        env:
          PROVISION_PRIVATE_KEY: ${{ secrets.PERF_PROVISION_PRIVATE_KEY }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          PROVISION_IP: ${{ secrets.PERF_PROVISION_IP }}
          PROVISION_USER: ${{ secrets.PERF_PROVISION_USER }}
        run: |
          mkdir -p "$RUNNER_PATH/.ssh/"
          echo "$PROVISION_PRIVATE_KEY" > "$RUNNER_PATH/private_key.txt"
          sudo chmod 600 "$RUNNER_PATH/private_key.txt"
          echo "PROVISION_PRIVATE_KEY_PATH=$RUNNER_PATH/private_key.txt" >> "$GITHUB_ENV"
          cat >> "$RUNNER_PATH/.ssh/config" <<END
          Host provision
          HostName $PROVISION_IP
          User $PROVISION_USER
          IdentityFile $RUNNER_PATH/private_key.txt
          StrictHostKeyChecking no
          ServerAliveInterval 30
          ServerAliveCountMax 5
          END
      - name: ⚙️ Set Kubeconfig and hosts
        env:
          KUBECONFIG_CONTENTS: ${{ secrets.PERF_KUBECONFIG }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          OCP_HOSTS: ${{ secrets.PERF_OCP_HOSTS }}
        run: |
          mkdir -p "$RUNNER_PATH/.kube/"
          echo "$KUBECONFIG_CONTENTS" > "$RUNNER_PATH/.kube/config"
          echo "KUBECONFIG_PATH=$RUNNER_PATH/.kube/config" >> "$GITHUB_ENV"
          sudo tee -a /etc/hosts <<< "$OCP_HOSTS" > /dev/null
      - name: ✔️ Run workload ${{ matrix.workload }}
        env:
          KUBEADMIN_PASSWORD: ${{ secrets.PERF_KUBEADMIN_PASSWORD }}
          PIN_NODE_BENCHMARK_OPERATOR: ${{ secrets.PERF_PIN_NODE_BENCHMARK_OPERATOR }}
          PIN_NODE1: ${{ secrets.PERF_PIN_NODE1 }}
          PIN_NODE2: ${{ secrets.PERF_PIN_NODE2 }}
          ELASTICSEARCH: ${{ secrets.PERF_ELASTICSEARCH }}
          ELASTICSEARCH_PORT: ${{ secrets.PERF_ELASTICSEARCH_PORT }}
          ELASTICSEARCH_USER: ${{ secrets.PERF_ELASTICSEARCH_USER }}
          ELASTICSEARCH_PASSWORD: ${{ secrets.PERF_ELASTICSEARCH_PASSWORD }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          CONTAINER_KUBECONFIG_PATH: ${{ secrets.CONTAINER_KUBECONFIG_PATH }}
          IBM_REGION_NAME: ${{ secrets.IBM_REGION_NAME }}
          IBM_ENDPOINT_URL: ${{ secrets.IBM_ENDPOINT_URL }}
          IBM_ACCESS_KEY_ID: ${{ secrets.IBM_ACCESS_KEY_ID }}
          IBM_SECRET_ACCESS_KEY: ${{ secrets.IBM_SECRET_ACCESS_KEY }}
          IBM_BUCKET: ${{ secrets.IBM_BUCKET }}
          IBM_KEY: ${{ secrets.IBM_KEY }}
          RUN_ARTIFACTS_URL: ${{ secrets.PERF_RUN_ARTIFACTS_URL }}
          SCALE_NODES: ${{ secrets.PERF_SCALE_NODES }}
          REDIS: ${{ secrets.REDIS }}
          WORKER_DISK_IDS: ${{ secrets.PERF_WORKER_DISK_IDS }}
          TIMEOUT: 8000
          SCALE: 2
          BOOTSTORM_SCALE: 80
          WINDOWS_SCALE: 40
          THREADS_LIMIT: 20
          RUN_TYPE: 'perf_ci'
          ENABLE_PROMETHEUS_SNAPSHOT: 'True'
          WINDOWS_URL: ${{ secrets.PERF_WINDOWS_URL }}
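        # TIMEOUT is presumably in seconds (8000 s ≈ 2.2 h), keeping each
        # matrix job well inside the 6-hour Actions limit noted at the top.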
        run: |
          build=$(pip freeze | grep benchmark-runner | sed 's/==/=/g')
          build_version="$(cut -d'=' -f2 <<<"$build")"
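          # e.g. pip freeze prints 'benchmark-runner==1.2.3', which becomes
          # build='benchmark-runner=1.2.3' and build_version='1.2.3'
          # (the version number here is illustrative)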
          echo '>>>>>>>>>>>>>>>>>>>>>>>>>> Start E2E workload: ${{ matrix.workload }} >>>>>>>>>>>>>>>>>>>>>>>>>>'
          scp -r "$RUNNER_PATH/.kube/config" provision:"$CONTAINER_KUBECONFIG_PATH"
          # Scale workloads: vdbench / bootstorm / windows
          if [[ '${{ matrix.workload }}' == 'vdbench_pod_scale' || '${{ matrix.workload }}' == 'vdbench_kata_scale' || '${{ matrix.workload }}' == 'vdbench_vm_scale' || '${{ matrix.workload }}' == 'bootstorm_vm_scale' || '${{ matrix.workload }}' == 'windows_vm_scale' ]]
          then
            workload=$(awk -F_ '{print $1"_"$2}' <<< '${{ matrix.workload }}')
            # bootstorm_vm_scale: Redis is not needed for synchronization, but SCALE and THREADS_LIMIT are
            if [[ '${{ matrix.workload }}' == 'bootstorm_vm_scale' ]]
            then
              # Warm-up: pull the Fedora image from quay.io onto each node
              ssh -t provision "podman run --rm -t -e WORKLOAD='$workload' -e KUBEADMIN_PASSWORD='$KUBEADMIN_PASSWORD' -e SCALE='$SCALE' -e SCALE_NODES=$SCALE_NODES -e REDIS='$REDIS' -e RUN_ARTIFACTS_URL='$RUN_ARTIFACTS_URL' -e BUILD_VERSION='$build_version' -e RUN_TYPE='$RUN_TYPE' -e KATA_CPUOFFLINE_WORKAROUND='True' -e SAVE_ARTIFACTS_LOCAL='False' -e ENABLE_PROMETHEUS_SNAPSHOT='$ENABLE_PROMETHEUS_SNAPSHOT' -e THREADS_LIMIT='$THREADS_LIMIT' -e WINDOWS_URL='$WINDOWS_URL' -e TIMEOUT='$TIMEOUT' -e log_level='INFO' -v '$CONTAINER_KUBECONFIG_PATH':'$CONTAINER_KUBECONFIG_PATH' --privileged 'quay.io/ebattat/benchmark-runner:latest'"
              SCALE=$BOOTSTORM_SCALE
            elif [[ '${{ matrix.workload }}' == 'windows_vm_scale' ]]
            then
              # No warm-up required: the DataVolume is loaded before the test runs
              SCALE=$WINDOWS_SCALE
            fi
            # SCALE_NODES is a list, so it is intentionally left unquoted
            ssh -t provision "podman run --rm -t -e WORKLOAD='$workload' -e KUBEADMIN_PASSWORD='$KUBEADMIN_PASSWORD' -e SCALE='$SCALE' -e SCALE_NODES=$SCALE_NODES -e REDIS='$REDIS' -e PIN_NODE_BENCHMARK_OPERATOR='$PIN_NODE_BENCHMARK_OPERATOR' -e PIN_NODE1='$PIN_NODE1' -e PIN_NODE2='$PIN_NODE2' -e ELASTICSEARCH='$ELASTICSEARCH' -e ELASTICSEARCH_PORT='$ELASTICSEARCH_PORT' -e ELASTICSEARCH_USER='$ELASTICSEARCH_USER' -e ELASTICSEARCH_PASSWORD='$ELASTICSEARCH_PASSWORD' -e IBM_REGION_NAME='$IBM_REGION_NAME' -e IBM_ENDPOINT_URL='$IBM_ENDPOINT_URL' -e IBM_ACCESS_KEY_ID='$IBM_ACCESS_KEY_ID' -e IBM_SECRET_ACCESS_KEY='$IBM_SECRET_ACCESS_KEY' -e IBM_BUCKET='$IBM_BUCKET' -e IBM_KEY='$IBM_KEY' -e RUN_ARTIFACTS_URL='$RUN_ARTIFACTS_URL' -e BUILD_VERSION='$build_version' -e RUN_TYPE='$RUN_TYPE' -e KATA_CPUOFFLINE_WORKAROUND='True' -e SAVE_ARTIFACTS_LOCAL='False' -e ENABLE_PROMETHEUS_SNAPSHOT='$ENABLE_PROMETHEUS_SNAPSHOT' -e THREADS_LIMIT='$THREADS_LIMIT' -e WINDOWS_URL='$WINDOWS_URL' -e TIMEOUT='$TIMEOUT' -e log_level='INFO' -v '$CONTAINER_KUBECONFIG_PATH':'$CONTAINER_KUBECONFIG_PATH' --privileged 'quay.io/ebattat/benchmark-runner:latest'"
          else
            if [[ '${{ matrix.workload }}' == 'clusterbuster' ]]
            then
              # clusterbuster: disable the Prometheus snapshot
              ENABLE_PROMETHEUS_SNAPSHOT='False'
            fi
            ssh -t provision "podman run --rm -t -e WORKLOAD='${{ matrix.workload }}' -e KUBEADMIN_PASSWORD='$KUBEADMIN_PASSWORD' -e PIN_NODE_BENCHMARK_OPERATOR='$PIN_NODE_BENCHMARK_OPERATOR' -e PIN_NODE1='$PIN_NODE1' -e PIN_NODE2='$PIN_NODE2' -e ELASTICSEARCH='$ELASTICSEARCH' -e ELASTICSEARCH_PORT='$ELASTICSEARCH_PORT' -e ELASTICSEARCH_USER='$ELASTICSEARCH_USER' -e ELASTICSEARCH_PASSWORD='$ELASTICSEARCH_PASSWORD' -e IBM_REGION_NAME='$IBM_REGION_NAME' -e IBM_ENDPOINT_URL='$IBM_ENDPOINT_URL' -e IBM_ACCESS_KEY_ID='$IBM_ACCESS_KEY_ID' -e IBM_SECRET_ACCESS_KEY='$IBM_SECRET_ACCESS_KEY' -e IBM_BUCKET='$IBM_BUCKET' -e IBM_KEY='$IBM_KEY' -e RUN_ARTIFACTS_URL='$RUN_ARTIFACTS_URL' -e BUILD_VERSION='$build_version' -e RUN_TYPE='$RUN_TYPE' -e KATA_CPUOFFLINE_WORKAROUND='True' -e SAVE_ARTIFACTS_LOCAL='False' -e ENABLE_PROMETHEUS_SNAPSHOT='$ENABLE_PROMETHEUS_SNAPSHOT' -e WORKER_DISK_IDS=$WORKER_DISK_IDS -e TIMEOUT='$TIMEOUT' -e log_level='INFO' -v '$CONTAINER_KUBECONFIG_PATH':'$CONTAINER_KUBECONFIG_PATH' --privileged 'quay.io/ebattat/benchmark-runner:latest'"
          fi
          ssh -t provision "podman rmi -f 'quay.io/ebattat/benchmark-runner:latest'"
          echo '>>>>>>>>>>>>>>>>>>>>>>>>>> End E2E workload: ${{ matrix.workload }} >>>>>>>>>>>>>>>>>>>>>>>>>>>>'
      - id: job_step
        if: always()
        run: |
          if [[ "${{ job.status }}" == "failure" || "${{ job.status }}" == "cancelled" ]]; then echo "status=${{ job.status }}" >> $GITHUB_OUTPUT; fi
  finalize_nightly:
    name: finalize nightly
    runs-on: ubuntu-latest
    if: always()
    needs: [initialize_nightly, workload]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: ⚙ Set START CI TIME
        run: echo "START_CI=${{ needs.initialize_nightly.outputs.start_time_output }}" >> "$GITHUB_ENV"
      - name: Install latest benchmark-runner
        run: |
          python -m pip install --upgrade pip
          pip install benchmark-runner
      - name: ⚙️ Set SSH key
        env:
          PROVISION_PRIVATE_KEY: ${{ secrets.PERF_PROVISION_PRIVATE_KEY }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          PROVISION_IP: ${{ secrets.PERF_PROVISION_IP }}
          PROVISION_USER: ${{ secrets.PERF_PROVISION_USER }}
        run: |
          mkdir -p "$RUNNER_PATH/.ssh/"
          echo "$PROVISION_PRIVATE_KEY" > "$RUNNER_PATH/private_key.txt"
          sudo chmod 600 "$RUNNER_PATH/private_key.txt"
          echo "PROVISION_PRIVATE_KEY_PATH=$RUNNER_PATH/private_key.txt" >> "$GITHUB_ENV"
          cat >> "$RUNNER_PATH/.ssh/config" <<END
          Host provision
          HostName $PROVISION_IP
          User $PROVISION_USER
          IdentityFile $RUNNER_PATH/private_key.txt
          StrictHostKeyChecking no
          ServerAliveInterval 30
          ServerAliveCountMax 5
          END
      - name: ⚙️ Set Kubeconfig and hosts
        env:
          KUBECONFIG_CONTENTS: ${{ secrets.PERF_KUBECONFIG }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          OCP_HOSTS: ${{ secrets.PERF_OCP_HOSTS }}
        run: |
          mkdir -p "$RUNNER_PATH/.kube/"
          echo "$KUBECONFIG_CONTENTS" > "$RUNNER_PATH/.kube/config"
          echo "KUBECONFIG_PATH=$RUNNER_PATH/.kube/config" >> "$GITHUB_ENV"
          sudo tee -a /etc/hosts <<< "$OCP_HOSTS" > /dev/null
      - name: ✔️ Update status
        env:
          KUBEADMIN_PASSWORD: ${{ secrets.PERF_KUBEADMIN_PASSWORD }}
          PIN_NODE_BENCHMARK_OPERATOR: ${{ secrets.PERF_PIN_NODE_BENCHMARK_OPERATOR }}
          PIN_NODE1: ${{ secrets.PERF_PIN_NODE1 }}
          PIN_NODE2: ${{ secrets.PERF_PIN_NODE2 }}
          ELASTICSEARCH: ${{ secrets.PERF_ELASTICSEARCH }}
          ELASTICSEARCH_PORT: ${{ secrets.PERF_ELASTICSEARCH_PORT }}
          ELASTICSEARCH_USER: ${{ secrets.PERF_ELASTICSEARCH_USER }}
          ELASTICSEARCH_PASSWORD: ${{ secrets.PERF_ELASTICSEARCH_PASSWORD }}
          RUNNER_PATH: ${{ secrets.RUNNER_PATH }}
          CONTAINER_KUBECONFIG_PATH: ${{ secrets.CONTAINER_KUBECONFIG_PATH }}
          RUN_TYPE: 'perf_ci'
        run: |
          # Capture the latest commit ID of each benchmark repository
          declare -a repositories=('redhat-performance/benchmark-runner' 'cloud-bulldozer/benchmark-operator' 'cloud-bulldozer/benchmark-wrapper')
          for repository in "${repositories[@]}"
          do
            git clone "https://github.com/$repository" "$RUNNER_PATH/$repository"
            pushd "$RUNNER_PATH/$repository"
            case $repository in
              'redhat-performance/benchmark-runner')
                echo "BENCHMARK_RUNNER_ID=$(git rev-parse @)" >> "$GITHUB_ENV"
                BENCHMARK_RUNNER_ID=$(git rev-parse @)
                echo "$BENCHMARK_RUNNER_ID"
                ;;
              'cloud-bulldozer/benchmark-operator')
                echo "BENCHMARK_OPERATOR_ID=$(git rev-parse @)" >> "$GITHUB_ENV"
                BENCHMARK_OPERATOR_ID=$(git rev-parse @)
                echo "$BENCHMARK_OPERATOR_ID"
                ;;
              'cloud-bulldozer/benchmark-wrapper')
                echo "BENCHMARK_WRAPPER_ID=$(git rev-parse @)" >> "$GITHUB_ENV"
                BENCHMARK_WRAPPER_ID=$(git rev-parse @)
                echo "$BENCHMARK_WRAPPER_ID"
                ;;
            esac
          done
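          # Each commit ID is captured twice on purpose: values written to
          # GITHUB_ENV only become visible in later steps, so a plain shell
          # variable keeps the ID available to the podman run below within
          # this same step.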
          build=$(pip freeze | grep benchmark-runner | sed 's/==/=/g')
          build_version="$(cut -d'=' -f2 <<<"$build")"
          end=$(printf '%(%s)T' -1)
          ci_minutes_time=$(( (end - START_CI) / 60 ))
          # Check whether any workload failed or was cancelled => report pass/failed
          if [[ "${{ needs.workload.outputs.job_status }}" == "failure" || "${{ needs.workload.outputs.job_status }}" == "cancelled" ]]; then status="failed"; else status="pass"; fi
          echo ">>>>>>>>>>>>>>>>>>>>>>>>>> Update CI status: $status >>>>>>>>>>>>>>>>>>>>>>>>>>"
          podman run --rm -e KUBEADMIN_PASSWORD="$KUBEADMIN_PASSWORD" -e PIN_NODE_BENCHMARK_OPERATOR="$PIN_NODE_BENCHMARK_OPERATOR" -e PIN_NODE1="$PIN_NODE1" -e PIN_NODE2="$PIN_NODE2" -e ELASTICSEARCH="$ELASTICSEARCH" -e ELASTICSEARCH_PORT="$ELASTICSEARCH_PORT" -e ELASTICSEARCH_USER="$ELASTICSEARCH_USER" -e ELASTICSEARCH_PASSWORD="$ELASTICSEARCH_PASSWORD" -e BUILD_VERSION="$build_version" -e CI_STATUS="$status" -e CI_MINUTES_TIME="$ci_minutes_time" -e BENCHMARK_RUNNER_ID="$BENCHMARK_RUNNER_ID" -e BENCHMARK_OPERATOR_ID="$BENCHMARK_OPERATOR_ID" -e BENCHMARK_WRAPPER_ID="$BENCHMARK_WRAPPER_ID" -e RUN_TYPE="$RUN_TYPE" -e TIMEOUT="3600" -e log_level="INFO" -v "$KUBECONFIG_PATH":"$CONTAINER_KUBECONFIG_PATH" --privileged "quay.io/ebattat/benchmark-runner:latest"
          echo '>>>>>>>>>>>>>>>>>>>>>>>>>> Remove image on provision >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
          echo "if [[ \"\$(sudo podman images -q quay.io/ebattat/benchmark-runner 2> /dev/null)\" != \"\" ]]; then sudo podman rmi -f \$(sudo podman images -q quay.io/ebattat/benchmark-runner 2> /dev/null); fi" > "$RUNNER_PATH/remove_image.sh"
          scp -r "$RUNNER_PATH/remove_image.sh" provision:"/tmp/remove_image.sh"
          ssh -t provision "chmod +x /tmp/remove_image.sh; /tmp/remove_image.sh; rm -f /tmp/remove_image.sh"