Skip to content

Equinix k8s Action churn check #176

Equinix k8s Action churn check

Equinix k8s Action churn check #176

# Workflow identity, triggers, and the token permissions shared by all jobs.
name: Equinix k8s Action churn check

on:
  # Manual runs: each input is shown in the "Run workflow" form.
  workflow_dispatch:
    inputs:
      termination_time:
        description: 'Cluster termination time in hours after now'
        required: true
        default: '8'
      control_plan:
        description: 'control plane machine type'
        required: false
        default: 'c3.small.x86'
      node_plan:
        description: 'node machine type'
        required: false
        default: 'c3.small.x86'
      metro:
        description: 'metro location'
        required: false
        default: 'da'
  # Scheduled runs: once a day at minute 0 of hour 18.
  schedule:
    - cron: '0 18 * * *'

# Scopes granted to the workflow token.
permissions:
  pull-requests: write
  contents: write
  repository-projects: write
  packages: write
jobs:
  # Delegates cluster creation to the reusable workflow kept in this repository.
  Create-k8s-cluster:
    name: "Create Cluster"
    uses: ./.github/workflows/create_equinix_k8s_cluster.yml
    secrets: inherit
    with:
      # `github.event.inputs` is only populated for workflow_dispatch runs. On
      # the cron trigger every lookup yields an empty string, so each value
      # falls back to the default declared for the matching dispatch input.
      # NOTE(review): assumes the called workflow should see these defaults on
      # scheduled runs rather than empty strings - confirm against its inputs.
      termination_time: ${{ github.event.inputs.termination_time || '8' }}
      control_plan: ${{ github.event.inputs.control_plan || 'c3.small.x86' }}
      node_plan: ${{ github.event.inputs.node_plan || 'c3.small.x86' }}
      metro: ${{ github.event.inputs.metro || 'da' }}
# Test job: starts only after the cluster-creation job has finished.
Test-K8s:
  name: "Test K8s"
  needs: Create-k8s-cluster
  runs-on: ubuntu-latest
  steps:
    # The repository checkout provides the local action and the util/ scripts
    # referenced by later steps.
    - name: Checkout code
      uses: actions/checkout@v4

    # Local composite action; it is handed the artifact name for this run.
    - name: Config kube
      uses: ./.github/actions/kube_config
      with:
        # kube config
        artifact_name: kubeconfig-${{ github.run_id }}
- name: Install Prometheus
  run: |
    # Shallow-clone kube-prometheus and apply its manifests (setup first).
    git clone --depth 1 https://github.com/prometheus-operator/kube-prometheus
    cd kube-prometheus
    kubectl apply --server-side --validate=false -f manifests/setup
    kubectl apply --validate=false -f manifests/
    # The StatefulSet may not exist right after the apply; poll every 5s until
    # `kubectl get` finds it, dumping the namespace contents on each miss.
    while ! kubectl -n monitoring get statefulset prometheus-k8s; do
      kubectl get all -n monitoring
      echo "StatefulSet not created yet, waiting..."
      sleep 5
    done
    # Block until the adapter is available and the Prometheus rollout is done.
    kubectl wait deployments -n monitoring prometheus-adapter --for=condition=available --timeout 3m
    kubectl rollout status --watch --timeout=600s statefulset -n monitoring prometheus-k8s
- name: Pull kube-burner
  run: |
    # Release used for the prebuilt binary; the tag is spelled with a lowercase
    # "v" and the asset file name with an uppercase "V".
    kube_burner_version=1.9.5
    # The clone supplies the example workloads and metrics profiles.
    git clone https://github.com/kube-burner/kube-burner
    # Unpack only the `kube-burner` binary into the cloned directory.
    curl -sS -L "https://github.com/kube-burner/kube-burner/releases/download/v${kube_burner_version}/kube-burner-V${kube_burner_version}-linux-x86_64.tar.gz" \
      | tar -xzC kube-burner/ kube-burner
- name: Port forward prom
  run: |
    # Started in the background; later steps reach Prometheus on localhost:9090.
    kubectl port-forward --namespace monitoring service/prometheus-k8s 9090:9090 &
- name: Run node-density w/ churn w/o kepler
  run: |
    # Runs the kubelet-density workload with churn enabled, then stores node
    # CPU and kube-apiserver CPU utilisation for the test window under /tmp.
    cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
    cd kube-burner/examples/workloads/kubelet-density

    # Work on a copy so kubelet-density.yml is left as cloned by this step.
    cp kubelet-density.yml kubelet-density-churn.yml
    sed -i 's/qps: 2/qps: 20/g' kubelet-density-churn.yml
    sed -i 's/burst: 2/burst: 20/g' kubelet-density-churn.yml
    sed -i 's/jobIterations: 25/jobIterations: 80/g' kubelet-density-churn.yml

    # Append a local indexer. Both keys of the list item must start in the
    # same column, so the snippet is written verbatim from a quoted here-doc.
    cat >> kubelet-density-churn.yml <<'EOF'
    indexers:
     - type: local
       metricsDirectory: collected-metrics
    EOF

    sed -i 's/namespacedIterations: false/namespacedIterations: true/g' kubelet-density-churn.yml

    # Insert the churn settings directly below `namespacedIterations: true`.
    # Each inserted line is prefixed with \1, the anchor line's own leading
    # whitespace, so the keys stay inside the same mapping as the anchor.
    churn_settings=(
      'iterationsPerNamespace: 1'
      'churn: true'
      'churnCycles: 0'
      'churnDuration: 1h0m0s'
      'churnPercent: 10'
      'churnDelay: 2m0s'
      'churnDeletionStrategy: default'
    )
    churn_lines=''
    for setting in "${churn_settings[@]}"; do
      churn_lines+="\\n\\1${setting}"
    done
    sed -i -E "s/^([[:space:]]*)namespacedIterations: true.*\$/&${churn_lines}/" kubelet-density-churn.yml
    # Fail fast when the anchor line was missing; otherwise the run would
    # silently measure a workload without churn.
    grep -q 'churnDeletionStrategy: default' kubelet-density-churn.yml \
      || { echo "churn settings were not inserted into kubelet-density-churn.yml" >&2; exit 1; }

    export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    ../../../kube-burner init -c kubelet-density-churn.yml -u http://localhost:9090 --metrics-profile metrics.yaml
    # sleep a min after test
    sleep 60
    export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # query_range <promql>: range query over [START_TIME, END_TIME], step 5.
    query_range() {
      curl -G 'http://localhost:9090/api/v1/query_range' \
        --data-urlencode "query=$1" \
        --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5"
    }

    # Node CPU utilisation per instance and mode (idle excluded), raw JSON.
    query_range '(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
      | tee /tmp/nokepler-churn-node-cpu.json

    # kube-apiserver CPU utilisation as a percentage of all machine cores,
    # flattened to one "Time: ..., Utilization: ...%" line per sample.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
      | jq -r '.data.result[0].values[] | @tsv' \
      | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
      | tee -a /tmp/nokepler-stress-test-kube-apiserver-churn-metrics.txt
- name: Run kube-burner node-density w/o churn w/o kepler
  run: |
    # Runs the kubelet-density workload without churn, then stores node CPU
    # and kube-apiserver CPU utilisation for the test window under /tmp.
    cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
    cd kube-burner/examples/workloads/kubelet-density

    # Tune the stock config in place.
    sed -i 's/qps: 2/qps: 20/g' kubelet-density.yml
    sed -i 's/burst: 2/burst: 20/g' kubelet-density.yml
    sed -i 's/jobIterations: 25/jobIterations: 80/g' kubelet-density.yml

    # Append a local indexer. Both keys of the list item must start in the
    # same column, so the snippet is written verbatim from a quoted here-doc.
    cat >> kubelet-density.yml <<'EOF'
    indexers:
     - type: local
       metricsDirectory: collected-metrics
    EOF

    export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    ../../../kube-burner init -c kubelet-density.yml -u http://localhost:9090 --metrics-profile metrics.yaml
    # sleep a min after test
    sleep 60
    export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # query_range <promql>: range query over [START_TIME, END_TIME], step 5.
    query_range() {
      curl -G 'http://localhost:9090/api/v1/query_range' \
        --data-urlencode "query=$1" \
        --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5"
    }

    # Node CPU utilisation per instance and mode (idle excluded), raw JSON.
    query_range '(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
      | tee /tmp/nokepler-node-cpu.json

    # kube-apiserver CPU utilisation as a percentage of all machine cores,
    # flattened to one "Time: ..., Utilization: ...%" line per sample.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
      | jq -r '.data.result[0].values[] | @tsv' \
      | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
      | tee -a /tmp/nokepler-stress-test-kube-apiserver-metrics.txt
- name: Install Kepler helm chart
  run: |
    # Install helm binary first
    curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
    chmod 700 get_helm.sh
    ./get_helm.sh
    export PATH=$PATH:/usr/local/bin

    helm repo add kepler https://sustainable-computing-io.github.io/kepler-helm-chart
    helm repo update

    # Chart overrides, one per entry; each is passed as its own `--set`
    # argument, in this order.
    kepler_overrides=(
      serviceMonitor.enabled=true
      image.tag=latest
      extraEnvVars.EXPOSE_COMPONENT_POWER="false"
      extraEnvVars.EXPOSE_ESTIMATED_IDLE_POWER_METRICS="true"
      extraEnvVars.EXPOSE_BPF_METRICS="false"
      extraEnvVars.EXPERIMENTAL_BPF_SAMPLE_RATE=1000
    )
    helm_set_args=()
    for override in "${kepler_overrides[@]}"; do
      helm_set_args+=(--set "${override}")
    done
    helm install kepler kepler/kepler --namespace kepler --create-namespace "${helm_set_args[@]}"
- name: Allow prometheus to scrape kepler metrics in kepler namespace
  run: |
    # Fetch Kepler's Prometheus Role manifest. -f aborts on an HTTP error
    # instead of saving the error page as prometheus_role.yaml.
    curl -fsSLO https://raw.githubusercontent.com/sustainable-computing-io/kepler/main/manifests/k8s/config/rbac/prometheus_role.yaml
    # Retarget the manifest from the "system" namespace to "kepler".
    sed -i 's/namespace: system/namespace: kepler/' prometheus_role.yaml
    kubectl apply -f prometheus_role.yaml

    # Bind the prometheus-k8s Role in the kepler namespace to the
    # prometheus-k8s service account of the monitoring namespace.
    # The delimiter is quoted so the manifest is passed through unexpanded.
    kubectl apply -f - <<'EOF'
    apiVersion: rbac.authorization.k8s.io/v1
    kind: RoleBinding
    metadata:
      labels:
        app.kubernetes.io/component: prometheus
        app.kubernetes.io/instance: k8s
        app.kubernetes.io/name: prometheus
        sustainable-computing.io/app: kepler
      name: prometheus-k8s
      namespace: kepler
    roleRef:
      apiGroup: rbac.authorization.k8s.io
      kind: Role
      name: prometheus-k8s
    subjects:
      - kind: ServiceAccount
        name: prometheus-k8s
        namespace: monitoring
    EOF
- name: Wait for Kepler metrics kepler_node_package_joules_total is not empty through Prometheus query
  run: |
    # Runs util/wait_for_prometheus.sh from the workspace checkout.
    "${GITHUB_WORKSPACE}/util/wait_for_prometheus.sh"
- name: Run kube-burner node-density w/o churn w/kepler
  run: |
    # Runs the kubelet-density workload without churn, then stores node CPU,
    # kube-apiserver CPU and Kepler CPU utilisation for the window under /tmp.
    cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
    cd kube-burner/examples/workloads/kubelet-density

    export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    ../../../kube-burner init -c kubelet-density.yml -u http://localhost:9090 --metrics-profile metrics.yaml
    # sleep a min after test
    sleep 60
    export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # query_range <promql>: range query over [START_TIME, END_TIME], step 5.
    query_range() {
      curl -G 'http://localhost:9090/api/v1/query_range' \
        --data-urlencode "query=$1" \
        --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5"
    }

    # Node CPU utilisation per instance and mode (idle excluded), raw JSON.
    # Overwrite rather than append: two responses concatenated in one file
    # would no longer be a single valid JSON document.
    query_range '(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
      | tee /tmp/kepler-node-cpu.json

    # kube-apiserver CPU utilisation as a percentage of all machine cores,
    # flattened to one "Time: ..., Utilization: ...%" line per sample.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
      | jq -r '.data.result[0].values[] | @tsv' \
      | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
      | tee -a /tmp/kepler-stress-test-kube-apiserver-metrics.txt

    # CPU utilisation of everything in the kepler namespace, same format.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kepler"}[1m])) / sum(machine_cpu_cores) * 100' \
      | jq -r '.data.result[0].values[] | @tsv' \
      | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
      | tee /tmp/kepler-stress-test-metrics.txt
- name: Run kube-burner node-density w/ churn w/ kepler
  run: |
    # Runs the churn variant of the workload, then stores node CPU,
    # kube-apiserver CPU and Kepler CPU utilisation for the window under /tmp.
    cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
    cd kube-burner/examples/workloads/kubelet-density

    export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    ../../../kube-burner init -c kubelet-density-churn.yml -u http://localhost:9090 --metrics-profile metrics.yaml
    # sleep a min after test
    sleep 60
    export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # query_range <promql>: range query over [START_TIME, END_TIME], step 5.
    query_range() {
      curl -G 'http://localhost:9090/api/v1/query_range' \
        --data-urlencode "query=$1" \
        --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5"
    }

    # format_samples: one "Time: ..., Utilization: ...%" line per sample of
    # the first result series read from stdin.
    format_samples() {
      jq -r '.data.result[0].values[] | @tsv' \
        | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}'
    }

    touch /tmp/kepler-churn-node-cpu.json
    # Node CPU utilisation per instance and mode (idle excluded), raw JSON.
    query_range '(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
      | tee /tmp/kepler-churn-node-cpu.json

    # kube-apiserver CPU utilisation as a percentage of all machine cores.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
      | format_samples \
      | tee -a /tmp/kepler-stress-test-kube-apiserver-churn-metrics.txt

    # CPU utilisation of everything in the kepler namespace.
    query_range 'sum (rate (container_cpu_usage_seconds_total{namespace="kepler"}[1m])) / sum(machine_cpu_cores) * 100' \
      | format_samples \
      | tee -a /tmp/kepler-stress-test-churn-metrics.txt
- name: git add the non-churn metrics to docs
  run: |
    # Appends one table row to the non-churn report, then commits and pushes.
    cd ${GITHUB_WORKSPACE}
    git config --global user.email "dependabot[bot]@users.noreply.github.com"
    git config --global user.name "dependabot[bot]"
    report=docs/kepler-stress-test-baselined-metrics.md

    # The row starts with the timestamp (no newline yet); the remaining
    # cells are appended by the echo further down.
    END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    echo -n "| " $END_TIME >> "${report}"

    # Summaries produced by the repository's awk helper for each sample file.
    kepler_stats=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-metrics.txt)
    apiserver_plain_stats=$(./util/calc_mean_sd.awk /tmp/nokepler-stress-test-kube-apiserver-metrics.txt)
    apiserver_kepler_stats=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-kube-apiserver-metrics.txt)

    # Field 2 (split on '|') is taken as the mean; percent signs are removed
    # so bc can subtract the two values.
    plain_mean=$(echo $apiserver_plain_stats | awk -F'|' '{print $2}' | sed 's/%//g')
    kepler_mean=$(echo $apiserver_kepler_stats | awk -F'|' '{print $2}' | sed 's/%//g')
    mean_delta=$(echo "scale=10; $kepler_mean - $plain_mean" | bc)
    delta_cell=$(printf "| %.10f%% |" $mean_delta)

    echo $kepler_stats " " $apiserver_plain_stats " " $apiserver_kepler_stats " " $delta_cell >> "${report}"

    git add "${report}"
    git commit -m "kepler stress test metrics result at ${END_TIME}" -s
    git pull --rebase
    git push origin main
  env:
    GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: git add the churn metrics to docs
  run: |
    # Appends one table row to the churn report, then commits and pushes.
    cd ${GITHUB_WORKSPACE}
    git config --global user.email "dependabot[bot]@users.noreply.github.com"
    git config --global user.name "dependabot[bot]"
    report=docs/kepler-stress-test-baselined-metrics-churn.md

    # The row starts with the timestamp (no newline yet); the remaining
    # cells are appended by the echo further down.
    END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    echo -n "| " $END_TIME >> "${report}"

    # Summaries produced by the repository's awk helper for each sample file.
    kepler_stats=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-churn-metrics.txt)
    apiserver_plain_stats=$(./util/calc_mean_sd.awk /tmp/nokepler-stress-test-kube-apiserver-churn-metrics.txt)
    apiserver_kepler_stats=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-kube-apiserver-churn-metrics.txt)

    # Field 2 (split on '|') is taken as the mean; percent signs are removed
    # so bc can subtract the two values.
    plain_mean=$(echo $apiserver_plain_stats | awk -F'|' '{print $2}' | sed 's/%//g')
    kepler_mean=$(echo $apiserver_kepler_stats | awk -F'|' '{print $2}' | sed 's/%//g')
    mean_delta=$(echo "scale=10; $kepler_mean - $plain_mean" | bc)
    delta_cell=$(printf "| %.10f%% |" $mean_delta)

    echo $kepler_stats " " $apiserver_plain_stats " " $apiserver_kepler_stats " " $delta_cell >> "${report}"

    git add "${report}"
    git commit -m "kepler stress test metrics result at ${END_TIME}" -s
    git pull --rebase
    git push origin main
  env:
    GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}