Equinix k8s Action churn check #176
name: Equinix k8s Action churn check
on:
  workflow_dispatch:
    inputs:
      termination_time:
        description: 'Cluster termination time in hours after now'
        required: true
        default: '8'
      control_plan:
        description: 'control plane machine type'
        required: false
        default: 'c3.small.x86'
      node_plan:
        description: 'node machine type'
        required: false
        default: 'c3.small.x86'
      metro:
        description: 'metro location'
        required: false
        default: 'da'
  schedule:
    - cron: '0 18 * * *'
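# contents: write lets the Test-K8s job push the collected metrics tables under docs/ back to main.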
permissions:
  pull-requests: write
  contents: write
  repository-projects: write
  packages: write
jobs:
  Create-k8s-cluster:
    name: "Create Cluster"
    uses: ./.github/workflows/create_equinix_k8s_cluster.yml
    secrets: inherit
    with:
      # fall back to the workflow_dispatch defaults on scheduled runs, where inputs are empty
      termination_time: ${{ github.event.inputs.termination_time || '8' }}
      control_plan: ${{ github.event.inputs.control_plan || 'c3.small.x86' }}
      node_plan: ${{ github.event.inputs.node_plan || 'c3.small.x86' }}
      metro: ${{ github.event.inputs.metro || 'da' }}
  Test-K8s:
    name: "Test K8s"
    needs: Create-k8s-cluster
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Config kube
        uses: ./.github/actions/kube_config
        with:
          # kubeconfig artifact produced for this run by the create-cluster workflow
          artifact_name: kubeconfig-${{ github.run_id }}
      - name: Install Prometheus
        run: |
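          # Deploy kube-prometheus (Prometheus Operator plus the prometheus-k8s instance),
          # then wait for the StatefulSet to be created and fully rolled out.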
          git clone --depth 1 https://github.com/prometheus-operator/kube-prometheus
          cd kube-prometheus
          kubectl apply --server-side --validate=false -f manifests/setup
          kubectl apply --validate=false -f manifests/
          until kubectl -n monitoring get statefulset prometheus-k8s; do kubectl get all -n monitoring; echo "StatefulSet not created yet, waiting..."; sleep 5; done
          kubectl wait deployments -n monitoring prometheus-adapter --for=condition=available --timeout 3m
          kubectl rollout status --watch --timeout=600s statefulset -n monitoring prometheus-k8s
      - name: Pull kube-burner
        run: |
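          # Clone the kube-burner repo for its example workload configs and download the
          # prebuilt v1.9.5 binary into the clone's top-level directory.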
          git clone https://github.com/kube-burner/kube-burner
          curl -sS -L "https://github.com/kube-burner/kube-burner/releases/download/v1.9.5/kube-burner-V1.9.5-linux-x86_64.tar.gz" | tar -xzC kube-burner/ kube-burner
      - name: Port-forward Prometheus
        run: |
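          # Expose Prometheus on localhost:9090; the backgrounded kubectl process keeps
          # running on the runner, so later steps can query it directly.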
          kubectl port-forward -n monitoring svc/prometheus-k8s 9090:9090 &
      - name: Run kube-burner node-density w/ churn w/o kepler
        run: |
          cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
          cd kube-burner/examples/workloads/kubelet-density
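          # Derive a churn variant of the example config: raise client qps/burst to 20,
          # run 80 iterations in per-iteration namespaces, and churn (delete and recreate)
          # 10% of the namespaces every 2 minutes for up to 1 hour. A local indexer is
          # appended so collected metrics are written to collected-metrics/.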
          cp kubelet-density.yml kubelet-density-churn.yml
          sed -i 's/qps: 2/qps: 20/g' kubelet-density-churn.yml
          sed -i 's/burst: 2/burst: 20/g' kubelet-density-churn.yml
          sed -i 's/jobIterations: 25/jobIterations: 80/g' kubelet-density-churn.yml
          echo "indexers:" >> kubelet-density-churn.yml
          echo "  - type: local" >> kubelet-density-churn.yml
          echo "    metricsDirectory: collected-metrics" >> kubelet-density-churn.yml
          sed -i 's/namespacedIterations: false/namespacedIterations: true/g' kubelet-density-churn.yml
          sed -i '/namespacedIterations: true/a\
              iterationsPerNamespace: 1\
              churn: true\
              churnCycles: 0\
              churnDuration: 1h0m0s\
              churnPercent: 10\
              churnDelay: 2m0s\
              churnDeletionStrategy: default\
          ' kubelet-density-churn.yml
          export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          ../../../kube-burner init -c kubelet-density-churn.yml -u http://localhost:9090 --metrics-profile metrics.yaml
          # wait a minute after the test so Prometheus scrapes the final samples
          sleep 60
          export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          touch /tmp/nokepler-churn-node-cpu.json
          # Retrieve node cpu utilization over time
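          # The query reports, per node and per CPU mode (idle excluded), the share of CPU
          # time spent in that mode as a percentage, averaged across cores over 1-minute
          # windows and sampled every 5 seconds.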
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" | tee /tmp/nokepler-churn-node-cpu.json
          # Retrieve kube-apiserver cpu utilization over time, as a share of total machine cores
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee -a /tmp/nokepler-stress-test-kube-apiserver-churn-metrics.txt
      - name: Run kube-burner node-density w/o churn w/o kepler
        run: |
          cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
          cd kube-burner/examples/workloads/kubelet-density
          # Apply the same qps/burst/iteration bump and local indexer to the baseline
          # (non-churn) config so the two runs differ only in churn.
          sed -i 's/qps: 2/qps: 20/g' kubelet-density.yml
          sed -i 's/burst: 2/burst: 20/g' kubelet-density.yml
          sed -i 's/jobIterations: 25/jobIterations: 80/g' kubelet-density.yml
          echo "indexers:" >> kubelet-density.yml
          echo "  - type: local" >> kubelet-density.yml
          echo "    metricsDirectory: collected-metrics" >> kubelet-density.yml
          export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          ../../../kube-burner init -c kubelet-density.yml -u http://localhost:9090 --metrics-profile metrics.yaml
          # wait a minute after the test so Prometheus scrapes the final samples
          sleep 60
          export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          touch /tmp/nokepler-node-cpu.json
          # Retrieve node cpu utilization over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" | tee /tmp/nokepler-node-cpu.json
          # Retrieve kube-apiserver cpu utilization over time, as a share of total machine cores
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee -a /tmp/nokepler-stress-test-kube-apiserver-metrics.txt
      - name: Install Kepler helm chart
        run: |
          # Install helm binary first
          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
          chmod 700 get_helm.sh
          ./get_helm.sh
          export PATH=$PATH:/usr/local/bin
          helm repo add kepler https://sustainable-computing-io.github.io/kepler-helm-chart
          helm repo update
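          # The extraEnvVars below are passed through to Kepler's configuration: component
          # power and BPF metrics are turned off, estimated idle power is exposed, and the
          # experimental BPF sample rate is set to 1000, presumably to keep Kepler's own
          # overhead low during the stress runs.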
          helm install kepler kepler/kepler --namespace kepler --create-namespace --set serviceMonitor.enabled=true --set image.tag=latest \
            --set extraEnvVars.EXPOSE_COMPONENT_POWER="false" --set extraEnvVars.EXPOSE_ESTIMATED_IDLE_POWER_METRICS="true" --set extraEnvVars.EXPOSE_BPF_METRICS="false" \
            --set extraEnvVars.EXPERIMENTAL_BPF_SAMPLE_RATE=1000
      - name: Allow Prometheus to scrape Kepler metrics in the kepler namespace
        run: |
          curl -O https://raw.githubusercontent.com/sustainable-computing-io/kepler/main/manifests/k8s/config/rbac/prometheus_role.yaml
          sed -i 's/namespace: system/namespace: kepler/' prometheus_role.yaml
          kubectl apply -f prometheus_role.yaml
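          # Bind the Role above to the prometheus-k8s ServiceAccount from the monitoring
          # namespace so kube-prometheus can discover and scrape Kepler's endpoints.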
          kubectl apply -f - <<EOF
          apiVersion: rbac.authorization.k8s.io/v1
          kind: RoleBinding
          metadata:
            labels:
              app.kubernetes.io/component: prometheus
              app.kubernetes.io/instance: k8s
              app.kubernetes.io/name: prometheus
              sustainable-computing.io/app: kepler
            name: prometheus-k8s
            namespace: kepler
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: Role
            name: prometheus-k8s
          subjects:
            - kind: ServiceAccount
              name: prometheus-k8s
              namespace: monitoring
          EOF
      - name: Wait until the Kepler metric kepler_node_package_joules_total is non-empty in Prometheus
        run: |
          ${GITHUB_WORKSPACE}/util/wait_for_prometheus.sh
      - name: Run kube-burner node-density w/o churn w/ kepler
        run: |
          cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
          cd kube-burner/examples/workloads/kubelet-density
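          # Reuse kubelet-density.yml as patched in the earlier no-Kepler baseline step
          # (same qps/burst/iterations/indexer), so the only difference is Kepler running.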
          export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          ../../../kube-burner init -c kubelet-density.yml -u http://localhost:9090 --metrics-profile metrics.yaml
          # wait a minute after the test so Prometheus scrapes the final samples
          sleep 60
          export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          touch /tmp/kepler-stress-test-metrics.txt
          # Retrieve node cpu utilization over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" | tee -a /tmp/kepler-node-cpu.json
          # Retrieve kube-apiserver cpu utilization over time, as a share of total machine cores
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee -a /tmp/kepler-stress-test-kube-apiserver-metrics.txt
          # Retrieve the cpu utilization of the Kepler pods themselves over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kepler"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee /tmp/kepler-stress-test-metrics.txt
      - name: Run kube-burner node-density w/ churn w/ kepler
        run: |
          cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
          cd kube-burner/examples/workloads/kubelet-density
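          # Reuse kubelet-density-churn.yml generated in the earlier no-Kepler churn step
          # so both churn runs use an identical config.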
          export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          ../../../kube-burner init -c kubelet-density-churn.yml -u http://localhost:9090 --metrics-profile metrics.yaml
          # wait a minute after the test so Prometheus scrapes the final samples
          sleep 60
          export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          touch /tmp/kepler-churn-node-cpu.json
          # Retrieve node cpu utilization over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=(100 * avg by (instance,mode) (rate(node_cpu_seconds_total{mode!="idle"}[1m])))' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" | tee /tmp/kepler-churn-node-cpu.json
          # Retrieve kube-apiserver cpu utilization over time, as a share of total machine cores
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kube-system", pod=~"kube-apiserver-.*"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee -a /tmp/kepler-stress-test-kube-apiserver-churn-metrics.txt
          # Retrieve the cpu utilization of the Kepler pods themselves over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kepler"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=5" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}' \
            | tee -a /tmp/kepler-stress-test-churn-metrics.txt
      - name: Commit the non-churn metrics to docs
        run: |
          cd ${GITHUB_WORKSPACE}
          git config --global user.email "dependabot[bot]@users.noreply.github.com"
          git config --global user.name "dependabot[bot]"
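          # Append one markdown table row: timestamp, Kepler CPU mean/SD, apiserver CPU
          # mean/SD without and with Kepler, and the mean difference. calc_mean_sd.awk is
          # assumed to emit pipe-delimited "| mean% | sd% |" cells, which the field
          # extraction below relies on.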
          END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ"); echo -n "| " $END_TIME >> docs/kepler-stress-test-baselined-metrics.md
          MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-metrics.txt)
          KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/nokepler-stress-test-kube-apiserver-metrics.txt)
          KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-kube-apiserver-metrics.txt)
          WITHOUT_KEPLER_MEAN=$(echo $KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT | awk -F'|' '{print $2}' | sed 's/%//g')
          WITH_KEPLER_MEAN=$(echo $KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT | awk -F'|' '{print $2}' | sed 's/%//g')
          MEAN_DIFFERENCE=$(echo "scale=10; $WITH_KEPLER_MEAN - $WITHOUT_KEPLER_MEAN" | bc)
          KUBE_APISERVER_MEAN_DIFFERENCE=$(printf "| %.10f%% |" $MEAN_DIFFERENCE)
          echo $MEAN_SD_OUTPUT " " $KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT " " $KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT " " $KUBE_APISERVER_MEAN_DIFFERENCE >> docs/kepler-stress-test-baselined-metrics.md
          git add docs/kepler-stress-test-baselined-metrics.md
          git commit -m "kepler stress test metrics result at ${END_TIME}" -s
          git pull --rebase
          git push origin main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Commit the churn metrics to docs
        run: |
          cd ${GITHUB_WORKSPACE}
          git config --global user.email "dependabot[bot]@users.noreply.github.com"
          git config --global user.name "dependabot[bot]"
          END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ"); echo -n "| " $END_TIME >> docs/kepler-stress-test-baselined-metrics-churn.md
          MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-churn-metrics.txt)
          KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/nokepler-stress-test-kube-apiserver-churn-metrics.txt)
          KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT=$(./util/calc_mean_sd.awk /tmp/kepler-stress-test-kube-apiserver-churn-metrics.txt)
          WITHOUT_KEPLER_MEAN=$(echo $KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT | awk -F'|' '{print $2}' | sed 's/%//g')
          WITH_KEPLER_MEAN=$(echo $KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT | awk -F'|' '{print $2}' | sed 's/%//g')
          MEAN_DIFFERENCE=$(echo "scale=10; $WITH_KEPLER_MEAN - $WITHOUT_KEPLER_MEAN" | bc)
          KUBE_APISERVER_MEAN_DIFFERENCE=$(printf "| %.10f%% |" $MEAN_DIFFERENCE)
          echo $MEAN_SD_OUTPUT " " $KUBE_APISERVER_WITHOUT_KEPLER_MEAN_SD_OUTPUT " " $KUBE_APISERVER_WITH_KEPLER_MEAN_SD_OUTPUT " " $KUBE_APISERVER_MEAN_DIFFERENCE >> docs/kepler-stress-test-baselined-metrics-churn.md
          git add docs/kepler-stress-test-baselined-metrics-churn.md
          git commit -m "kepler stress test metrics result at ${END_TIME}" -s
          git pull --rebase
          git push origin main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}