removing eksctl and using aws cli #2
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a reusable workflow for running the Java E2E test for App Signals.
# It is meant to be called from another workflow.
# This E2E test is responsible for validating setting up a sample application on an EKS cluster and enabling
# App Signals using the staging image of the CloudWatch Agent Operator. It validates the generated telemetry,
# including logs, metrics, and traces, then cleans up the cluster. The testing resources (Terraform templates
# and the validator) are checked out from the test framework repo:
# https://github.com/aws-observability/aws-application-signals-test-framework
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
# Display name shown for this workflow in the Actions UI.
name: "App Signals Enablement Java E2E Testing"
# Triggers. The job reads `inputs.test-java-cluster-name` and `inputs.tag`, so both
# triggers declare the same inputs; a manual run would otherwise see them as empty.
on:
  workflow_dispatch:
    inputs:
      test-java-cluster-name:
        description: 'Name of the EKS cluster to run the test against'
        required: true
        type: string
      tag:
        description: 'Staging Artifact Tag'
        required: false
        default: 'staging'
        type: string
  workflow_call:
    inputs:
      # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency
      test-java-cluster-name:
        required: true
        type: string
      tag:
        description: 'Staging Artifact Tag'
        required: false
        default: 'staging'
        type: string
# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # Read-only repository access.
  contents: read
  # Allows the job to request an OIDC token.
  id-token: write
# Workflow-level environment shared by every step of the job.
env:
  AWS_DEFAULT_REGION: us-west-2
  TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  SAMPLE_APP_NAMESPACE: sample-app-namespace
  SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}
  SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP: /aws/application-signals/data
  # Repository only, without a tag: the operator patch step appends ":<inputs.tag>" itself,
  # so a tag here would produce an invalid "repo:staging:<tag>" image reference.
  ECR_OPERATOR_STAGING_REPO: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-operator-pre-release
  APPLICATION_SIGNALS_ADOT_IMAGE: 611364707713.dkr.ecr.us-west-2.amazonaws.com/adot-autoinstrumentation-java-operator-staging:1.33.0-SNAPSHOT-91cbba8
  APPLICATION_SIGNALS_CW_AGENT_IMAGE: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integration-test:eca8174758d95308006632ec4d5533d765db9ca8
# Single job: deploy the sample app onto an existing EKS cluster with Terraform, enable
# App Signals, swap in the staging operator / agent / ADOT images, validate the generated
# logs, metrics and traces, then clean everything up (cleanup steps run with `if: always()`).
jobs:
  appsignals-java-e2e-test:
    runs-on: ubuntu-latest
    steps:
      # This step avoids code duplication for terraform templates and the validator
      # To simplify, we get the entire repo
      - name: Get testing resources from aws-application-signals-test-framework
        uses: actions/checkout@v4
        with:
          repository: aws-observability/aws-application-signals-test-framework
          ref: ga-release

      # Only the enable/clean scripts are fetched, into ./enablement-script
      - name: Download enablement script
        uses: actions/checkout@v4
        with:
          repository: aws-observability/application-signals-demo
          ref: main
          path: enablement-script
          sparse-checkout: |
            scripts/eks/appsignals/enable-app-signals.sh
            scripts/eks/appsignals/clean-app-signals.sh
          sparse-checkout-cone-mode: false

      - name: Generate testing id
        run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.TEST_ACCOUNT }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.AWS_DEFAULT_REGION }}

      # local directory to store the kubernetes config
      - name: Create kubeconfig directory
        run: mkdir -p ${{ github.workspace }}/.kube

      - name: Set KUBECONFIG environment variable
        run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV

      # `--endpoint-url` is the documented spelling of the AWS CLI global option.
      - name: Set up kubeconfig
        run: |
          aws eks update-kubeconfig --name ${{ inputs.test-java-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }} --endpoint-url https://api.beta.us-west-2.wesley.amazonaws.com

      # eksctl is still needed by the "Remove aws access service account" cleanup step
      - name: Install eksctl
        run: |
          mkdir ${{ github.workspace }}/eksctl
          curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
          tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
          echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH

      - name: Set up terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_wrapper: false

      # A step may declare `uses` or `run`, never both, so this is a plain `run` step that
      # works on the test-framework checkout from the first step. It must run in the same
      # directory as the later `terraform output` / `terraform destroy` steps, because the
      # Terraform state is local to that directory.
      # NOTE(review): confirm terraform/eks is the intended template directory on ga-release.
      - name: Deploy sample app via Terraform
        working-directory: terraform/eks
        run: |
          echo "Current directory: $(pwd)"
          echo "Listing files in $(pwd):"
          ls -la
          echo "Contents of main.tf:"
          cat main.tf || echo "main.tf not found"
          echo "Contents of variables.tf:"
          cat variables.tf || echo "variables.tf not found"
          terraform init
          terraform validate
          terraform apply -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}" \
            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
            -var="kube_directory_path=${{ github.workspace }}/.kube" \
            -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \
            -var="eks_cluster_context_name=$(kubectl config current-context)" \
            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
            -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}" \
            -var="sample_remote_app_image=${{ env.SAMPLE_APP_REMOTE_SERVICE_IMAGE }}"

      # Enable App Signals on the test cluster
      - name: Enable App Signals
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          ./enable-app-signals.sh \
            ${{ inputs.test-java-cluster-name }} \
            ${{ env.AWS_DEFAULT_REGION }} \
            ${{ env.SAMPLE_APP_NAMESPACE }}

      - name: Save CloudWatch Agent Operator image to environment before patching
        run: |
          echo "OLD_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
            jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV

      - name: Patch the CloudWatch Agent Operator image and restart CloudWatch pods
        run: |
          # ECR_OPERATOR_STAGING_REPO may already end in ":<tag>"; strip it so the requested
          # tag is not appended to an already-tagged reference. Only the last path segment
          # is inspected, so a registry host with a port is left alone.
          operator_repo="${{ env.ECR_OPERATOR_STAGING_REPO }}"
          case "${operator_repo##*/}" in
            *:*) operator_repo="${operator_repo%:*}" ;;
          esac
          # inputs.tag is empty when the trigger supplies no inputs; fall back to "staging".
          operator_image="${operator_repo}:${{ inputs.tag || 'staging' }}"
          kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
            -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "'"${operator_image}"'"}, {"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      - name: Patch the CloudWatch Agent image and restart CloudWatch pods
        run: |
          kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ env.APPLICATION_SIGNALS_CW_AGENT_IMAGE }}"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      # Replaces the operator's second container argument (args/1) with the ADOT image flag
      - name: Patch the ADOT image and restart CloudWatch pods
        run: |
          kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
            -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--auto-instrumentation-java-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      # Application pods need to be restarted for the
      # app signals instrumentation to take effect
      - name: Restart the app pods
        run: kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}

      - name: Wait for sample app pods to come up
        run: |
          kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}

      - name: Get remote service deployment name and IP
        run: |
          echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
          echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV

      - name: Log pod ADOT image ID
        run: |
          kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \
            jq '.items[0].status.initContainerStatuses[0].imageID'

      - name: Log pod CWAgent image ID
        run: |
          kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=cloudwatch-agent -o json | \
            jq '.items[0].status.containerStatuses[0].imageID'

      - name: Log pod Fluent Bit image ID
        run: |
          kubectl get pods -n amazon-cloudwatch -l k8s-app=fluent-bit -o json | \
            jq '.items[0].status.containerStatuses[0].imageID'

      - name: Log pod CWAgent Operator image ID and save image to the environment
        run: |
          kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
            jq '.items[0].status.containerStatuses[0].imageID'
          echo "NEW_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
            jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV

      # - name: Check if CW Agent Operator image has changed
      #   run: |
      #     if [ ${{ env.OLD_CW_AGENT_OPERATOR_IMAGE }} = ${{ env.NEW_CW_AGENT_OPERATOR_IMAGE }} ]; then
      #       echo "Operator image did not change"
      #       exit 1
      #     fi

      - name: Get the sample app endpoint
        working-directory: terraform/eks
        run: |
          echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV

      # Polls the endpoint every 10 seconds and fails the step once max_attempts is reached
      - name: Wait for app endpoint to come online
        id: endpoint-check
        run: |
          attempt_counter=0
          max_attempts=30
          until curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}; do
            if [ ${attempt_counter} -eq ${max_attempts} ];then
              echo "Max attempts reached"
              exit 1
            fi
            printf '.'
            attempt_counter=$(($attempt_counter+1))
            sleep 10
          done

      # This step increases the speed of the validation by creating the telemetry data in advance
      - name: Call all test APIs
        continue-on-error: true
        run: |
          curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call"

      - name: Build Gradle
        run: ./gradlew

      # Validation for app signals telemetry data.
      # The `>-` scalars below fold into a single command line, so every line must stay
      # at the same indentation.
      - name: Call endpoint and validate generated EMF logs
        id: log-validation
        if: steps.endpoint-check.outcome == 'success' && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/log-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      # Still runs when log validation failed, so each signal is reported independently
      - name: Call endpoints and validate generated metrics
        id: metric-validation
        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/metric-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      - name: Call endpoints and validate generated traces
        id: trace-validation
        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/trace-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      # Clean up Procedures

      # Edits the clean-up script in place so that it does not delete the log group
      - name: Remove log group deletion command
        if: always()
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP }}' --region \$REGION"
          sed -i "s#$delete_log_group##g" clean-app-signals.sh

      - name: Clean Up App Signals
        if: always()
        continue-on-error: true
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          ./clean-app-signals.sh \
            ${{ inputs.test-java-cluster-name }} \
            ${{ env.AWS_DEFAULT_REGION }} \
            ${{ env.SAMPLE_APP_NAMESPACE }}

      # This step also deletes lingering resources from previous test runs
      - name: Delete all sample app resources
        if: always()
        continue-on-error: true
        timeout-minutes: 10
        run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }}

      # Passes the same variables as the apply step. The workflow defines no
      # SAMPLE_APP_IMAGE, so the frontend image variable is used instead.
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        working-directory: terraform/eks
        run: |
          terraform destroy -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}" \
            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
            -var="kube_directory_path=${{ github.workspace }}/.kube" \
            -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \
            -var="eks_cluster_context_name=$(kubectl config current-context)" \
            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
            -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
            -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}" \
            -var="sample_remote_app_image=${{ env.SAMPLE_APP_REMOTE_SERVICE_IMAGE }}"

      - name: Remove aws access service account
        if: always()
        continue-on-error: true
        run: |
          eksctl delete iamserviceaccount \
            --name service-account-${{ env.TESTING_ID }} \
            --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
            --cluster ${{ inputs.test-java-cluster-name }} \
            --region ${{ env.AWS_DEFAULT_REGION }}