Skip to content

removing eksctl and using aws cli #2

removing eksctl and using aws cli

removing eksctl and using aws cli #2

# This is a reusable workflow for running the Java E2E test for App Signals.
# It is meant to be called from another workflow.
# This E2E test is responsible for validating setting up a sample application on an EKS cluster and enabling
# App Signals using the staging image of the CloudWatch Agent Operator. It validates the generated telemetry
# including logs, metrics, and traces, then cleans up the cluster. The testing resources are checked out from the
# aws-application-signals-test-framework repo: https://github.com/aws-observability/aws-application-signals-test-framework
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: App Signals Enablement Java E2E Testing
on:
  # Manual runs read the same `inputs.*` values as reusable-workflow calls, so
  # the inputs are declared for both triggers. Without them a manual dispatch
  # would run with an empty cluster name and an empty image tag.
  workflow_dispatch:
    inputs:
      # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency
      test-java-cluster-name:
        description: 'Name of the EKS cluster to run the test on'
        required: true
        type: string
      tag:
        description: 'Staging Artifact Tag'
        required: false
        default: 'staging'
        type: string
  workflow_call:
    inputs:
      # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency
      test-java-cluster-name:
        required: true
        type: string
      tag:
        description: 'Staging Artifact Tag'
        required: false
        default: 'staging'
        type: string
# Token scopes granted to the job's GITHUB_TOKEN.
permissions:
  # Lets the job request an OIDC token; the "Configure AWS Credentials" step
  # assumes an IAM role rather than using stored access keys.
  id-token: write
  # Read-only repository access for the checkout steps.
  contents: read
# Workflow-wide environment shared by every step of the job.
env:
  AWS_DEFAULT_REGION: us-west-2
  TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  # Kubernetes namespace the sample application is deployed into.
  SAMPLE_APP_NAMESPACE: sample-app-namespace
  SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}
  SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}
  # Passed to the validator as --metric-namespace / --log-group.
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP: /aws/application-signals/data
  # Repository only, without a tag: the operator patch step appends
  # ":${{ inputs.tag }}" itself, so a tag here would yield "repo:staging:<tag>".
  ECR_OPERATOR_STAGING_REPO: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-operator-pre-release
  # Fully tagged images that are patched into the cluster as-is.
  APPLICATION_SIGNALS_ADOT_IMAGE: 611364707713.dkr.ecr.us-west-2.amazonaws.com/adot-autoinstrumentation-java-operator-staging:1.33.0-SNAPSHOT-91cbba8
  APPLICATION_SIGNALS_CW_AGENT_IMAGE: 506463145083.dkr.ecr.us-west-2.amazonaws.com/cwagent-integration-test:eca8174758d95308006632ec4d5533d765db9ca8
# Single job: prepare the runner, deploy the sample app, enable App Signals,
# swap in the staging images, validate logs/metrics/traces, then clean up.
jobs:
  appsignals-java-e2e-test:
    runs-on: ubuntu-latest
    steps:
      # This step avoids code duplication for terraform templates and the validator
      # To simplify, we get the entire repo
      - name: Get testing resources from aws-application-signals-test-framework
        uses: actions/checkout@v4
        with:
          repository: aws-observability/aws-application-signals-test-framework
          ref: ga-release

      # Only the enable/clean scripts are fetched, into ./enablement-script
      - name: Download enablement script
        uses: actions/checkout@v4
        with:
          repository: aws-observability/application-signals-demo
          ref: main
          path: enablement-script
          sparse-checkout: |
            scripts/eks/appsignals/enable-app-signals.sh
            scripts/eks/appsignals/clean-app-signals.sh
          sparse-checkout-cone-mode: false

      # TESTING_ID is reused for terraform resource names, the validator
      # service names and the eksctl service account name
      - name: Generate testing id
        run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.TEST_ACCOUNT }}:role/${{ secrets.APP_SIGNALS_E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.AWS_DEFAULT_REGION }}

      # local directory to store the kubernetes config
      - name: Create kubeconfig directory
        run: mkdir -p ${{ github.workspace }}/.kube

      - name: Set KUBECONFIG environment variable
        run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV

      - name: Set up kubeconfig
        run: |
          aws eks update-kubeconfig --name ${{ inputs.test-java-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }} --endpoint https://api.beta.us-west-2.wesley.amazonaws.com

      # eksctl is still needed by the final "Remove aws access service account" step
      - name: Install eksctl
        run: |
          mkdir ${{ github.workspace }}/eksctl
          curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
          tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
          echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH

      # The wrapper is disabled so `terraform output` prints only the raw value
      - name: Set up terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_wrapper: false

      # A step may define either `uses` or `run`, never both, so the extra
      # checkout that used to be attached to this step has been removed; the
      # test framework checked out above is already in the workspace root.
      # NOTE(review): this now runs in terraform/eks so that the Terraform state
      # is where the "Get the sample app endpoint" and "Terraform destroy" steps
      # look for it. Confirm this is the intended module directory.
      - name: Deploy sample app via Terraform
        working-directory: terraform/eks
        run: |
          echo "Current directory: $(pwd)"
          echo "Listing files in $(pwd):"
          ls -la
          echo "Contents of main.tf:"
          cat main.tf || echo "main.tf not found"
          echo "Contents of variables.tf:"
          cat variables.tf || echo "variables.tf not found"
          terraform init
          terraform validate
          terraform apply -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}" \
            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
            -var="kube_directory_path=${{ github.workspace }}/.kube" \
            -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \
            -var="eks_cluster_context_name=$(kubectl config current-context)" \
            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
            -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}" \
            -var="sample_remote_app_image=${{ env.SAMPLE_APP_REMOTE_SERVICE_IMAGE }}"

      # Enable App Signals on the test cluster
      - name: Enable App Signals
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          ./enable-app-signals.sh \
            ${{ inputs.test-java-cluster-name }} \
            ${{ env.AWS_DEFAULT_REGION }} \
            ${{ env.SAMPLE_APP_NAMESPACE }}

      - name: Save CloudWatch Agent Operator image to environment before patching
        run: |
          echo "OLD_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
          jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV

      # The patch document must be a single JSON array; a stray closing bracket
      # makes kubectl reject it.
      - name: Patch the CloudWatch Agent Operator image and restart CloudWatch pods
        run: |
          kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "${{ env.ECR_OPERATOR_STAGING_REPO }}:${{ inputs.tag }}"}, {"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      - name: Patch the CloudWatch Agent image and restart CloudWatch pods
        run: |
          kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ env.APPLICATION_SIGNALS_CW_AGENT_IMAGE }}"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      # Replaces the second controller-manager argument with the ADOT image flag
      - name: Patch the ADOT image and restart CloudWatch pods
        run: |
          kubectl patch deploy -n amazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
            -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--auto-instrumentation-java-image=${{ env.APPLICATION_SIGNALS_ADOT_IMAGE }}"}]'
          kubectl delete pods --all -n amazon-cloudwatch
          sleep 10
          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch

      # Application pods need to be restarted for the
      # app signals instrumentation to take effect
      - name: Restart the app pods
        run: kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}

      - name: Wait for sample app pods to come up
        run: |
          kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}

      # Both values are passed to the test API calls and the validator below
      - name: Get remote service deployment name and IP
        run: |
          echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
          echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV

      - name: Log pod ADOT image ID
        run: |
          kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \
          jq '.items[0].status.initContainerStatuses[0].imageID'

      - name: Log pod CWAgent image ID
        run: |
          kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=cloudwatch-agent -o json | \
          jq '.items[0].status.containerStatuses[0].imageID'

      - name: Log pod Fluent Bit image ID
        run: |
          kubectl get pods -n amazon-cloudwatch -l k8s-app=fluent-bit -o json | \
          jq '.items[0].status.containerStatuses[0].imageID'

      - name: Log pod CWAgent Operator image ID and save image to the environment
        run: |
          kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
          jq '.items[0].status.containerStatuses[0].imageID'
          echo "NEW_CW_AGENT_OPERATOR_IMAGE"=$(kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=amazon-cloudwatch-observability -o json | \
          jq '.items[0].status.containerStatuses[0].image') >> $GITHUB_ENV

      # - name: Check if CW Agent Operator image has changed
      #   run: |
      #     if [ ${{ env.OLD_CW_AGENT_OPERATOR_IMAGE }} = ${{ env.NEW_CW_AGENT_OPERATOR_IMAGE }} ]; then
      #       echo "Operator image did not change"
      #       exit 1
      #     fi

      - name: Get the sample app endpoint
        working-directory: terraform/eks
        run: |
          echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV

      # Polls the endpoint every 10 seconds and fails once max_attempts is reached
      - name: Wait for app endpoint to come online
        id: endpoint-check
        run: |
          attempt_counter=0
          max_attempts=30
          until $(curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}); do
            if [ ${attempt_counter} -eq ${max_attempts} ];then
              echo "Max attempts reached"
              exit 1
            fi
            printf '.'
            attempt_counter=$(($attempt_counter+1))
            sleep 10
          done

      # This step increases the speed of the validation by creating the telemetry data in advance
      - name: Call all test APIs
        continue-on-error: true
        run: |
          curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
          curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call"

      - name: Build Gradle
        run: ./gradlew

      # Validation for app signals telemetry data
      # Each command below is a folded scalar, so it reaches the shell as one line.
      - name: Call endpoint and validate generated EMF logs
        id: log-validation
        if: steps.endpoint-check.outcome == 'success' && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/log-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      # Still runs when only the log validation failed
      - name: Call endpoints and validate generated metrics
        id: metric-validation
        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/metric-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      # Still runs when only the log and/or metric validation failed
      - name: Call endpoints and validate generated traces
        id: trace-validation
        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c eks/trace-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --endpoint http://${{ env.APP_ENDPOINT }}
          --region ${{ env.AWS_DEFAULT_REGION }}
          --account-id ${{ env.TEST_ACCOUNT }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP }}
          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
          --platform-info ${{ inputs.test-java-cluster-name }}
          --service-name sample-application-${{ env.TESTING_ID }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
          --query-string ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}
          --rollup'

      # Clean up Procedures
      # Strips the log group deletion command from the clean-up script before it runs
      - name: Remove log group deletion command
        if: always()
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP }}' --region \$REGION"
          sed -i "s#$delete_log_group##g" clean-app-signals.sh

      - name: Clean Up App Signals
        if: always()
        continue-on-error: true
        working-directory: enablement-script/scripts/eks/appsignals
        run: |
          ./clean-app-signals.sh \
            ${{ inputs.test-java-cluster-name }} \
            ${{ env.AWS_DEFAULT_REGION }} \
            ${{ env.SAMPLE_APP_NAMESPACE }}

      # This step also deletes lingering resources from previous test runs
      - name: Delete all sample app resources
        if: always()
        continue-on-error: true
        timeout-minutes: 10
        run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }}

      # sample_app_image uses the frontend image variable defined in `env`;
      # no SAMPLE_APP_IMAGE variable is defined anywhere in this workflow.
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        working-directory: terraform/eks
        run: |
          terraform destroy -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}" \
            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
            -var="kube_directory_path=${{ github.workspace }}/.kube" \
            -var="eks_cluster_name=${{ inputs.test-java-cluster-name }}" \
            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
            -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
            -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}"

      - name: Remove aws access service account
        if: always()
        continue-on-error: true
        run: |
          eksctl delete iamserviceaccount \
            --name service-account-${{ env.TESTING_ID }} \
            --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
            --cluster ${{ inputs.test-java-cluster-name }} \
            --region ${{ env.AWS_DEFAULT_REGION }}