Skip to content

Commit

Permalink
Refactor workflow to streamline instance startup and monitoring:
Browse files Browse the repository at this point in the history
- Added snapshot readiness check for volume creation
- Enhanced logging for detailed instance state tracking
- Extended timeout for instance status checks to 10 minutes
  • Loading branch information
lola831 committed Oct 15, 2024
1 parent 1a4eff9 commit 338835b
Showing 1 changed file with 32 additions and 3 deletions.
35 changes: 32 additions & 3 deletions .github/workflows/run-simulators.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,19 @@ jobs:
run: |
# Retrieve the ID of the snapshot with the most recent StartTime among those owned by this account
LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
# With --output text the CLI prints the literal string "None" when the query matches nothing,
# so stop here instead of handing a bogus ID to the commands that follow.
if [[ -z "$LATEST_SNAPSHOT_ID" || "$LATEST_SNAPSHOT_ID" == "None" ]]; then
  echo "No snapshot found for this account; cannot create a volume." >&2
  exit 1
fi
echo "Using latest snapshot with ID: $LATEST_SNAPSHOT_ID"
echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
# Wait until snapshot is in 'completed' status.
# The wait is bounded: it aborts as soon as the snapshot reports the terminal 'error' state,
# and gives up after SNAPSHOT_TIMEOUT seconds (default 1800, overridable from the environment)
# so a stuck snapshot cannot hang the job indefinitely.
SNAPSHOT_TIMEOUT=${SNAPSHOT_TIMEOUT:-1800}
snapshot_deadline=$(( $(date +%s) + SNAPSHOT_TIMEOUT ))
while true; do
  snapshot_status=$(aws ec2 describe-snapshots --snapshot-ids "$LATEST_SNAPSHOT_ID" --query 'Snapshots[0].State' --output text)
  if [[ "$snapshot_status" == "completed" ]]; then
    echo "Snapshot is ready."
    break
  fi
  if [[ "$snapshot_status" == "error" ]]; then
    echo "Snapshot $LATEST_SNAPSHOT_ID entered the error state; aborting." >&2
    exit 1
  fi
  if (( $(date +%s) >= snapshot_deadline )); then
    echo "Timed out after ${SNAPSHOT_TIMEOUT}s waiting for snapshot $LATEST_SNAPSHOT_ID (last state: $snapshot_status)." >&2
    exit 1
  fi
  echo "Snapshot still in $snapshot_status state, waiting..."
  sleep 10
done
# Create a new volume from the latest snapshot and capture its VolumeId.
# The snapshot ID is quoted so an empty value is passed as one (invalid) argument
# rather than letting --snapshot-id swallow the option that follows it.
volume_id=$(aws ec2 create-volume \
  --snapshot-id "$LATEST_SNAPSHOT_ID" \
  --availability-zone us-west-1b \
  --volume-type standard \
  --size 100 \
  --query "VolumeId" \
  --output text)
Expand Down Expand Up @@ -67,8 +79,25 @@ jobs:
exit 1
fi
# wait for status checks to pass
TIMEOUT=300 # Timeout in seconds
- name: Get and Log Instance State
  run: |
    # Log the instance lifecycle state and both EC2 status checks, failing fast only when
    # the instance is definitively broken. This step runs before the status-check wait, so
    # transitional values (e.g. "initializing", or "null" when no status entry is returned)
    # are logged but left for the wait step to resolve.
    instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
    # A single describe-instance-status call feeds both values so they describe the same moment
    status_json=$(aws ec2 describe-instance-status --instance-ids "$INSTANCE_ID")
    instance_status=$(jq -r '.InstanceStatuses[0].InstanceStatus.Status' <<<"$status_json")
    system_status=$(jq -r '.InstanceStatuses[0].SystemStatus.Status' <<<"$status_json")
    echo "Instance State: $instance_state"
    echo "Instance Status: $instance_status"
    echo "System Status: $system_status"
    # Any lifecycle state other than pending/running (including an empty value from a failed
    # lookup) means the instance will never pass its status checks.
    case "$instance_state" in
      pending|running) ;;
      *)
        echo "Instance failed to initialize correctly. Exiting job with failure."
        exit 1
        ;;
    esac
    # "impaired" is the failing result of a status check; anything else may still become "ok"
    if [[ "$instance_status" == "impaired" || "$system_status" == "impaired" ]]; then
      echo "Instance failed to initialize correctly. Exiting job with failure."
      exit 1
    fi
- name: Wait for Status Checks to Pass
run: |
TIMEOUT=600 # Timeout in seconds
START_TIME=$(date +%s)
END_TIME=$((START_TIME + TIMEOUT))
while true; do
Expand Down

0 comments on commit 338835b

Please sign in to comment.