Skip to content

CICD: Runs a full GPU install on an EC2 instance #1392

CICD: Runs a full GPU install on an EC2 instance

CICD: Runs a full GPU install on an EC2 instance #1392

Workflow file for this run

# Workflow-level configuration: triggers, concurrency policy and the
# environment shared by every job.
name: cicd

on:
  pull_request:
    branches: [main]
    types:
      - opened
      - synchronize
      - reopened
  # Manual trigger from the GitHub console. Inputs could be declared here,
  # but none are needed.
  # NOTE(review): there is no `push` trigger, so jobs gated on
  # `github.ref == 'refs/heads/main'` can only run for a manual dispatch from
  # main -- confirm this is intended.
  workflow_dispatch:

concurrency:
  # One group per pull request (falling back to the ref when there is no PR).
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # A newer run in the same group cancels the one still in progress.
  cancel-in-progress: true

env:
  PYTHON_VERSION: '3.11'
  POETRY_VERSION: '1.8.3'
  # Token associated with "prod-biggies" (shared credentials are on 1password).
  GROUNDLIGHT_API_TOKEN: ${{ secrets.GROUNDLIGHT_API_TOKEN }}
  # The NGINX proxy endpoint.
  GROUNDLIGHT_ENDPOINT: http://localhost:30101

jobs:
test-general-edge-endpoint:
  # Runs the unit tests that need no container, then builds the Docker image,
  # starts a container from it and runs the "_live" tests against port 30101.
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3
    - name: Set up python
      id: setup_python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        virtualenvs-create: true
        virtualenvs-in-project: true
        installer-parallel: true
    - name: Load Cached venv
      # NOTE(review): the cache-hit output of this step is never consulted;
      # `poetry install` below runs unconditionally.
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
    - name: Install edge-endpoint's python dependencies
      run: |
        poetry install --no-interaction --no-root
    - name: Run Unit Tests (that dont require docker)
      run: |
        # Point these tests to the cloud endpoint, because to test the edge-endpoint, they
        # don't actually issue requests. They internally set up a test client and server.
        # The cloud endpoint is needed so that the /me endpoint succeeds and we can actually
        # use the GL client.
        GROUNDLIGHT_ENDPOINT="https://api.groundlight.ai/"
        source test/setup_plain_test_env.sh
        poetry run pytest -vs -k "not _live"
    - name: Install Docker
      run: |
        # -y keeps every apt command non-interactive; the runner has no TTY to
        # answer a confirmation prompt.
        sudo apt-get update
        sudo apt-get remove -y moby-runc
        sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
        sudo add-apt-repository -y "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
        sudo apt-get update
        sudo apt-get install -y docker-ce
    - name: Build Docker Image
      run: docker build --tag groundlight-edge .
    - name: Start Docker Container
      id: start_container
      run: |
        # EDGE_CONFIG is presumably exported by the sourced script -- confirm.
        source test/setup_plain_test_env.sh
        echo "EDGE_CONFIG=$EDGE_CONFIG"
        container_id=$(docker run \
          -e LOG_LEVEL=DEBUG \
          -e EDGE_CONFIG \
          -d -p 30101:30101 \
          groundlight-edge)
        # Publish the id as a step output through the GITHUB_OUTPUT file; the
        # old `::set-output` workflow command is deprecated.
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
    - name: Run Unit Tests (that do require docker)
      run: |
        GROUNDLIGHT_ENDPOINT=http://localhost:30101
        source test/setup_plain_test_env.sh
        poetry run pytest -k "_live"
    - name: Dump Logs from Docker Container
      if: always()
      run: docker logs ${{ steps.start_container.outputs.container_id }}
    - name: Stop Docker Container
      # This ensures that we always stop the container regardless of the outcomes of
      # the previous steps
      if: always()
      run: docker stop ${{ steps.start_container.outputs.container_id }}
validate-setup-ee:
  # Installs k3s on the runner, configures AWS credentials and runs the
  # `validate-setup-ee` make target.
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Install k3s
      run: ./deploy/bin/install-k3s.sh

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}

    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        installer-parallel: true
        virtualenvs-create: true
        virtualenvs-in-project: true

    - name: Install edge-endpoint's python dependencies
      run: poetry install --no-interaction --no-root

    - name: Validate setup edge endpoint
      run: make validate-setup-ee
# We run this separately from validate-setup-ee because running both on the same runner
# exhausts the disk space; they can also be slow, so it is best to run them in parallel.
test-with-k3s:
  # Installs k3s on the runner, configures AWS credentials and runs the
  # `test-with-k3s` make target.
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Install k3s
      run: ./deploy/bin/install-k3s.sh

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}

    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        installer-parallel: true
        virtualenvs-create: true
        virtualenvs-in-project: true

    - name: Install edge-endpoint's python dependencies
      run: poetry install --no-interaction --no-root

    - name: Run tests with k3s
      run: make test-with-k3s
# Run Groundlight SDK tests against the edge proxy endpoint
test-sdk:
  # Builds the edge-endpoint image, starts it as a container publishing port
  # 30101, then runs the Groundlight SDK's `test-4edge` make target.
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3
    - name: Set up python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install Docker
      run: |
        # -y keeps every apt command non-interactive; the runner has no TTY to
        # answer a confirmation prompt.
        sudo apt-get update
        sudo apt-get remove -y moby-runc
        sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
        sudo add-apt-repository -y "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
        sudo apt-get update
        sudo apt-get install -y docker-ce
    - name: Build Docker Image
      run: docker build --tag groundlight-edge .
    - name: Start Docker Container
      id: start_container
      run: |
        # EDGE_CONFIG is presumably exported by the sourced script -- confirm.
        source test/setup_plain_test_env.sh
        echo "EDGE_CONFIG=$EDGE_CONFIG"
        container_id=$(docker run \
          -e LOG_LEVEL=DEBUG \
          -e EDGE_CONFIG \
          -d -p 30101:30101 \
          groundlight-edge)
        # Publish the id as a step output through the GITHUB_OUTPUT file; the
        # old `::set-output` workflow command is deprecated.
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        virtualenvs-create: true
        virtualenvs-in-project: true
        installer-parallel: true
    - name: Load Cached venv
      # NOTE(review): this caches `.venv` at the repository root, while the
      # SDK is installed from `groundlight-sdk/` below -- confirm the cache is
      # actually used by this job.
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
    # Note that we're pulling the latest main from the SDK repo.
    # This might be ahead of what's published to pypi, but it's useful to test
    # things before they're released.
    - name: Checkout Groundlight SDK
      uses: actions/checkout@v3
      with:
        repository: groundlight/python-sdk
        path: groundlight-sdk
    - name: Install Groundlight SDK dependencies
      run: |
        cd groundlight-sdk
        make install
    - name: Run Groundlight SDK tests against Prod API via Edge Proxy Endpoint
      run: |
        cd groundlight-sdk
        make test-4edge
        cd ..
    - name: Dump Logs from Docker Container
      if: always()
      run: docker logs ${{ steps.start_container.outputs.container_id }}
    - name: Stop Docker Container
      # This ensures that we always stop the container regardless of the outcomes of
      # the previous steps
      if: always()
      run: docker stop ${{ steps.start_container.outputs.container_id }}
G4-end-to-end:
  # Creates a Pulumi stack (the "EEUT") for this run, waits for the edge
  # endpoint install on it to finish, then exercises it with the groundlight
  # CLI.
  #
  # This job can run multiple times in parallel because the stack name is
  # unique for the run. How much we want to do this is TBD.
  runs-on: self-hosted
  # Runs on any PR.
  # Open question: should this wait until the other tests pass?
  # needs:
  #   - validate-setup-ee
  #   - test-with-k3s
  #   - test-sdk
  env:
    PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_CICD_PAT }}
    PYTHONUNBUFFERED: '1'
  defaults:
    run:
      working-directory: cicd/pulumi
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Name the stack
      run: |
        # Set to expire in 60 minutes
        EXPIRATION_TIME=$(($(date +%s) + 60 * 60))
        STACK_NAME=ee-cicd-${{ github.run_id }}-expires-${EXPIRATION_TIME}
        echo "STACK_NAME=${STACK_NAME}" | tee -a $GITHUB_ENV
        # We give the stack a name including its expiration time so that the sweeper
        # (in sweeper-eeut.yaml) knows when to get rid of it.
        # This saves us having to clean up here, which can be quite slow (~7 minutes for a g4)

    - name: Check that aws credentials are set
      # Credentials come from an IAM profile on the runner instance
      run: aws sts get-caller-identity

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install Pulumi
      run: |
        curl -fsSL https://get.pulumi.com | sh
        export HOME=$(eval echo ~$(whoami))
        echo "$HOME/.pulumi/bin" >> $GITHUB_PATH

    - name: Install uv
      uses: astral-sh/setup-uv@v5

    - name: Make sure uv is working
      run: |
        uv --version
        uv sync
        uv run python --version

    - name: Check that pulumi is installed and authenticated
      run: uv run pulumi whoami

    - name: Prepare and preview pulumi stack
      run: |
        uv run pulumi stack init ${STACK_NAME}
        uv run pulumi config set ee-cicd:targetCommit ${{ github.sha }}
        uv run pulumi config
        uv run pulumi preview

    - name: Create the EEUT
      run: uv run pulumi up --yes

    - name: Check that EE install succeeded
      run: |
        uv run fab connect --patience=150
        uv run fab wait-for-ee-setup

    - name: Wait for K8 to load everything
      run: |
        uv run fab check-k8-deployments
        uv run fab check-server-port

    - name: Use groundlight sdk through EE
      run: |
        EEUT_IP=$(uv run pulumi stack output eeut_private_ip)
        export GROUNDLIGHT_ENDPOINT=http://${EEUT_IP}:30101
        uv run groundlight whoami
        uv run groundlight list-detectors

    - name: Thank the worker and shut down
      # Runs even when an earlier step failed.
      if: always()
      run: |
        echo "Strong work, G4! Now go to sleep. The grim sweeper will visit soon."
        # This saves money and frees up resources
        uv run fab shutdown-instance
build-push-edge-endpoint-multiplatform:
  # Logs in to Amazon ECR and runs the multiplatform image build-and-push
  # script. Only runs for the main ref or a manual dispatch.
  if: github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch'
  # This job only runs once the listed test jobs have succeeded.
  # NOTE(review): `test-with-k3s` is not in this list -- confirm whether that
  # omission is intentional.
  needs: [test-general-edge-endpoint, test-sdk, validate-setup-ee, G4-end-to-end]
  runs-on: ubuntu-22.04
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}

    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v1
      with:
        mask-password: 'true'

    - name: Check out code
      uses: actions/checkout@v4

    - name: Build and Push Multiplatform edge-endpoint Image to ECR
      run: ./deploy/bin/build-push-edge-endpoint-image.sh
      timeout-minutes: 45
update-glhub:
  # Bumps the `edge-endpoint` submodule in groundlight/glhub to its latest
  # remote commit and pushes the result to glhub's main branch.
  if: github.ref == 'refs/heads/main'
  needs: validate-setup-ee
  runs-on: ubuntu-latest
  environment: live
  steps:
    - name: Checkout glhub
      uses: actions/checkout@v4
      with:
        repository: groundlight/glhub
        token: ${{ secrets.BOT_GITHUB_TOKEN }}
        path: glhub
    - name: Update GLHub
      env:
        GIT_AUTHOR_NAME: "edge-glhub-bot"
        # NOTE(review): this e-mail value looks like a redacted placeholder --
        # confirm the bot's real address (also used in `git config` below).
        GIT_AUTHOR_EMAIL: "[email protected]"
        # Hand the secret to the script through the environment instead of
        # interpolating it into the script text.
        BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
      run: |
        cd glhub
        git config --global user.email "[email protected]"
        git config --global user.name "edge-glhub-bot"
        git submodule update --init --recursive
        git submodule update --remote edge-endpoint
        git add .
        # `git commit` exits non-zero when nothing is staged, which would fail
        # the job whenever the submodule is already current. Stop early in
        # that case.
        if git diff --cached --quiet; then
          echo "edge-endpoint submodule is already up to date; nothing to commit."
          exit 0
        fi
        git commit -m "Update edge endpoint submodule"
        git push "https://edge-glhub-bot:${BOT_GITHUB_TOKEN}@github.com/groundlight/glhub.git" main