groundlight · robotrapta · Jan 8, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 15, 2025
@@ -12,3 +12,4 @@ rules:
   comments: disable
   trailing-spaces: disable
   empty-lines: disable
+  new-line-at-end-of-file: disable
@@ -21,3 +21,8 @@ paths:
       - '.*was deprecated.*'
       - '.*shellcheck.*:warning:.*'
       - '.*shellcheck.*:info:.*'
+
+      # Suppress the warning that head.ref is untrusted; it is not useful for this repo.
+      # It's worried that the PR's branch name string could be malicious.  (Never mind that
+      # an attacker generating PRs can much more easily just execute malicious code.)
+      - '.*github.event.pull_request.head.ref.*is potentially untrusted.*'
@@ -252,13 +252,130 @@ jobs:
         if: always()
         run: docker stop ${{ steps.start_container.outputs.container_id }}
 
+  G4-end-to-end:
+    # Note this job can run multiple times in parallel because the stack name is unique
+    # for the run.  How much we want to do this is TBD.
+    runs-on: self-hosted
+
+    # Run this on any PR.
+    # Question: Should we wait until the other tests pass before running this?
+    #needs:
+    #  - validate-setup-ee
+    #  - test-with-k3s
+    #  - test-sdk
+
+    env:
+      PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_CICD_PAT }}
+      PYTHONUNBUFFERED: 1
+    defaults:
+      run:
+        working-directory: cicd/pulumi
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Name the stack
+        run: |
+          # We give the stack a name including its expiration time so that the sweeper
+          # (in sweeper-eeut.yaml) knows when to get rid of it.
+          # This saves us having to clean up here, which can be quite slow (~7 minutes for a g4)
+          # Set to expire in 60 minutes
+          EXPIRATION_TIME=$(($(date +%s) + 60 * 60))
+          STACK_NAME=ee-cicd-${{ github.run_id }}-expires-${EXPIRATION_TIME}
+          # Export STACK_NAME to later steps (and echo it to the log via tee).
+          # The env-file path is quoted so it survives spaces in the runner's work dir.
+          echo "STACK_NAME=${STACK_NAME}" | tee -a "$GITHUB_ENV"
+
+      - name: Check that aws credentials are set
+        # Credentials come from an IAM profile on the runner instance
+        run: |
+          aws sts get-caller-identity
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Pulumi
+        # Runs Pulumi's install script, re-derives HOME from the current user name via
+        # tilde expansion, and appends $HOME/.pulumi/bin to GITHUB_PATH so that later
+        # steps can find the pulumi binary.
+        # NOTE(review): presumably HOME is unset or unreliable on the self-hosted runner — confirm.
+        run: |
+          curl -fsSL https://get.pulumi.com | sh
+          export HOME=$(eval echo ~$(whoami))
+          echo "$HOME/.pulumi/bin" >> $GITHUB_PATH
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Make sure uv is working
+        run: |
+          uv --version
+          uv sync
+          uv run python --version
+
+      - name: Check that pulumi is installed and authenticated
+        run: |
+          uv run pulumi whoami
+
+      - name: Prepare pulumi stack
+        run: |
+          uv run pulumi stack init ${STACK_NAME}
+          uv run pulumi config
+
+      - name: Pick which commit we will test
+        env:
+          # Pass the PR's head ref through the environment instead of interpolating the
+          # expression into the script.  The ref is a branch name chosen by the PR author,
+          # and an inline expression is pasted into the shell source before bash parses it,
+          # so a crafted branch name could otherwise run arbitrary commands on the runner.
+          PR_HEAD_REF: ${{ github.event.pull_request.head.ref }}
+        run: |
+          echo "This is a bit subtle."
+          echo "We can't just test on 'main' for fairly obvious reasons - we"
+          echo "want to test the code in this PR's branch. The current commit"
+          echo "right here is ${GITHUB_SHA}, which is likely a merge commit."
+          echo "Merge commits are challenging. They are what would happen if"
+          echo "this PR were to be merged into its base branch. But they are"
+          echo "ephemeral things and not available in the public repo. So the"
+          echo "EEUT can't just check them out. Making them available to the"
+          echo "EEUT would require pushing them and polluting the repo. So,"
+          echo "for now, we are going to use the PR's head ref"
+          echo "${PR_HEAD_REF}, which is the commit"
+          echo "that was used to create the PR. Recognizing that this doesn't"
+          echo "reflect what will happen after merge. But it's simpler."
+
+          # TODO: test on the merge commit by pushing it to the repo with a temporary
+          # branch, and then clean up the branch later.
+
+          # Quoted so the value is always passed as exactly one argument, even when it
+          # is empty (e.g. the run was not triggered by a pull request).
+          COMMIT_TO_TEST="${PR_HEAD_REF}"
+          uv run pulumi config set ee-cicd:targetCommit "${COMMIT_TO_TEST}"
+
+      - name: Create the EEUT instance
+        # Applies the Pulumi program to the stack initialised earlier in this job;
+        # --yes approves the update without an interactive prompt.
+        run: |
+          uv run pulumi up --yes
+
+      - name: Check that EE install succeeded
+        run: |
+          uv run fab connect --patience=150
+          uv run fab wait-for-ee-setup
+
+      - name: Wait for K8 to load everything
+        run: |
+          uv run fab check-k8-deployments
+          uv run fab check-server-port
+
+      - name: Use groundlight sdk through EE
+        # Reads the instance's private IP from the stack outputs, points the SDK's
+        # endpoint at port 30101 on it, then runs two CLI commands through that endpoint.
+        # NOTE(review): 30101 is presumably the port the edge-endpoint service listens on — confirm.
+        run: |
+          EEUT_IP=$(uv run pulumi stack output eeut_private_ip)
+          export GROUNDLIGHT_ENDPOINT=http://${EEUT_IP}:30101
+          uv run groundlight whoami
+          uv run groundlight list-detectors
+
+      - name: Thank the worker and shut down
+        if: always()
+        run: |
+          echo "Strong work, G4! Now go to sleep. The grim sweeper will visit soon."
+          # This saves money and frees up resources
+          uv run fab shutdown-instance
+
   build-push-edge-endpoint-multiplatform:
     if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' }}
     # We only run this action if all the prior test actions succeed
     needs:
       - test-general-edge-endpoint
       - test-sdk
       - validate-setup-ee
+      - G4-end-to-end
     runs-on: ubuntu-22.04
     steps:
       - name: Configure AWS credentials

@@ -0,0 +1,61 @@
+name: sweeper-eeut
+# This workflow tears down old EEUT stacks from pulumi.
+# We do this as a background sweeper job, because the teardown is VERY slow (~7 minutes for a g4)
+# and we don't want to slow down the main pipeline for that.
+on:
+  schedule:
+    - cron: '*/15 * * * *'  # Every 15 minutes
+      # Note cron workflows only run from the main branch.
+  push:
+    branches:
+      # If you're working on this stuff, name your branch e2e-something and this will run.
+      - e2e*
+concurrency:
+  group: sweeper-eeut
+env:
+  PYTHON_VERSION: "3.11"
+
+jobs:
+  destroy-expired-eeut-stacks:
+    #runs-on: ubuntu-22.04  # preferably
+    # Currently running on self-hosted because something is wrong with the AWS perms on the GH runners.
+    runs-on: self-hosted
+    env:
+      PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_CICD_PAT }}
+    defaults:
+      run:
+        working-directory: cicd/pulumi
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Set AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          aws-region: us-west-2
+          # TODO: move these back to GH-provided secrets
+          # Currently using IAM roles on the self-hosted runner instance.
+          #aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          #aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          #aws-session-token: ${{ secrets.AWS_SESSION_TOKEN }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Pulumi
+        # Runs Pulumi's install script, re-derives HOME from the current user name via
+        # tilde expansion, and appends $HOME/.pulumi/bin to GITHUB_PATH so that later
+        # steps can find the pulumi binary.
+        # NOTE(review): presumably HOME is unset or unreliable on the self-hosted runner — confirm.
+        run: |
+          curl -fsSL https://get.pulumi.com | sh
+          export HOME=$(eval echo ~$(whoami))
+          echo "$HOME/.pulumi/bin" >> $GITHUB_PATH
+
+      - name: Check that pulumi is installed and authenticated
+        run: |
+          set -ex
+          pulumi whoami
+
+      - name: Destroy old EEUT stacks
+        # Runs from cicd/pulumi, which is this job's default working directory.
+        run: ./sweep-destroy-eeut-stacks.sh
@@ -0,0 +1,102 @@
+#! /bin/bash
+# This script is intended to run on a new ubuntu instance to set it up 
+# Sets up an edge-endpoint environment.
+# It is tested in the CICD pipeline to install the edge-endpoint on a new 
+# g4dn.xlarge EC2 instance with Ubuntu 22.04LTS.
+
+# As a user-data script on ubuntu, this file probably lands at
+# /var/lib/cloud/instance/user-data.txt
+echo "Setting up Groundlight Edge Endpoint.  Follow along at /var/log/cloud-init-output.log" > /etc/motd
+
+echo "Starting cloud init.  Uptime: $(uptime)"
+
+# Set up signals about the status of the installation
+mkdir -p /opt/groundlight/ee-install-status
+touch /opt/groundlight/ee-install-status/installing
+SETUP_COMPLETE=0
+record_result() {
+    # EXIT-trap handler: publishes the final install status as a marker file plus a
+    # matching /etc/motd message.  SETUP_COMPLETE stays 0 unless the script reached
+    # its last lines, so any earlier exit is reported as a failure.
+    if ! [ "$SETUP_COMPLETE" -eq 0 ]; then
+        echo "Setup complete at $(date)"
+        echo "Groundlight Edge Endpoint setup complete.  See /var/log/cloud-init-output.log for details." > /etc/motd
+        touch /opt/groundlight/ee-install-status/success
+    else
+        echo "Setup failed at $(date)"
+        touch /opt/groundlight/ee-install-status/failed
+        echo "Groundlight Edge Endpoint setup FAILED.  See /var/log/cloud-init-output.log for details." > /etc/motd
+    fi
+    # The "installing" marker goes away only after the final marker exists, so an
+    # observer never sees a moment with no status file at all.
+    rm -f /opt/groundlight/ee-install-status/installing
+}
+trap record_result EXIT
+
+set -e  # Exit on error of any command.
+
+wait_for_apt_lock() {
+    # Block until no process has any of the apt/dpkg lock files open.
+    # Unattended-upgrades can hold these locks and cause the install to fail.
+    # The package-list lock is checked as well as the dpkg locks, because that is
+    # the one "apt update" takes; polling only the dpkg frontend lock would let an
+    # update collide with another list refresh.
+    # This is best-effort: another process can still take a lock between this
+    # check and the apt command that follows it.
+    while sudo fuser /var/lib/dpkg/lock-frontend /var/lib/dpkg/lock \
+            /var/lib/apt/lists/lock >/dev/null 2>&1; do
+        echo "Another apt/dpkg process is running. Waiting for it to finish..."
+        sleep 5
+    done
+}
+
+# Install basic tools
+wait_for_apt_lock
+sudo apt update
+wait_for_apt_lock
+sudo apt install -y \
+    git \
+    vim \
+    tmux \
+    htop \
+    curl \
+    wget \
+    tree \
+    bash-completion \
+    ffmpeg
+
+# Download the edge-endpoint code
+CODE_BASE=/opt/groundlight/src/
+mkdir -p "${CODE_BASE}"
+cd "${CODE_BASE}"
+git clone https://github.com/groundlight/edge-endpoint
+cd edge-endpoint/
+# The launching script should update this to a specific commit.
+SPECIFIC_COMMIT="__EE_COMMIT_HASH__"
+if [ -n "$SPECIFIC_COMMIT" ]; then
+    # See if the string got substituted.  Note can't compare to the whole thing
+    # because that would be substituted too!  So only a prefix of the placeholder
+    # is compared.  That prefix is 11 characters long, and the slice must be the
+    # same length or the two sides can never be equal.
+    if [ "${SPECIFIC_COMMIT:0:11}" != "__EE_COMMIT" ]; then
+        echo "Checking out commit ${SPECIFIC_COMMIT}"
+        git checkout "${SPECIFIC_COMMIT}"
+    else
+        echo "It appears the commit hash was not substituted.  Staying on main."
+    fi
+else
+    echo "A blank commit hash was provided.  Staying on main."
+fi
+
+# Set up k3s with GPU support
+./deploy/bin/install-k3s-nvidia.sh
+
+# Set up some shell niceties
+TARGET_USER="ubuntu"
+echo "alias k='kubectl'" >> /home/${TARGET_USER}/.bashrc
+echo "source <(kubectl completion bash)" >> /home/${TARGET_USER}/.bashrc
+echo "complete -F __start_kubectl k" >> /home/${TARGET_USER}/.bashrc
+echo "set -o vi" >> /home/${TARGET_USER}/.bashrc
+
+# Configure the edge-endpoint with environment variables
+export DEPLOYMENT_NAMESPACE="gl-edge"
+export INFERENCE_FLAVOR="GPU"
+export GROUNDLIGHT_API_TOKEN="api_token_not_set"
+
+# Install the edge-endpoint
+kubectl create namespace gl-edge
+kubectl config set-context edge --namespace=gl-edge --cluster=default --user=default
+kubectl config use-context edge
+./deploy/bin/setup-ee.sh
+
+# Indicate that setup is complete
+SETUP_COMPLETE=1
+echo "EE is installed into kubernetes, which will attempt to finish the setup."
@@ -0,0 +1,3 @@
+echo "This is a uv project.  Remember to 'uv run ...' everything"
+uv sync
+
@@ -0,0 +1,5 @@
+
+*.pyc
+venv/
+.venv/
+__pycache__/
@@ -0,0 +1,11 @@
+name: ee-cicd
+runtime:
+  name: python
+  options:
+    toolchain: uv
+description: CI/CD for Edge Endpoint
+config:
+  ee-cicd:instanceType: g4dn.xlarge
+  # Default to "main" so things are sensible if this doesn't get customized.
+  # But for testing purposes, this should be set to the specific commit you want to test.
+  ee-cicd:targetCommit: main
@@ -0,0 +1,5 @@
+# Pulumi automation
+
+Pulumi automation to build an EE from scratch in EC2 and run basic integration tests.
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		echo "This is a uv project. Remember to 'uv run ...' everything"
Copy link Member tyler-romero Jan 22, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Oooohhh
		uv sync
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,5 @@
		# Pulumi automation

		Pulumi automation to build an EE from scratch in EC2 and run basic integration tests.