edge-inference on k3s on balena! (#64)
POC for edge-endpoint and inference-service successfully running on k3s on a machine with Balena OS.
tyler-romero authored May 29, 2024
1 parent 1e65841 commit 11febf6
Showing 14 changed files with 212 additions and 35 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -6,3 +6,4 @@
Dockerfile
README.md
test_coverage_reports/
+deploy/balena-k3s/build
2 changes: 1 addition & 1 deletion app/core/configs.py
@@ -34,7 +34,7 @@ class LocalInferenceConfig(BaseModel):
enabled: bool = Field(False, description="Determines if local edge inference is enabled for a specific detector.")
api_token: Optional[str] = Field(None, description="API token to fetch the inference model for this detector.")
refresh_rate: float = Field(
-        120.0,
+        default=120.0,
description=(
"The refresh rate for the inference server (in seconds). This means how often to check for an updated model"
" binary."
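For context, `LocalInferenceConfig` is the Pydantic model behind the per-detector edge-inference settings in `edge-config.yaml`. A hypothetical config fragment exercising these fields might look like the following — only the three field names (`enabled`, `api_token`, `refresh_rate`) come from the model; the surrounding schema and detector ID are illustrative assumptions:

```yaml
# Hypothetical edge-config.yaml fragment -- surrounding keys are assumed
detectors:
  - detector_id: det_abc123        # illustrative detector ID
    local_inference_config:
      enabled: true
      api_token: null              # with this commit, falls back to GROUNDLIGHT_API_TOKEN
      refresh_rate: 120.0          # seconds between checks for an updated model binary
```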
6 changes: 5 additions & 1 deletion app/core/edge_inference.py
@@ -159,6 +159,9 @@ def update_model(self, detector_id: str) -> bool:
else None
)

+        # fall back to the env var if we don't have a token in the config
+        api_token = api_token or os.environ.get("GROUNDLIGHT_API_TOKEN", None)

model_urls = fetch_model_urls(detector_id, api_token=api_token)
cloud_binary_ksuid = model_urls.get("model_binary_id", None)
if cloud_binary_ksuid is None:
@@ -245,7 +248,6 @@ def save_model_to_repository(
os.makedirs(model_version_dir, exist_ok=True)

# Add model-version specific files (model.py and model.buf)
-        # NOTE: these files should be static and not change between model versions
create_file_from_template(
template_values={"pipeline_config": pipeline_config},
destination=os.path.join(model_version_dir, "model.py"),
Expand All @@ -258,6 +260,8 @@ def save_model_to_repository(
f.write(binary_ksuid)

# Add/Overwrite model configuration files (config.pbtxt and binary_labels.txt)
+        # Generally these files should be static. Changing them can make earlier
+        # model versions incompatible with newer ones.
create_file_from_template(
template_values={"model_name": detector_id},
destination=os.path.join(model_dir, "config.pbtxt"),
7 changes: 7 additions & 0 deletions balena.yml
@@ -0,0 +1,7 @@
# https://github.com/balena-io-experimental/balena-k3s/blob/main/balena.yml
name: balena-k3s-edge-endpoint
type: sw.application
version: 0.2.8
description: >-
Run the edge-endpoint on top of k3s on Balena. The inference-server
will also be deployed on the same k3s cluster.
26 changes: 26 additions & 0 deletions deploy/balena-k3s/README.md
@@ -0,0 +1,26 @@
# Running the edge-endpoint via k3s on a balena device

## Setup
Tested using an EC2 m6 instance with a 64GB disk. Everything except the Triton inference server also works on a Raspberry Pi 5 (which has a 64-bit OS and 8GB of RAM), but the inference server is too demanding for the RPi5.

From the root of `edge-endpoint`, run:
```bash
balena login
balena push <your-fleet>
```
This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g. by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.
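For orientation, here is a minimal sketch of what the fleet's `docker-compose.yml` plausibly looks like for these two services. This file isn't shown in the visible diff; the build paths follow this commit's `deploy/balena-k3s/` layout, while the privileges and volumes are assumptions:

```yaml
# Hypothetical sketch -- the commit's actual compose file may differ
version: "2.1"
services:
  server:                                  # the k3s server / cluster node
    build: ./deploy/balena-k3s/server
    privileged: true                       # k3s needs broad access to the host kernel
    network_mode: host
    volumes:
      - k3s-data:/var/lib/rancher/k3s      # assumed: persist cluster state across restarts
  bastion:                                 # admin shell with kubectl/helm and this repo
    build: ./deploy/balena-k3s/bastion
    network_mode: host
    volumes:
      - k3s-data:/var/lib/rancher/k3s      # assumed: shared state so bastion can reach the cluster
volumes:
  k3s-data: {}
```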

Now, we have our k3s cluster built and running, but we have not started our edge deployment.

Configure the following variables via the `<fleet>/Variables` or `<device>/Device Variables` interface on the balenaCloud dashboard:
```
GROUNDLIGHT_API_TOKEN - so that we can authorize the fetching of edge model binaries
AWS_ACCESS_KEY_ID - so we can pull the edge-endpoint and gl-tritonserver images from ECR
AWS_SECRET_ACCESS_KEY - needed along with AWS_ACCESS_KEY_ID
```
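These can also be set from the balena CLI instead of the dashboard; for example (fleet name and values are placeholders):

```bash
# Fleet-wide variables via the balena CLI
balena env add GROUNDLIGHT_API_TOKEN "api_2Abc..." --fleet <your-fleet>
balena env add AWS_ACCESS_KEY_ID "AKIA..." --fleet <your-fleet>
balena env add AWS_SECRET_ACCESS_KEY "wJal..." --fleet <your-fleet>
```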

Now, ssh into `bastion` and run the following:
```bash
cd /app/edge-endpoint
INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
```
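Once the script completes, you can sanity-check the deployment from the same `bastion` shell (pod and deployment names below are assumptions based on this commit's manifests):

```bash
kubectl get nodes                                # the single k3s node should be Ready
kubectl get pods -n default                      # expect the edge-endpoint pod to come up
kubectl logs -n default deploy/edge-endpoint     # assumed deployment name
```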
55 changes: 55 additions & 0 deletions deploy/balena-k3s/bastion/Dockerfile
@@ -0,0 +1,55 @@
# https://github.com/balena-io-experimental/balena-k3s/blob/main/bastion/Dockerfile
# ------- Build Stage -------
FROM golang:1.21.0 AS arkade

WORKDIR /src

ARG ARKADE_VERSION=0.10.1
ARG CGO_ENABLED=0

ADD https://github.com/alexellis/arkade/archive/refs/tags/${ARKADE_VERSION}.tar.gz ./

RUN tar xvf ${ARKADE_VERSION}.tar.gz --strip-components=1 && make build

# ------- Runtime Stage -------
FROM debian:bullseye-slim

WORKDIR /app

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install --no-install-recommends -y \
ca-certificates \
curl \
unzip \
dnsutils \
vim \
jq \
gettext-base && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# Note: this currently installs only the ARM64 (aarch64) build of the AWS CLI
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"
RUN unzip awscliv2.zip && rm awscliv2.zip
RUN ./aws/install
RUN aws --version

COPY --from=arkade /src/arkade /usr/local/bin/arkade

ENV PATH "${PATH}:/root/.arkade/bin/"

RUN arkade version && \
arkade get --progress=false \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected]


# Copy edge-endpoint to /app/edge-endpoint
RUN mkdir -p /app/edge-endpoint
COPY . /app/edge-endpoint

ENTRYPOINT []
CMD [ "tail" , "-f", "/dev/null" ]
10 changes: 10 additions & 0 deletions deploy/balena-k3s/server/Dockerfile
@@ -0,0 +1,10 @@
# https://hub.docker.com/r/rancher/k3s/tags
# https://github.com/k3s-io/k3s/blob/master/package/Dockerfile
# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/Dockerfile
FROM rancher/k3s:v1.26.14-k3s1

COPY server.sh /server.sh
RUN chmod +x /server.sh

ENTRYPOINT []
CMD [ "/server.sh" ]
15 changes: 15 additions & 0 deletions deploy/balena-k3s/server/server.sh
@@ -0,0 +1,15 @@
#!/bin/sh

set -eu

# https://docs.k3s.io/cli/server
# https://docs.k3s.io/datastore/ha-embedded
# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/server.sh

if [ -n "${K3S_URL:-}" ]; then
# shellcheck disable=SC2086
exec /bin/k3s server --server "${K3S_URL}" ${EXTRA_K3S_SERVER_ARGS:-}
else
# shellcheck disable=SC2086
exec /bin/k3s server --cluster-init ${EXTRA_K3S_SERVER_ARGS:-}
fi
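The unquoted `${EXTRA_K3S_SERVER_ARGS}` (hence the `shellcheck` suppressions above) is what lets a single fleet/device variable carry multiple k3s flags via word-splitting. An illustrative value — whether you want these components disabled depends on your deployment:

```bash
# Example balena fleet/device variable; splits into two --disable flags
EXTRA_K3S_SERVER_ARGS="--disable traefik --disable servicelb"
```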
34 changes: 20 additions & 14 deletions deploy/bin/cluster_setup.sh
@@ -8,31 +8,26 @@
# - detectors in the `inference_deployments` table
# - image queries in the `image_queries_edge` table
# For more on these tables you can examine the database file at
# /opt/groundlight/edge/sqlite/sqlite.db on the attached volume (EFS/local).

# Possible env vars:
# - KUBECTL_CMD: path to kubectl command. Defaults to "kubectl" but can be set to "k3s kubectl" if using k3s
# - INFERENCE_FLAVOR: "CPU" or "GPU". Defaults to "GPU"
# - EDGE_CONFIG: contents of edge-config.yaml. If not set, will use configs/edge-config.yaml
# - DEPLOY_LOCAL_VERSION: Indicates whether we are building the local version of the edge endpoint.
# If set to 0, we will attach an EFS instead of a local volume. Defaults to 1.
# - EFS_VOLUME_ID: ID of the EFS volume to use if we are using the EFS version.
# - DEPLOYMENT_NAMESPACE: Namespace to deploy to. Defaults to the current namespace.



-# move to the root directory of the repo
-cd "$(dirname "$0")"/../..

set -ex

fail() {
echo $1
exit 1
}

# Function to check for conflicting PV.
# This is a robustness measure to guard against errors when a user tries to create a
# persistent volume with hostPath when we already have an EFS volume mounted or vice versa.
check_pv_conflict() {
local pv_name=$1
@@ -59,13 +54,16 @@ check_pv_conflict() {
}


-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}
INFERENCE_FLAVOR=${INFERENCE_FLAVOR:-"GPU"}
DB_RESET=$1
DEPLOY_LOCAL_VERSION=${DEPLOY_LOCAL_VERSION:-1}
DEPLOYMENT_NAMESPACE=${DEPLOYMENT_NAMESPACE:-$($K config view -o json | jq -r '.contexts[] | select(.name == "'$($K config current-context)'") | .context.namespace')}


+# move to the root directory of the repo
+cd "$(dirname "$0")"/../..

# Secrets
./deploy/bin/make-aws-secret.sh

@@ -75,12 +73,20 @@ if ! $K get secret registry-credentials; then
fi


-# Configmaps and deployments
+# Configmaps, secrets, and deployments
$K delete configmap --ignore-not-found edge-config -n ${DEPLOYMENT_NAMESPACE}
$K delete configmap --ignore-not-found inference-deployment-template -n ${DEPLOYMENT_NAMESPACE}
$K delete configmap --ignore-not-found kubernetes-namespace -n ${DEPLOYMENT_NAMESPACE}
$K delete configmap --ignore-not-found setup-db -n ${DEPLOYMENT_NAMESPACE}
$K delete configmap --ignore-not-found db-reset -n ${DEPLOYMENT_NAMESPACE}
+$K delete secret --ignore-not-found groundlight-api-token -n ${DEPLOYMENT_NAMESPACE}

+set +x # temporarily disable command echoing to avoid printing secrets
+if [[ -n "${GROUNDLIGHT_API_TOKEN}" ]]; then
+    echo "Creating groundlight-api-token secret"
+    $K create secret generic groundlight-api-token --from-literal=GROUNDLIGHT_API_TOKEN="${GROUNDLIGHT_API_TOKEN}" -n ${DEPLOYMENT_NAMESPACE}
+fi
+set -x # re-enable command echoing

if [[ -n "${EDGE_CONFIG}" ]]; then
echo "Creating config from EDGE_CONFIG env var"
@@ -136,7 +142,7 @@ if [[ "${DEPLOY_LOCAL_VERSION}" == "1" ]]; then

$K apply -f deploy/k3s/local_persistent_volume.yaml
else
# If environment variable EFS_VOLUME_ID is not set, exit
if [[ -z "${EFS_VOLUME_ID}" ]]; then
fail "EFS_VOLUME_ID environment variable not set"
fi
@@ -154,7 +160,7 @@ fi

# Check if the edge-endpoint-pvc exists. If not, create it
if ! $K get pvc edge-endpoint-pvc; then
# If environment variable EFS_VOLUME_ID is not set, exit
if [[ -z "${EFS_VOLUME_ID}" ]]; then
fail "EFS_VOLUME_ID environment variable not set"
fi
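Putting the documented variables together, an EFS-backed, GPU-flavored invocation of this script might look like the following (all values illustrative; the positional argument feeds `DB_RESET` per `DB_RESET=$1` above):

```bash
KUBECTL_CMD="k3s kubectl" \
INFERENCE_FLAVOR="GPU" \
DEPLOY_LOCAL_VERSION=0 \
EFS_VOLUME_ID="fs-0123456789abcdef0" \
DEPLOYMENT_NAMESPACE="edge" \
./deploy/bin/cluster_setup.sh 0   # 0 = do not reset database tables
```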
2 changes: 1 addition & 1 deletion deploy/bin/make-aws-secret.sh
@@ -1,6 +1,6 @@
#!/bin/bash

-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}

if command -v docker >/dev/null 2>&1; then
# Enable ECR login - make sure you have the aws client configured properly, or an IAM role
26 changes: 16 additions & 10 deletions deploy/k3s/edge_deployment/edge_deployment.yaml
@@ -40,12 +40,12 @@ spec:
serviceAccountName: edge-endpoint-service-account
initContainers:
- name: database-prep
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
imagePullPolicy: IfNotPresent
env:
# Flag to indicate whether or not to reset all database tables. Resetting WILL delete
# Flag to indicate whether or not to reset all database tables. Resetting WILL delete
# all existing data in the database, so set this flag to 1 with caution.
- name: DB_RESET
valueFrom:
configMapKeyRef:
name: db-reset
@@ -59,7 +59,7 @@ spec:

containers:
- name: edge-endpoint
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
imagePullPolicy: IfNotPresent
ports:
- containerPort: 6717
@@ -72,18 +72,18 @@ spec:
# This feature flag is basically good for knowing when to use the python kubernetes API
# (i.e., creating deployments, etc.). We don't want to use the python kubernetes API
# if we are only running the edge logic server in docker.
# TODO: Once we have kubernetes-based tests, we can remove this feature flag.
- name: DEPLOY_DETECTOR_LEVEL_INFERENCE
value: "1"
volumeMounts:
- name: edge-config-volume
mountPath: /etc/groundlight/edge-config

- name: edge-endpoint-persistent-volume
mountPath: /opt/groundlight/edge/sqlite

- name: inference-model-updater
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
imagePullPolicy: IfNotPresent
command: ["/bin/bash", "-c"]
args: ["poetry run python -m app.model_updater.update_models"]
@@ -92,6 +92,12 @@ spec:
value: "INFO"
- name: DEPLOY_DETECTOR_LEVEL_INFERENCE
value: "1"
+        - name: GROUNDLIGHT_API_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: groundlight-api-token
+              key: GROUNDLIGHT_API_TOKEN
+              optional: true
volumeMounts:
- name: edge-config-volume
mountPath: /etc/groundlight/edge-config
@@ -102,9 +108,9 @@ spec:
- name: inference-deployment-template-volume
mountPath: /etc/groundlight/inference-deployment

# In this setup the edge-endpoint-persistent-volume is mounted to
# two different paths in the inference-model-updater container.
# This allows the container to access both the sqlite database and
# the path to the model repository without needing to create an extra PV and PVC.
- name: edge-endpoint-persistent-volume
mountPath: /opt/groundlight/edge/sqlite
@@ -119,7 +125,7 @@ spec:
- name: edge-config-volume
configMap:
name: edge-config
- name: kubernetes-namespace
configMap:
name: kubernetes-namespace
- name: inference-deployment-template-volume
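Because the new `GROUNDLIGHT_API_TOKEN` env var above is marked `optional: true`, the pod starts even when the secret is absent. If `cluster_setup.sh` didn't create it (e.g. the env var wasn't exported), it can be created or inspected by hand — the secret name and key come from the manifest; the `default` namespace is an assumption:

```bash
kubectl create secret generic groundlight-api-token \
  --from-literal=GROUNDLIGHT_API_TOKEN="api_2Abc..." -n default
kubectl get secret groundlight-api-token -n default -o yaml   # verify (values are base64-encoded)
```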
@@ -40,7 +40,7 @@ spec:
maxUnavailable: 0 # Aim for no downtime during rollout
containers:
- name: inference-server
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:929c52e9d-main
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:63e8110ca-tyler-bumptorch
imagePullPolicy: IfNotPresent
# Tritonserver will look for models in /mnt/models and initialize them on startup.
# When running multiple instances of Triton server on the same machine that use Python models,
@@ -88,4 +88,4 @@ spec:
- name: dshm
emptyDir:
medium: Memory
-          sizeLimit: 512Mi
+          sizeLimit: 128Mi
