diff --git a/.dockerignore b/.dockerignore
index f08dfcb4..d28930bb 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,3 +6,4 @@
 Dockerfile
 README.md
 test_coverage_reports/
+deploy/balena-k3s/build
\ No newline at end of file
diff --git a/app/core/configs.py b/app/core/configs.py
index c009f74c..64063b28 100644
--- a/app/core/configs.py
+++ b/app/core/configs.py
@@ -34,7 +34,7 @@ class LocalInferenceConfig(BaseModel):
     enabled: bool = Field(False, description="Determines if local edge inference is enabled for a specific detector.")
     api_token: Optional[str] = Field(None, description="API token to fetch the inference model for this detector.")
     refresh_rate: float = Field(
-        120.0,
+        default=120.0,
         description=(
             "The refresh rate for the inference server (in seconds). This means how often to check for an updated model"
             " binary."
diff --git a/app/core/edge_inference.py b/app/core/edge_inference.py
index 6af5973c..abb6dcd3 100644
--- a/app/core/edge_inference.py
+++ b/app/core/edge_inference.py
@@ -159,6 +159,9 @@ def update_model(self, detector_id: str) -> bool:
             else None
         )
 
+        # Fall back to the env var if we don't have a token in the config
+        api_token = api_token or os.environ.get("GROUNDLIGHT_API_TOKEN", None)
+
         model_urls = fetch_model_urls(detector_id, api_token=api_token)
         cloud_binary_ksuid = model_urls.get("model_binary_id", None)
         if cloud_binary_ksuid is None:
@@ -245,7 +248,6 @@ def save_model_to_repository(
     os.makedirs(model_version_dir, exist_ok=True)
 
     # Add model-version specific files (model.py and model.buf)
-    # NOTE: these files should be static and not change between model versions
     create_file_from_template(
         template_values={"pipeline_config": pipeline_config},
         destination=os.path.join(model_version_dir, "model.py"),
@@ -258,6 +260,8 @@ def save_model_to_repository(
         f.write(binary_ksuid)
 
     # Add/Overwrite model configuration files (config.pbtxt and binary_labels.txt)
+    # Generally these files should be static. Changing them can make earlier
+    # model versions incompatible with newer ones.
    create_file_from_template(
         template_values={"model_name": detector_id},
         destination=os.path.join(model_dir, "config.pbtxt"),
diff --git a/balena.yml b/balena.yml
new file mode 100644
index 00000000..6a78f3e0
--- /dev/null
+++ b/balena.yml
@@ -0,0 +1,7 @@
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/balena.yml
+name: balena-k3s-edge-endpoint
+type: sw.application
+version: 0.2.8
+description: >-
+  Run the edge-endpoint on top of k3s on Balena. The inference-server
+  will also be deployed on the same k3s cluster.
\ No newline at end of file
diff --git a/deploy/balena-k3s/README.md b/deploy/balena-k3s/README.md
new file mode 100644
index 00000000..54b907af
--- /dev/null
+++ b/deploy/balena-k3s/README.md
@@ -0,0 +1,32 @@
+# Running the edge-endpoint via k3s on a Balena device
+
+## Setup
+Tested using an EC2 m6 instance with a 64GB disk. Everything except the Triton inference server also works on a Raspberry Pi 5 (which has a 64-bit OS and 8GB of RAM), but the inference server is too demanding for the RPi 5.
+
+From the root of `edge-endpoint`, run:
+```bash
+balena login
+balena push
+```
+This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g., by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.
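+
+Once the push completes, you can sanity-check the cluster from the `bastion` service. A minimal check might look like the following (`<device-uuid>` is a placeholder for your device's UUID):
+```bash
+balena ssh <device-uuid> bastion
+kubectl get nodes   # the single k3s server node should report Ready
+```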
+
+At this point the k3s cluster is built and running, but we have not yet started our edge deployment.
+
+Configure the following variables via the `/Variables` or `/Device Variables` interfaces on the BalenaCloud dashboard:
+```
+GROUNDLIGHT_API_TOKEN - so that we can authorize the fetching of edge model binaries
+AWS_ACCESS_KEY_ID - so we can pull the edge-endpoint and gl-tritonserver images from ECR
+AWS_SECRET_ACCESS_KEY - needed along with AWS_ACCESS_KEY_ID
+```
+
+Now, SSH into `bastion` and run the following:
+```bash
+cd /app/edge-endpoint
+INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
+```
diff --git a/deploy/balena-k3s/bastion/Dockerfile b/deploy/balena-k3s/bastion/Dockerfile
new file mode 100644
index 00000000..22aa6847
--- /dev/null
+++ b/deploy/balena-k3s/bastion/Dockerfile
@@ -0,0 +1,57 @@
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/bastion/Dockerfile
+# ------- Build Stage -------
+FROM golang:1.21.0 AS arkade
+
+WORKDIR /src
+
+ARG ARKADE_VERSION=0.10.1
+ARG CGO_ENABLED=0
+
+ADD https://github.com/alexellis/arkade/archive/refs/tags/${ARKADE_VERSION}.tar.gz ./
+
+RUN tar xvf ${ARKADE_VERSION}.tar.gz --strip-components=1 && make build
+
+# ------- Runtime Stage -------
+FROM debian:bullseye-slim
+
+WORKDIR /app
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    ca-certificates \
+    curl \
+    unzip \
+    dnsutils \
+    vim \
+    jq \
+    gettext-base && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Note: this fetches the ARM64 (aarch64) build of the AWS CLI, so the image currently only supports ARM hosts
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"
+RUN unzip awscliv2.zip && rm awscliv2.zip
+RUN ./aws/install
+RUN aws --version
+
+COPY --from=arkade /src/arkade /usr/local/bin/arkade
+
+ENV PATH "${PATH}:/root/.arkade/bin/"
+
+RUN arkade version && \
+    arkade get --progress=false \
+    flux@v0.39.0 \
+    helm@v3.11.1 \
+    k3sup@0.12.12 \
+    k9s@v0.27.2 \
+    kubectl@v1.26.2
+
+
+# Copy edge-endpoint to /app/edge-endpoint
+RUN mkdir -p /app/edge-endpoint
+COPY . /app/edge-endpoint
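+
+# Keep the container alive so users can shell in; the empty ENTRYPOINT below
+# clears any entrypoint inherited from the base image.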
+ENTRYPOINT []
+CMD [ "tail", "-f", "/dev/null" ]
\ No newline at end of file
diff --git a/deploy/balena-k3s/server/Dockerfile b/deploy/balena-k3s/server/Dockerfile
new file mode 100644
index 00000000..262ab441
--- /dev/null
+++ b/deploy/balena-k3s/server/Dockerfile
@@ -0,0 +1,10 @@
+# https://hub.docker.com/r/rancher/k3s/tags
+# https://github.com/k3s-io/k3s/blob/master/package/Dockerfile
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/Dockerfile
+FROM rancher/k3s:v1.26.14-k3s1
+
+COPY server.sh /server.sh
+RUN chmod +x /server.sh
+
+ENTRYPOINT []
+CMD [ "/server.sh" ]
\ No newline at end of file
diff --git a/deploy/balena-k3s/server/server.sh b/deploy/balena-k3s/server/server.sh
new file mode 100644
index 00000000..9c1cb3e2
--- /dev/null
+++ b/deploy/balena-k3s/server/server.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -eu
+
+# https://docs.k3s.io/cli/server
+# https://docs.k3s.io/datastore/ha-embedded
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/server.sh
+
+if [ -n "${K3S_URL:-}" ]; then
+    # shellcheck disable=SC2086
+    exec /bin/k3s server --server "${K3S_URL}" ${EXTRA_K3S_SERVER_ARGS:-}
+else
+    # shellcheck disable=SC2086
+    exec /bin/k3s server --cluster-init ${EXTRA_K3S_SERVER_ARGS:-}
+fi
\ No newline at end of file
diff --git a/deploy/bin/cluster_setup.sh b/deploy/bin/cluster_setup.sh
index f4181f23..3c749f12 100755
--- a/deploy/bin/cluster_setup.sh
+++ b/deploy/bin/cluster_setup.sh
@@ -8,22 +8,17 @@
 # - detectors in the `inference_deployments` table
 # - image queries in the `image_queries_edge` table
 # For more on these tables you can examine the database file at
-# /opt/groundlight/edge/sqlite/sqlite.db on the attached volume (EFS/local). 
+# /opt/groundlight/edge/sqlite/sqlite.db on the attached volume (EFS/local).
 # Possible env vars:
 # - KUBECTL_CMD: path to kubectl command. Defaults to "kubectl" but can be set to "k3s kubectl" if using k3s
 # - INFERENCE_FLAVOR: "CPU" or "GPU". Defaults to "GPU"
 # - EDGE_CONFIG: contents of edge-config.yaml. If not set, will use configs/edge-config.yaml
-# - DEPLOY_LOCAL_VERSION: Indicates whether we are building the local version of the edge endpoint. 
+# - DEPLOY_LOCAL_VERSION: Indicates whether we are building the local version of the edge endpoint.
 #   If set to 0, we will attach an EFS instead of a local volume. Defaults to 1.
-# - EFS_VOLUME_ID: ID of the EFS volume to use if we are using the EFS version. 
+# - EFS_VOLUME_ID: ID of the EFS volume to use if we are using the EFS version.
 # - DEPLOYMENT_NAMESPACE: Namespace to deploy to. Defaults to the current namespace.
-
-
-# move to the root directory of the repo
-cd "$(dirname "$0")"/../..
-
 set -ex
 
 fail() {
@@ -31,8 +26,8 @@ fail() {
     exit 1
 }
 
-# Function to check for conflicting PV. 
-# This is a robustness measure to guard against errors when a user tries to create a 
+# Function to check for conflicting PV.
+# This is a robustness measure to guard against errors when a user tries to create a
 # persistent volume with hostPath when we already have an EFS volume mounted or vice versa.
 check_pv_conflict() {
     local pv_name=$1
@@ -59,13 +54,16 @@ check_pv_conflict() {
 }
 
-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}
 INFERENCE_FLAVOR=${INFERENCE_FLAVOR:-"GPU"}
 DB_RESET=$1
 DEPLOY_LOCAL_VERSION=${DEPLOY_LOCAL_VERSION:-1}
 DEPLOYMENT_NAMESPACE=${DEPLOYMENT_NAMESPACE:-$($K config view -o json | jq -r '.contexts[] | select(.name == "'$($K config current-context)'") | .context.namespace')}
 
+# move to the root directory of the repo
+cd "$(dirname "$0")"/../..
+
 # Secrets
 ./deploy/bin/make-aws-secret.sh
@@ -75,12 +73,20 @@ if ! $K get secret registry-credentials; then
 fi
 
-# Configmaps and deployments
+# Configmaps, secrets, and deployments
 $K delete configmap --ignore-not-found edge-config -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found inference-deployment-template -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found kubernetes-namespace -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found setup-db -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found db-reset -n ${DEPLOYMENT_NAMESPACE}
+$K delete secret --ignore-not-found groundlight-api-token -n ${DEPLOYMENT_NAMESPACE}
+
+set +x # temporarily disable command echoing to avoid printing secrets
+if [[ -n "${GROUNDLIGHT_API_TOKEN}" ]]; then
+    echo "Creating groundlight-api-token secret"
+    $K create secret generic groundlight-api-token --from-literal=GROUNDLIGHT_API_TOKEN="${GROUNDLIGHT_API_TOKEN}" -n ${DEPLOYMENT_NAMESPACE}
+fi
+set -x # re-enable command echoing
 
 if [[ -n "${EDGE_CONFIG}" ]]; then
     echo "Creating config from EDGE_CONFIG env var"
@@ -136,7 +142,7 @@ if [[ "${DEPLOY_LOCAL_VERSION}" == "1" ]]; then
     $K apply -f deploy/k3s/local_persistent_volume.yaml
 else
-    # If environment variable EFS_VOLUME_ID is not set, exit 
+    # If environment variable EFS_VOLUME_ID is not set, exit
     if [[ -z "${EFS_VOLUME_ID}" ]]; then
         fail "EFS_VOLUME_ID environment variable not set"
     fi
@@ -154,7 +160,7 @@
 # Check if the edge-endpoint-pvc exists. If not, create it
 if ! $K get pvc edge-endpoint-pvc; then
-    # If environment variable EFS_VOLUME_ID is not set, exit 
+    # If environment variable EFS_VOLUME_ID is not set, exit
     if [[ -z "${EFS_VOLUME_ID}" ]]; then
         fail "EFS_VOLUME_ID environment variable not set"
     fi
diff --git a/deploy/bin/make-aws-secret.sh b/deploy/bin/make-aws-secret.sh
index ee4ee54a..62811723 100755
--- a/deploy/bin/make-aws-secret.sh
+++ b/deploy/bin/make-aws-secret.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}
 
 if command -v docker >/dev/null 2>&1; then
     # Enable ECR login - make sure you have the aws client configured properly, or an IAM role
diff --git a/deploy/k3s/edge_deployment/edge_deployment.yaml b/deploy/k3s/edge_deployment/edge_deployment.yaml
index 960a7319..727162be 100644
--- a/deploy/k3s/edge_deployment/edge_deployment.yaml
+++ b/deploy/k3s/edge_deployment/edge_deployment.yaml
@@ -40,12 +40,12 @@ spec:
       serviceAccountName: edge-endpoint-service-account
       initContainers:
       - name: database-prep
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         env:
-        # Flag to indicate whether or not to reset all database tables. Resetting WILL delete 
-        # all existing data in the database, so set this flag to 1 with caution. 
-        - name: DB_RESET 
+        # Flag to indicate whether or not to reset all database tables. Resetting WILL delete
+        # all existing data in the database, so set this flag to 1 with caution.
+        - name: DB_RESET
           valueFrom:
             configMapKeyRef:
               name: db-reset
@@ -59,7 +59,7 @@ spec:
       containers:
       - name: edge-endpoint
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         ports:
        - containerPort: 6717
@@ -72,18 +72,18 @@
         # This feature flag is basically good for knowing when to use the python kubernetes API
         # (i.e., creating deployments, etc.). We don't want to use the python kubernetes API
         # if we are only running the edge logic server in docker.
-        # TODO: Once we have kubernetes-based tests, we can remove this feature flag. 
+        # TODO: Once we have kubernetes-based tests, we can remove this feature flag.
         - name: DEPLOY_DETECTOR_LEVEL_INFERENCE
           value: "1"
         volumeMounts:
         - name: edge-config-volume
-          mountPath: /etc/groundlight/edge-config 
+          mountPath: /etc/groundlight/edge-config
         - name: edge-endpoint-persistent-volume
           mountPath: /opt/groundlight/edge/sqlite
 
       - name: inference-model-updater
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         command: ["/bin/bash", "-c"]
         args: ["poetry run python -m app.model_updater.update_models"]
@@ -92,6 +92,12 @@
             value: "INFO"
           - name: DEPLOY_DETECTOR_LEVEL_INFERENCE
             value: "1"
+          - name: GROUNDLIGHT_API_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: groundlight-api-token
+                key: GROUNDLIGHT_API_TOKEN
+                optional: true
         volumeMounts:
         - name: edge-config-volume
           mountPath: /etc/groundlight/edge-config
@@ -102,9 +108,9 @@
         - name: inference-deployment-template-volume
           mountPath: /etc/groundlight/inference-deployment
 
-        # In this setup the edge-endpoint-persistent-volume is mounted to 
-        # two different paths in the inference-model-updater container. 
-        # This allows the container to access both the sqlite database and 
+        # In this setup the edge-endpoint-persistent-volume is mounted to
+        # two different paths in the inference-model-updater container.
+        # This allows the container to access both the sqlite database and
         # the path to the model repository without needing to create an extra PV and PVC.
         - name: edge-endpoint-persistent-volume
           mountPath: /opt/groundlight/edge/sqlite
@@ -119,7 +125,7 @@
       volumes:
       - name: edge-config-volume
         configMap:
           name: edge-config
-      - name: kubernetes-namespace 
+      - name: kubernetes-namespace
        configMap:
          name: kubernetes-namespace
      - name: inference-deployment-template-volume
diff --git a/deploy/k3s/inference_deployment/inference_deployment_template.yaml b/deploy/k3s/inference_deployment/inference_deployment_template.yaml
index c30ea58f..2782e07a 100644
--- a/deploy/k3s/inference_deployment/inference_deployment_template.yaml
+++ b/deploy/k3s/inference_deployment/inference_deployment_template.yaml
@@ -40,7 +40,7 @@ spec:
         maxUnavailable: 0 # Aim for no downtime during rollout
       containers:
       - name: inference-server
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:929c52e9d-main
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:63e8110ca-tyler-bumptorch
         imagePullPolicy: IfNotPresent
         # Tritonserver will look for models in /mnt/models and initialize them on startup.
         # When running multiple instances of Triton server on the same machine that use Python models,
@@ -88,4 +88,4 @@
       - name: dshm
         emptyDir:
           medium: Memory
-          sizeLimit: 512Mi
\ No newline at end of file
+          sizeLimit: 128Mi
\ No newline at end of file
diff --git a/deploy/k3s/service_account.yaml b/deploy/k3s/service_account.yaml
index 85b89044..de50c51e 100644
--- a/deploy/k3s/service_account.yaml
+++ b/deploy/k3s/service_account.yaml
@@ -1,8 +1,8 @@
-# To securely interact with the Kubernetes API from within a pod, 
-# Kubernetes uses a system called Role-Based Access Control (RBAC). 
-# When the Kubernetes Python client inside any pod tries to access 
-# the Kubernetes API, it needs to get auntheticated and authorized 
+# To securely interact with the Kubernetes API from within a pod,
+# Kubernetes uses a system called Role-Based Access Control (RBAC).
+# When the Kubernetes Python client inside any pod tries to access
+# the Kubernetes API, it needs to be authenticated and authorized
 # to access Kubernetes resources inside the cluster.
 # Specifying a service account and a cluster role with full access
 # to all resources in the cluster is the simplest way to do this.
@@ -19,7 +19,7 @@ metadata:
   namespace: ${DEPLOYMENT_NAMESPACE}
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: Role 
+kind: Role
 metadata:
   namespace: ${DEPLOYMENT_NAMESPACE}
   name: limited-access-role
@@ -31,9 +31,12 @@ rules:
 - apiGroups: [""] # "" indicates the core API group
   resources: ["pods"]
   verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["services"]
+  verbs: ["create", "get", "list", "watch", "delete", "update"]
 - apiGroups: ["apps"]
   resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete", "update"]
+  verbs: ["create", "get", "list", "watch", "delete", "update", "patch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..e6ad02d2
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,44 @@
+version: "2.1"
+
+# Balena docker-compose.yml for standing up a k3s server and bastion
+# https://github.com/k3s-io/k3s/blob/master/docker-compose.yml
+
+services:
+  server:
+    # https://docs.k3s.io/advanced#running-k3s-in-docker
+    build:
+      context: deploy/balena-k3s/server
+    ulimits:
+      nproc: 65535
+      nofile:
+        soft: 65535
+        hard: 65535
+    privileged: true
+    network_mode: host
+    # https://docs.k3s.io/cli/server#cluster-options
+    environment:
+      K3S_KUBECONFIG_OUTPUT: "/shared/kubeconfig.yaml"
+      K3S_KUBECONFIG_MODE: "666"
+      EXTRA_K3S_SERVER_ARGS: ""
+    tmpfs:
+      - /run
+      - /var/run
+    volumes:
+      - k3s-server:/var/lib/rancher/k3s
+      - shared:/shared
+
+  bastion:
+    build:
+      context: .
+      dockerfile: deploy/balena-k3s/bastion/Dockerfile
+    network_mode: host
+    environment:
+      KUBECONFIG: "/shared/kubeconfig.yaml"
+    volumes:
+      - shared:/shared:ro
+    depends_on:
+      - server
+
+volumes:
+  k3s-server: {}
+  shared: {}
\ No newline at end of file