Working edge inference on rpi
tyler-romero committed May 28, 2024
1 parent fd832b7 commit 8be45a2
Showing 9 changed files with 72 additions and 12 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -6,3 +6,4 @@
Dockerfile
README.md
test_coverage_reports/
+deploy/balena-k3s/build
3 changes: 2 additions & 1 deletion app/core/edge_inference.py
@@ -248,7 +248,6 @@ def save_model_to_repository(
os.makedirs(model_version_dir, exist_ok=True)

# Add model-version specific files (model.py and model.buf)
-# NOTE: these files should be static and not change between model versions
create_file_from_template(
template_values={"pipeline_config": pipeline_config},
destination=os.path.join(model_version_dir, "model.py"),
@@ -261,6 +260,8 @@ def save_model_to_repository(
f.write(binary_ksuid)

# Add/Overwrite model configuration files (config.pbtxt and binary_labels.txt)
+# Generally these files should be static. Changing them can make earlier
+# model versions incompatible with newer ones.
create_file_from_template(
template_values={"model_name": detector_id},
destination=os.path.join(model_dir, "config.pbtxt"),
8 changes: 6 additions & 2 deletions configs/edge-config.yaml
@@ -35,6 +35,10 @@ detectors:
# Blank id implies that no detectors have been configured to use motion detection
# and/or local edge inference. The "default" templates are just examples of how one
# might configure these features.
-  - detector_id: ''
+  # - detector_id: 'det_2djhf9bB8wpg1RGGO3xF785vMWI'
+  # motion_detection_template: "default"
+  # local_inference_template: "default"
+  # Harry's detector
+  - detector_id: 'det_2fnMl34p2LMt8oVbdH02bTgmyvU'
    motion_detection_template: "default"
-    local_inference_template: "default"
\ No newline at end of file
+    local_inference_template: "default"
13 changes: 10 additions & 3 deletions deploy/balena-k3s/README.md
@@ -10,10 +10,17 @@ balena push <your-fleet>
```
This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g. by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.

-Now, we have our k3s cluster built and running, but we have not started our edge deployment. Now, ssh into `bastion` and run the following:
+Now, we have our k3s cluster built and running, but we have not started our edge deployment.
+
+Configure the following variables via the `<fleet>/Variables` or `<device>/Device Variables` interfaces on the BalenaCloud dashboard:
+```
+GROUNDLIGHT_API_TOKEN - so that we can authorize the fetching of edge model binaries
+AWS_ACCESS_KEY_ID - so we can pull the edge-endpoint and gl-tritonserver images from ECR
+AWS_SECRET_ACCESS_KEY - needed along with AWS_ACCESS_KEY_ID
+```
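If you prefer the CLI to the dashboard, the same variables can be set with `balena env add`. A minimal sketch, assuming the balena CLI is installed and logged in; the fleet name and values are placeholders:

```bash
balena env add GROUNDLIGHT_API_TOKEN "<your-token>" --fleet <your-fleet>
balena env add AWS_ACCESS_KEY_ID "<access-key-id>" --fleet <your-fleet>
balena env add AWS_SECRET_ACCESS_KEY "<secret-access-key>" --fleet <your-fleet>
```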

+Now, ssh into `bastion` and run the following:
```bash
cd /app/edge-endpoint
-aws configure # configure credentials
-export GROUNDLIGHT_API_TOKEN=<your-token>
KUBECTL_CMD="kubectl" INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
```
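Once `cluster_setup.sh` completes, a quick sanity check from `bastion` that the cluster and edge deployment came up; pod names will vary, and this assumes the `default` namespace used above:

```bash
kubectl get nodes               # the k3s server node should report Ready
kubectl get pods -n default     # edge-endpoint / inference pods should reach Running
```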
4 changes: 1 addition & 3 deletions deploy/balena-k3s/bastion/Dockerfile
@@ -46,12 +46,10 @@ RUN arkade version && \
[email protected] \
[email protected]

-# Install kustomize
-RUN curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
-RUN mv kustomize /usr/local/bin/kustomize

+# Copy edge-endpoint to /app/edge-endpoint
RUN mkdir -p /app/edge-endpoint
COPY . /app/edge-endpoint

ENTRYPOINT []
CMD [ "tail" , "-f", "/dev/null" ]
47 changes: 47 additions & 0 deletions deploy/balena-k3s/server/Dockerfile.orin
@@ -0,0 +1,47 @@
# https://hub.docker.com/r/rancher/k3s/tags
# https://github.com/k3s-io/k3s/blob/master/package/Dockerfile
# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/Dockerfile


# ----- k3s dockerfile (FROM rancher/k3s:v1.26.14-k3s1) -----
# FROM alpine:3.18 as base
FROM balenalib/jetson-orin-nano-devkit-nvme-alpine:3.18-build as base

RUN apk add -U ca-certificates tar zstd tzdata

# Need to build the k3s binary before this step
COPY ./build/k3s/build/out/data.tar.zst /

RUN mkdir -p /image/etc/ssl/certs /image/run /image/var/run /image/tmp /image/lib/modules /image/lib/firmware && \
tar -xa -C /image -f /data.tar.zst && \
echo "root:x:0:0:root:/:/bin/sh" > /image/etc/passwd && \
echo "root:x:0:" > /image/etc/group && \
cp /etc/ssl/certs/ca-certificates.crt /image/etc/ssl/certs/ca-certificates.crt

FROM scratch as collect
ARG DRONE_TAG="dev"
COPY --from=base /image /
COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo
RUN mkdir -p /etc && \
echo 'hosts: files dns' > /etc/nsswitch.conf && \
echo "PRETTY_NAME=\"K3s ${DRONE_TAG}\"" > /etc/os-release && \
chmod 1777 /tmp

FROM scratch
COPY --from=collect / /
VOLUME /var/lib/kubelet
VOLUME /var/lib/rancher/k3s
VOLUME /var/lib/cni
VOLUME /var/log
ENV PATH="$PATH:/bin/aux"
ENV CRI_CONFIG_FILE="/var/lib/rancher/k3s/agent/etc/crictl.yaml"

# ENTRYPOINT ["/bin/k3s"]
# CMD ["agent"]
# ----- end k3s dockerfile -----

COPY ./server/server.sh /server.sh
RUN chmod +x /server.sh

ENTRYPOINT []
CMD [ "/server.sh" ]
@@ -40,7 +40,7 @@ spec:
maxUnavailable: 0 # Aim for no downtime during rollout
containers:
- name: inference-server
-image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:49abd1068-main
+image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:63e8110ca-tyler-bumptorch
imagePullPolicy: IfNotPresent
# Tritonserver will look for models in /mnt/models and initialize them on startup.
# When running multiple instances of Triton server on the same machine that use Python models,
@@ -88,4 +88,4 @@ spec:
- name: dshm
emptyDir:
medium: Memory
-sizeLimit: 512Mi
+sizeLimit: 128Mi
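The `dshm` volume backs Triton's shared-memory IPC, so cutting it from 512Mi to 128Mi is worth watching: if inference starts failing with shared-memory allocation errors, the new limit is likely too small for the deployed models. One way to inspect actual usage (pod name is a placeholder):

```bash
kubectl exec <inference-pod-name> -- df -h /dev/shm
```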
2 changes: 1 addition & 1 deletion deploy/k3s/service_account.yaml
@@ -36,7 +36,7 @@ rules:
verbs: ["create", "get", "list", "watch", "delete", "update"]
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["create", "get", "list", "watch", "delete", "update"]
verbs: ["create", "get", "list", "watch", "delete", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
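Adding the `patch` verb lets this service account update existing inference deployments in place instead of deleting and recreating them. One way to verify the new permission took effect; the namespace and service-account name are placeholders:

```bash
kubectl auth can-i patch deployments \
  --as=system:serviceaccount:<namespace>:<service-account-name>
```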
2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -6,6 +6,8 @@ version: "2.1"
services:
server:
# https://docs.k3s.io/advanced#running-k3s-in-docker
+# TODO: how to build this image on balena's cloud builder?
+# image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/k3s-orin:latest
build:
context: deploy/balena-k3s/server
ulimits:
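Until the TODO above is resolved, one local alternative is `balena build`, which builds the compose services on your own machine rather than on balena's builders; run it from the directory containing this `docker-compose.yml`, with the fleet name as a placeholder:

```bash
balena build --fleet <your-fleet>
```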
