clean

groundlight · May 28, 2024 · 172c40e · 172c40e
1 parent fd832b7
commit 172c40e
Show file tree

Hide file tree

Showing 8 changed files with 28 additions and 14 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -6,3 +6,4 @@
 Dockerfile
 README.md
 test_coverage_reports/
+deploy/balena-k3s/build
diff --git a/app/core/edge_inference.py b/app/core/edge_inference.py
@@ -248,7 +248,6 @@ def save_model_to_repository(
     os.makedirs(model_version_dir, exist_ok=True)
 
     # Add model-version specific files (model.py and model.buf)
-    # NOTE: these files should be static and not change between model versions
     create_file_from_template(
         template_values={"pipeline_config": pipeline_config},
         destination=os.path.join(model_version_dir, "model.py"),
@@ -261,6 +260,8 @@ def save_model_to_repository(
             f.write(binary_ksuid)
 
     # Add/Overwrite model configuration files (config.pbtxt and binary_labels.txt)
+    # Generally these files should be static. Changing them can make earlier
+    # model versions incompatible with newer ones.
     create_file_from_template(
         template_values={"model_name": detector_id},
         destination=os.path.join(model_dir, "config.pbtxt"),

diff --git a/balena.yml b/balena.yml
@@ -1,4 +1,7 @@
 # https://github.com/balena-io-experimental/balena-k3s/blob/main/balena.yml
-name: balena-k3s
+name: balena-k3s-edge-endpoint
 type: sw.application
-version: 0.2.7
+version: 0.2.8
+description: >-
+  Run the edge-endpoint on top of k3s on Balena. The inference-server
+  will also be deployed on the same k3s cluster.
diff --git a/configs/edge-config.yaml b/configs/edge-config.yaml
@@ -35,6 +35,10 @@ detectors:
   # Blank id implies that no detectors have been configured to use motion detection
   # and/or local edge inference. The "default" templates are just examples of how one
   # might configure these features.
-  - detector_id: ''
+  # - detector_id: 'det_2djhf9bB8wpg1RGGO3xF785vMWI'
+  #   motion_detection_template: "default"
+  #   local_inference_template: "default"
+  # Harry's detector
+  - detector_id: 'det_2fnMl34p2LMt8oVbdH02bTgmyvU'
     motion_detection_template: "default"
-    local_inference_template: "default"
+    local_inference_template: "default"
diff --git a/deploy/balena-k3s/README.md b/deploy/balena-k3s/README.md
@@ -10,10 +10,17 @@ balena push <your-fleet>
 ```
 This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g. by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.
 
-Now, we have our k3s cluster built and running, but we have not started our edge deployment. Now, ssh into `bastion` and run the following:
+At this point the k3s cluster is built and running, but the edge deployment has not been started yet.
+
+Configure the following variables on the balenaCloud dashboard, under either `<fleet>/Variables` (applies to the whole fleet) or `<device>/Device Variables` (applies to a single device):
+```
+GROUNDLIGHT_API_TOKEN - authorizes fetching the edge model binaries
+AWS_ACCESS_KEY_ID - used to pull the edge-endpoint and gl-tritonserver images from ECR
+AWS_SECRET_ACCESS_KEY - required together with AWS_ACCESS_KEY_ID
+```
+
+Then SSH into `bastion` and run the following:
 ```bash
 cd /app/edge-endpoint
-aws configure  # configure credentials
-export GROUNDLIGHT_API_TOKEN=<your-token>
 KUBECTL_CMD="kubectl" INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
 ```
diff --git a/deploy/balena-k3s/bastion/Dockerfile b/deploy/balena-k3s/bastion/Dockerfile
@@ -46,12 +46,10 @@ RUN arkade version && \
     [email protected] \
     [email protected]
 
-# Install kustomize
-RUN curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
-RUN mv kustomize /usr/local/bin/kustomize
 
 # Copy edge-endpoint to /app/edge-endpoint
 RUN mkdir -p /app/edge-endpoint
 COPY . /app/edge-endpoint
 
+ENTRYPOINT []
 CMD [ "tail" , "-f", "/dev/null" ]
diff --git a/deploy/k3s/inference_deployment/inference_deployment_template.yaml b/deploy/k3s/inference_deployment/inference_deployment_template.yaml
@@ -40,7 +40,7 @@ spec:
           maxUnavailable: 0  # Aim for no downtime during rollout
       containers:
       - name: inference-server
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:49abd1068-main
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:63e8110ca-tyler-bumptorch
         imagePullPolicy: IfNotPresent
         # Tritonserver will look for models in /mnt/models and initialize them on startup.
         # When running multiple instances of Triton server on the same machine that use Python models,
@@ -88,4 +88,4 @@ spec:
       - name: dshm
         emptyDir:
           medium: Memory
-          sizeLimit: 512Mi
+          sizeLimit: 128Mi
diff --git a/deploy/k3s/service_account.yaml b/deploy/k3s/service_account.yaml
@@ -36,7 +36,7 @@ rules:
   verbs: ["create", "get", "list", "watch", "delete", "update"]
 - apiGroups: ["apps"]
   resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete", "update"]
+  verbs: ["create", "get", "list", "watch", "delete", "update", "patch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding