diff --git a/.dockerignore b/.dockerignore
index f08dfcb4..d28930bb 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,3 +6,4 @@
 Dockerfile
 README.md
 test_coverage_reports/
+deploy/balena-k3s/build
\ No newline at end of file
diff --git a/app/core/configs.py b/app/core/configs.py
index c009f74c..64063b28 100644
--- a/app/core/configs.py
+++ b/app/core/configs.py
@@ -34,7 +34,7 @@ class LocalInferenceConfig(BaseModel):
     enabled: bool = Field(False, description="Determines if local edge inference is enabled for a specific detector.")
     api_token: Optional[str] = Field(None, description="API token to fetch the inference model for this detector.")
     refresh_rate: float = Field(
-        120.0,
+        default=120.0,
         description=(
             "The refresh rate for the inference server (in seconds). This means how often to check for an updated model"
             " binary."
diff --git a/app/core/edge_inference.py b/app/core/edge_inference.py
index 6af5973c..abb6dcd3 100644
--- a/app/core/edge_inference.py
+++ b/app/core/edge_inference.py
@@ -159,6 +159,9 @@ def update_model(self, detector_id: str) -> bool:
             else None
         )
 
+        # Fall back to the env var if we don't have a token in the config
+        api_token = api_token or os.environ.get("GROUNDLIGHT_API_TOKEN", None)
+
         model_urls = fetch_model_urls(detector_id, api_token=api_token)
         cloud_binary_ksuid = model_urls.get("model_binary_id", None)
         if cloud_binary_ksuid is None:
@@ -245,7 +248,6 @@ def save_model_to_repository(
     os.makedirs(model_version_dir, exist_ok=True)
 
     # Add model-version specific files (model.py and model.buf)
-    # NOTE: these files should be static and not change between model versions
     create_file_from_template(
         template_values={"pipeline_config": pipeline_config},
         destination=os.path.join(model_version_dir, "model.py"),
@@ -258,6 +260,8 @@ def save_model_to_repository(
         f.write(binary_ksuid)
 
     # Add/Overwrite model configuration files (config.pbtxt and binary_labels.txt)
+    # Generally these files should be static. Changing them can make earlier
+    # model versions incompatible with newer ones.
    create_file_from_template(
         template_values={"model_name": detector_id},
         destination=os.path.join(model_dir, "config.pbtxt"),
diff --git a/balena.yml b/balena.yml
new file mode 100644
index 00000000..6a78f3e0
--- /dev/null
+++ b/balena.yml
@@ -0,0 +1,7 @@
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/balena.yml
+name: balena-k3s-edge-endpoint
+type: sw.application
+version: 0.2.8
+description: >-
+  Run the edge-endpoint on top of k3s on Balena. The inference-server
+  will also be deployed on the same k3s cluster.
\ No newline at end of file
diff --git a/deploy/balena-k3s/README.md b/deploy/balena-k3s/README.md
new file mode 100644
index 00000000..54b907af
--- /dev/null
+++ b/deploy/balena-k3s/README.md
@@ -0,0 +1,32 @@
+# Running the edge-endpoint via k3s on a Balena device
+
+## Setup
+Tested using an EC2 m6 instance with a 64GB disk. Everything except the Triton inference server also works on a Raspberry Pi 5 (which has a 64-bit OS and 8GB of RAM), but the inference server is too demanding for the RPi 5.
+
+From the root of `edge-endpoint`, run:
+```bash
+balena login
+balena push
+```
+This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g., by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.
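+
+Once the push completes, you can sanity-check the cluster from the `bastion` service. A minimal check might look like the following (`<device-uuid>` is a placeholder for your device's UUID):
+```bash
+balena ssh <device-uuid> bastion
+kubectl get nodes   # the single k3s server node should report Ready
+```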
+
+At this point the k3s cluster is built and running, but we have not yet started our edge deployment.
+
+Configure the following variables via the `/Variables` or `/Device Variables` interfaces on the BalenaCloud dashboard:
+```
+GROUNDLIGHT_API_TOKEN - so that we can authorize the fetching of edge model binaries
+AWS_ACCESS_KEY_ID - so we can pull the edge-endpoint and gl-tritonserver images from ECR
+AWS_SECRET_ACCESS_KEY - needed along with AWS_ACCESS_KEY_ID
+```
+
+Now, SSH into `bastion` and run the following:
+```bash
+cd /app/edge-endpoint
+INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
+```
diff --git a/deploy/balena-k3s/bastion/Dockerfile b/deploy/balena-k3s/bastion/Dockerfile
new file mode 100644
index 00000000..22aa6847
--- /dev/null
+++ b/deploy/balena-k3s/bastion/Dockerfile
@@ -0,0 +1,57 @@
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/bastion/Dockerfile
+# ------- Build Stage -------
+FROM golang:1.21.0 AS arkade
+
+WORKDIR /src
+
+ARG ARKADE_VERSION=0.10.1
+ARG CGO_ENABLED=0
+
+ADD https://github.com/alexellis/arkade/archive/refs/tags/${ARKADE_VERSION}.tar.gz ./
+
+RUN tar xvf ${ARKADE_VERSION}.tar.gz --strip-components=1 && make build
+
+# ------- Runtime Stage -------
+FROM debian:bullseye-slim
+
+WORKDIR /app
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    ca-certificates \
+    curl \
+    unzip \
+    dnsutils \
+    vim \
+    jq \
+    gettext-base && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Note: this fetches the ARM64 (aarch64) build of the AWS CLI, so the image currently only supports ARM hosts
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"
+RUN unzip awscliv2.zip && rm awscliv2.zip
+RUN ./aws/install
+RUN aws --version
+
+COPY --from=arkade /src/arkade /usr/local/bin/arkade
+
+ENV PATH "${PATH}:/root/.arkade/bin/"
+
+RUN arkade version && \
+    arkade get --progress=false \
+    flux@v0.39.0 \
+    helm@v3.11.1 \
+    k3sup@0.12.12 \
+    k9s@v0.27.2 \
+    kubectl@v1.26.2
+
+
+# Copy edge-endpoint to /app/edge-endpoint
+RUN mkdir -p /app/edge-endpoint
+COPY . /app/edge-endpoint
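+
+# Keep the container alive so users can shell in; the empty ENTRYPOINT below
+# clears any entrypoint inherited from the base image.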
+ENTRYPOINT []
+CMD [ "tail", "-f", "/dev/null" ]
\ No newline at end of file
diff --git a/deploy/balena-k3s/server/Dockerfile b/deploy/balena-k3s/server/Dockerfile
new file mode 100644
index 00000000..262ab441
--- /dev/null
+++ b/deploy/balena-k3s/server/Dockerfile
@@ -0,0 +1,10 @@
+# https://hub.docker.com/r/rancher/k3s/tags
+# https://github.com/k3s-io/k3s/blob/master/package/Dockerfile
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/Dockerfile
+FROM rancher/k3s:v1.26.14-k3s1
+
+COPY server.sh /server.sh
+RUN chmod +x /server.sh
+
+ENTRYPOINT []
+CMD [ "/server.sh" ]
\ No newline at end of file
diff --git a/deploy/balena-k3s/server/server.sh b/deploy/balena-k3s/server/server.sh
new file mode 100644
index 00000000..9c1cb3e2
--- /dev/null
+++ b/deploy/balena-k3s/server/server.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -eu
+
+# https://docs.k3s.io/cli/server
+# https://docs.k3s.io/datastore/ha-embedded
+# https://github.com/balena-io-experimental/balena-k3s/blob/main/server/server.sh
+
+if [ -n "${K3S_URL:-}" ]; then
+    # shellcheck disable=SC2086
+    exec /bin/k3s server --server "${K3S_URL}" ${EXTRA_K3S_SERVER_ARGS:-}
+else
+    # shellcheck disable=SC2086
+    exec /bin/k3s server --cluster-init ${EXTRA_K3S_SERVER_ARGS:-}
+fi
\ No newline at end of file
diff --git a/deploy/bin/cluster_setup.sh b/deploy/bin/cluster_setup.sh
index f4181f23..3c749f12 100755
--- a/deploy/bin/cluster_setup.sh
+++ b/deploy/bin/cluster_setup.sh
@@ -8,22 +8,17 @@
 # - detectors in the `inference_deployments` table
 # - image queries in the `image_queries_edge` table
 # For more on these tables you can examine the database file at
-# /opt/groundlight/edge/sqlite/sqlite.db on the attached volume (EFS/local). 
+# /opt/groundlight/edge/sqlite/sqlite.db on the attached volume (EFS/local).
 # Possible env vars:
 # - KUBECTL_CMD: path to kubectl command. Defaults to "kubectl" but can be set to "k3s kubectl" if using k3s
 # - INFERENCE_FLAVOR: "CPU" or "GPU". Defaults to "GPU"
 # - EDGE_CONFIG: contents of edge-config.yaml. If not set, will use configs/edge-config.yaml
-# - DEPLOY_LOCAL_VERSION: Indicates whether we are building the local version of the edge endpoint. 
+# - DEPLOY_LOCAL_VERSION: Indicates whether we are building the local version of the edge endpoint.
 #   If set to 0, we will attach an EFS instead of a local volume. Defaults to 1.
-# - EFS_VOLUME_ID: ID of the EFS volume to use if we are using the EFS version. 
+# - EFS_VOLUME_ID: ID of the EFS volume to use if we are using the EFS version.
 # - DEPLOYMENT_NAMESPACE: Namespace to deploy to. Defaults to the current namespace.
-
-
-# move to the root directory of the repo
-cd "$(dirname "$0")"/../..
-
 set -ex
 
 fail() {
@@ -31,8 +26,8 @@ fail() {
     exit 1
 }
 
-# Function to check for conflicting PV. 
-# This is a robustness measure to guard against errors when a user tries to create a 
+# Function to check for conflicting PV.
+# This is a robustness measure to guard against errors when a user tries to create a
 # persistent volume with hostPath when we already have an EFS volume mounted or vice versa.
 check_pv_conflict() {
     local pv_name=$1
@@ -59,13 +54,16 @@ check_pv_conflict() {
 }
 
-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}
 INFERENCE_FLAVOR=${INFERENCE_FLAVOR:-"GPU"}
 DB_RESET=$1
 DEPLOY_LOCAL_VERSION=${DEPLOY_LOCAL_VERSION:-1}
 DEPLOYMENT_NAMESPACE=${DEPLOYMENT_NAMESPACE:-$($K config view -o json | jq -r '.contexts[] | select(.name == "'$($K config current-context)'") | .context.namespace')}
 
+# move to the root directory of the repo
+cd "$(dirname "$0")"/../..
+
 # Secrets
 ./deploy/bin/make-aws-secret.sh
@@ -75,12 +73,20 @@ if ! $K get secret registry-credentials; then
 fi
 
-# Configmaps and deployments
+# Configmaps, secrets, and deployments
 $K delete configmap --ignore-not-found edge-config -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found inference-deployment-template -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found kubernetes-namespace -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found setup-db -n ${DEPLOYMENT_NAMESPACE}
 $K delete configmap --ignore-not-found db-reset -n ${DEPLOYMENT_NAMESPACE}
+$K delete secret --ignore-not-found groundlight-api-token -n ${DEPLOYMENT_NAMESPACE}
+
+set +x # temporarily disable command echoing to avoid printing secrets
+if [[ -n "${GROUNDLIGHT_API_TOKEN}" ]]; then
+    echo "Creating groundlight-api-token secret"
+    $K create secret generic groundlight-api-token --from-literal=GROUNDLIGHT_API_TOKEN="${GROUNDLIGHT_API_TOKEN}" -n ${DEPLOYMENT_NAMESPACE}
+fi
+set -x # re-enable command echoing
 
 if [[ -n "${EDGE_CONFIG}" ]]; then
     echo "Creating config from EDGE_CONFIG env var"
@@ -136,7 +142,7 @@ if [[ "${DEPLOY_LOCAL_VERSION}" == "1" ]]; then
     $K apply -f deploy/k3s/local_persistent_volume.yaml
 else
-    # If environment variable EFS_VOLUME_ID is not set, exit 
+    # If environment variable EFS_VOLUME_ID is not set, exit
     if [[ -z "${EFS_VOLUME_ID}" ]]; then
         fail "EFS_VOLUME_ID environment variable not set"
     fi
@@ -154,7 +160,7 @@
 # Check if the edge-endpoint-pvc exists. If not, create it
 if ! $K get pvc edge-endpoint-pvc; then
-    # If environment variable EFS_VOLUME_ID is not set, exit 
+    # If environment variable EFS_VOLUME_ID is not set, exit
     if [[ -z "${EFS_VOLUME_ID}" ]]; then
         fail "EFS_VOLUME_ID environment variable not set"
     fi
diff --git a/deploy/bin/make-aws-secret.sh b/deploy/bin/make-aws-secret.sh
index ee4ee54a..62811723 100755
--- a/deploy/bin/make-aws-secret.sh
+++ b/deploy/bin/make-aws-secret.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-K=${KUBECTL_CMD:-"k3s kubectl"}
+K=${KUBECTL_CMD:-"kubectl"}
 
 if command -v docker >/dev/null 2>&1; then
     # Enable ECR login - make sure you have the aws client configured properly, or an IAM role
diff --git a/deploy/k3s/edge_deployment/edge_deployment.yaml b/deploy/k3s/edge_deployment/edge_deployment.yaml
index 960a7319..727162be 100644
--- a/deploy/k3s/edge_deployment/edge_deployment.yaml
+++ b/deploy/k3s/edge_deployment/edge_deployment.yaml
@@ -40,12 +40,12 @@ spec:
       serviceAccountName: edge-endpoint-service-account
       initContainers:
       - name: database-prep
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         env:
-        # Flag to indicate whether or not to reset all database tables. Resetting WILL delete 
-        # all existing data in the database, so set this flag to 1 with caution. 
-        - name: DB_RESET 
+        # Flag to indicate whether or not to reset all database tables. Resetting WILL delete
+        # all existing data in the database, so set this flag to 1 with caution.
+        - name: DB_RESET
           valueFrom:
             configMapKeyRef:
               name: db-reset
@@ -59,7 +59,7 @@ spec:
       containers:
       - name: edge-endpoint
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         ports:
        - containerPort: 6717
@@ -72,18 +72,18 @@
         # This feature flag is basically good for knowing when to use the python kubernetes API
         # (i.e., creating deployments, etc.). We don't want to use the python kubernetes API
         # if we are only running the edge logic server in docker.
-        # TODO: Once we have kubernetes-based tests, we can remove this feature flag. 
+        # TODO: Once we have kubernetes-based tests, we can remove this feature flag.
         - name: DEPLOY_DETECTOR_LEVEL_INFERENCE
           value: "1"
         volumeMounts:
         - name: edge-config-volume
-          mountPath: /etc/groundlight/edge-config 
+          mountPath: /etc/groundlight/edge-config
         - name: edge-endpoint-persistent-volume
           mountPath: /opt/groundlight/edge/sqlite
 
       - name: inference-model-updater
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:miscellaneous-c290a1810-dirty-8c48a5f159e9542
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/edge-endpoint:main-286f50eb5-dirty-3195280a7c7c28a
         imagePullPolicy: IfNotPresent
         command: ["/bin/bash", "-c"]
         args: ["poetry run python -m app.model_updater.update_models"]
@@ -92,6 +92,12 @@
             value: "INFO"
           - name: DEPLOY_DETECTOR_LEVEL_INFERENCE
             value: "1"
+          - name: GROUNDLIGHT_API_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: groundlight-api-token
+                key: GROUNDLIGHT_API_TOKEN
+                optional: true
         volumeMounts:
         - name: edge-config-volume
           mountPath: /etc/groundlight/edge-config
@@ -102,9 +108,9 @@
         - name: inference-deployment-template-volume
           mountPath: /etc/groundlight/inference-deployment
 
-        # In this setup the edge-endpoint-persistent-volume is mounted to 
-        # two different paths in the inference-model-updater container. 
-        # This allows the container to access both the sqlite database and 
+        # In this setup the edge-endpoint-persistent-volume is mounted to
+        # two different paths in the inference-model-updater container.
+        # This allows the container to access both the sqlite database and
         # the path to the model repository without needing to create an extra PV and PVC.
         - name: edge-endpoint-persistent-volume
           mountPath: /opt/groundlight/edge/sqlite
@@ -119,7 +125,7 @@
       volumes:
       - name: edge-config-volume
         configMap:
           name: edge-config
-      - name: kubernetes-namespace 
+      - name: kubernetes-namespace
        configMap:
          name: kubernetes-namespace
      - name: inference-deployment-template-volume
diff --git a/deploy/k3s/inference_deployment/inference_deployment_template.yaml b/deploy/k3s/inference_deployment/inference_deployment_template.yaml
index c30ea58f..2782e07a 100644
--- a/deploy/k3s/inference_deployment/inference_deployment_template.yaml
+++ b/deploy/k3s/inference_deployment/inference_deployment_template.yaml
@@ -40,7 +40,7 @@ spec:
         maxUnavailable: 0 # Aim for no downtime during rollout
       containers:
       - name: inference-server
-        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:929c52e9d-main
+        image: 723181461334.dkr.ecr.us-west-2.amazonaws.com/gl-tritonserver:63e8110ca-tyler-bumptorch
         imagePullPolicy: IfNotPresent
         # Tritonserver will look for models in /mnt/models and initialize them on startup.
         # When running multiple instances of Triton server on the same machine that use Python models,
@@ -88,4 +88,4 @@
       - name: dshm
         emptyDir:
           medium: Memory
-          sizeLimit: 512Mi
\ No newline at end of file
+          sizeLimit: 128Mi
\ No newline at end of file
diff --git a/deploy/k3s/service_account.yaml b/deploy/k3s/service_account.yaml
index 85b89044..de50c51e 100644
--- a/deploy/k3s/service_account.yaml
+++ b/deploy/k3s/service_account.yaml
@@ -1,8 +1,8 @@
-# To securely interact with the Kubernetes API from within a pod, 
-# Kubernetes uses a system called Role-Based Access Control (RBAC). 
-# When the Kubernetes Python client inside any pod tries to access 
-# the Kubernetes API, it needs to get auntheticated and authorized 
+# To securely interact with the Kubernetes API from within a pod,
+# Kubernetes uses a system called Role-Based Access Control (RBAC).
+# When the Kubernetes Python client inside any pod tries to access
+# the Kubernetes API, it needs to be authenticated and authorized
 # to access Kubernetes resources inside the cluster.
 # Specifying a service account and a cluster role with full access
 # to all resources in the cluster is the simplest way to do this.
@@ -19,7 +19,7 @@ metadata:
   namespace: ${DEPLOYMENT_NAMESPACE}
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: Role 
+kind: Role
 metadata:
   namespace: ${DEPLOYMENT_NAMESPACE}
   name: limited-access-role
@@ -31,9 +31,12 @@ rules:
 - apiGroups: [""] # "" indicates the core API group
   resources: ["pods"]
   verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["services"]
+  verbs: ["create", "get", "list", "watch", "delete", "update"]
 - apiGroups: ["apps"]
   resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete", "update"]
+  verbs: ["create", "get", "list", "watch", "delete", "update", "patch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..e6ad02d2
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,44 @@
+version: "2.1"
+
+# Balena docker-compose.yml for standing up a k3s server and bastion
+# https://github.com/k3s-io/k3s/blob/master/docker-compose.yml
+
+services:
+  server:
+    # https://docs.k3s.io/advanced#running-k3s-in-docker
+    build:
+      context: deploy/balena-k3s/server
+    ulimits:
+      nproc: 65535
+      nofile:
+        soft: 65535
+        hard: 65535
+    privileged: true
+    network_mode: host
+    # https://docs.k3s.io/cli/server#cluster-options
+    environment:
+      K3S_KUBECONFIG_OUTPUT: "/shared/kubeconfig.yaml"
+      K3S_KUBECONFIG_MODE: "666"
+      EXTRA_K3S_SERVER_ARGS: ""
+    tmpfs:
+      - /run
+      - /var/run
+    volumes:
+      - k3s-server:/var/lib/rancher/k3s
+      - shared:/shared
+
+  bastion:
+    build:
+      context: .
+      dockerfile: deploy/balena-k3s/bastion/Dockerfile
+    network_mode: host
+    environment:
+      KUBECONFIG: "/shared/kubeconfig.yaml"
+    volumes:
+      - shared:/shared:ro
+    depends_on:
+      - server
+
+volumes:
+  k3s-server: {}
+  shared: {}
\ No newline at end of file