Merge pull request #24 from determined-ai/mldm_284_mlde_0281

added LLM RAG, fixed deployment images
determined-ai · Mar 8, 2024 · 2e6495d · 2e6495d
2 parents 56a75e6 + f8671f7
commit 2e6495d
Show file tree

Hide file tree

Showing 181 changed files with 48,429 additions and 75 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # PDK - Pachyderm | Determined | KServe
 ## Deployment and Setup Guide
-**Date/Revision:** January 02, 2024
+**Date/Revision:** February 23, 2024
 
 
 ![alt text][big_picture]

diff --git a/bring-your-own-model/PDK_implementation/container/deploy/common.py b/bring-your-own-model/PDK_implementation/container/deploy/common.py
@@ -243,6 +243,7 @@ def create_inference_service(
             tolerations=tol,
             pytorch=(
                 V1beta1TorchServeSpec(
+                    args=["--model-store=/mnt/models"],
                     protocol_version=version,
                     storage_uri=f"s3://{bucket_name}/{model_name}",
                     resources=(

diff --git a/bring-your-own-model/PDK_implementation/container/deploy/deploy.py b/bring-your-own-model/PDK_implementation/container/deploy/deploy.py
@@ -68,7 +68,12 @@ def create_mar_file(model_name, model_version):
 # =====================================================================================
 
 
-def create_properties_file(model_name, model_version):
+def create_properties_file(model_name, model_version, cloud_model_host):
+    print(f"--> Cloud Model Host: {cloud_model_host}")
+    model_store = "/mnt/models/model-store"
+    if cloud_model_host == "aws":
+        print("--> Changing Model Store to match AWS")
+        model_store = "/mnt/models"        
     config_properties = """inference_address=http://0.0.0.0:8085
 management_address=http://0.0.0.0:8083
 metrics_address=http://0.0.0.0:8082
@@ -81,8 +86,9 @@ def create_properties_file(model_name, model_version):
 NUM_WORKERS=1
 number_of_netty_threads=4
 job_queue_size=10
-model_store=/mnt/models/model-store
+model_store=%s
 model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"%s":{"%s":{"defaultVersion":true,"marName":"%s.mar","minWorkers":1,"maxWorkers":5,"batchSize":1,"maxBatchDelay":5000,"responseTimeout":120}}}}""" % (
+        model_store,
         model_name,
         model_version,
         model_name,
@@ -124,7 +130,7 @@ def main():
     create_mar_file(model.name, model.version)
 
     # Create config.properties for .mar file, return files to upload to GCS bucket
-    model_files = create_properties_file(model.name, model.version)
+    model_files = create_properties_file(model.name, model.version, args.cloud_model_host)
 
     # Upload model artifacts to Cloud  bucket in the format for TorchServe
     upload_model(

diff --git a/bring-your-own-model/PDK_implementation/pipelines/_on_prem_deployment-pipeline.json b/bring-your-own-model/PDK_implementation/pipelines/_on_prem_deployment-pipeline.json
@@ -18,7 +18,7 @@
     "stdin": [
       "python deploy.py --deployment-name customer-churn --service-account-name pach-deploy --resource-requests cpu=2,memory=4Gi --resource-limits cpu=10,memory=8Gi"
     ],
-    "image": "pachyderm/pdk:byom-deploy-v0.0.4",
+    "image": "pachyderm/pdk:byom-deploy-v0.0.6",
     "secrets": [
       {
         "name": "pipeline-secret",

diff --git a/bring-your-own-model/PDK_implementation/pipelines/_on_prem_training-pipeline.json b/bring-your-own-model/PDK_implementation/pipelines/_on_prem_training-pipeline.json
@@ -19,7 +19,11 @@
     "stdin": [
       "python train.py --git-url https://git@github.com:/determined-ai/pdk.git --git-ref main --sub-dir bring-your-own-model/PDK_implementation/experiment --config const.yaml --repo customer-churn-data --model customer-churn --project pdk-customer-churn"
     ],
+<<<<<<< Updated upstream
     "image": "pachyderm/pdk:train-v0.0.5",
+=======
+    "image": "pachyderm/pdk:train-v0.0.6",
+>>>>>>> Stashed changes
     "secrets": [
       {
         "name": "pipeline-secret",

diff --git a/bring-your-own-model/PDK_implementation/pipelines/deployment-pipeline.json b/bring-your-own-model/PDK_implementation/pipelines/deployment-pipeline.json
@@ -18,7 +18,7 @@
     "stdin": [
       "python deploy.py --deployment-name customer-churn --cloud-model-host gcp --cloud-model-bucket pdk-repo-models --resource-requests cpu=2,memory=4Gi --resource-limits cpu=10,memory=8Gi"
     ],
-    "image": "pachyderm/pdk:byom-deploy-v0.0.4",
+    "image": "pachyderm/pdk:byom-deploy-v0.0.6",
     "secrets": [
       {
         "name": "pipeline-secret",

diff --git a/bring-your-own-model/PDK_implementation/pipelines/training-pipeline.json b/bring-your-own-model/PDK_implementation/pipelines/training-pipeline.json
@@ -19,7 +19,11 @@
     "stdin": [
       "python train.py --git-url https://git@github.com:/determined-ai/pdk.git --git-ref main --sub-dir bring-your-own-model/PDK_implementation/experiment --config const.yaml --repo customer-churn-data --model customer-churn --project pdk-customer-churn"
     ],
+<<<<<<< Updated upstream
     "image": "pachyderm/pdk:train-v0.0.5",
+=======
+    "image": "pachyderm/pdk:train-v0.0.6",
+>>>>>>> Stashed changes
     "secrets": [
       {
         "name": "pipeline-secret",

diff --git a/bring-your-own-model/readme.md b/bring-your-own-model/readme.md
@@ -4,7 +4,7 @@
 
 # PDK - Pachyderm | Determined | KServe
 ## Bringing Your Model to PDK
-**Date/Revision:** January 02, 2024
+**Date/Revision:** February 23, 2024
 
 In this section, we will train and deploy a simple customer churn model on PDK.
 

diff --git a/deploy/README.md b/deploy/README.md
@@ -4,7 +4,7 @@
 
 # PDK - Pachyderm | Determined | KServe
 ## Deployment and Setup Guide
-**Date/Revision:** January 02, 2024
+**Date/Revision:** February 23, 2024
 
 This page contains step-by-step guides for installing the infrastructure and all necessary components for the PDK environment, covering different Kubernetes platforms.
 

diff --git a/deploy/deploy_aws.md b/deploy/deploy_aws.md
@@ -5,6 +5,7 @@
 
 # PDK - Pachyderm | Determined | KServe
 ## Deployment Guide for AWS
+<b>Date/Revision:</b> February 23, 2024
 
 
 This guide will walk you through the steps of deploying the PDK components to AWS.
@@ -23,8 +24,8 @@ The following software versions will be used for this installation:
 - Python: 3.8 and 3.9
 - Kubernetes (K8s): latest supported *(currently 1.27)*
 - Postgres: 13
-- MLDE (Determined.AI): latest *(currently 0.26.7)*
-- MLDM (Pachyderm): latest *(currently 2.8.2)*
+- MLDE (Determined.AI): latest *(currently 0.28.1)*
+- MLDM (Pachyderm): latest *(currently 2.8.4)*
 - KServe: 0.12.0-rc0 (Quickstart Environment)
 
 PS: some of the commands used here are sensitive to the version of the product(s) listed above.
@@ -702,7 +703,7 @@ kubectl apply -f  - <<EOF
 apiVersion: v1
 kind: PersistentVolume
 metadata:
-  name: efs-pv
+  name: pdk-pv
 spec:
   capacity:
     storage: 200Gi
@@ -718,7 +719,7 @@ spec:
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: efs-pvc
+  name: pdk-pvc
   namespace: default
 spec:
   accessModes:
@@ -737,7 +738,7 @@ kubectl apply -f  - <<EOF
 apiVersion: v1
 kind: PersistentVolume
 metadata:
-  name: efs-pv-gpu
+  name: pdk-pv-gpu
 spec:
   capacity:
     storage: 200Gi
@@ -753,7 +754,7 @@ spec:
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: efs-pvc
+  name: pdk-pvc
   namespace: gpu-pool
 spec:
   accessModes:
@@ -956,14 +957,20 @@ The next step is to setup the 3 databases that will be used by PDK. Since the AW
 - Use the postgres `psql` command line utility (`psql -h ${RDS_CONNECTION_URL} postgres postgres`)
 - Create a pod with psql and connect to the instance
 
+You will also need the password, which can be obtained by running this command:
+
+```bash
+echo $RDS_ADMIN_PASSWORD
+```
+
 To create the databases using the psql pod, use these commands:
 
 
 ```bash
 kubectl run psql -it --rm=true --image=postgres:13 --command -- psql -h ${RDS_CONNECTION_URL} -U postgres postgres
 
 # The prompt will freeze as it loads the pod. Wait for the message "If you don't see a command prompt, try pressing enter".
-# Then, type the password and press enter.
+# Then, type (or paste) the password and press enter.
 
 postgres=> CREATE DATABASE pachyderm;
 
@@ -1116,7 +1123,7 @@ proxy:
 
 determined:
   enabled: true
-  detVersion: "0.26.7"
+  detVersion: "0.28.1"
   imageRegistry: determinedai
   enterpriseEdition: false
   imagePullSecretName:
@@ -1175,7 +1182,7 @@ determined:
             volumes:
               - name: shared-fs
                 persistentVolumeClaim:
-                  claimName: efs-pvc
+                  claimName: pdk-pvc
     - pool_name: gpu-pool
       max_aux_containers_per_agent: 1
       kubernetes_namespace: gpu-pool
@@ -1193,7 +1200,7 @@ determined:
             volumes:
               - name: shared-fs
                 persistentVolumeClaim:
-                  claimName: efs-pvc
+                  claimName: pdk-pvc
             tolerations:
               - key: "nvidia.com/gpu"
                 operator: "Equal"

diff --git a/deploy/deploy_gcp.md b/deploy/deploy_gcp.md
@@ -4,15 +4,15 @@
 
 # PDK - Pachyderm | Determined | KServe
 ## Deployment Guide for Google Cloud
-<b>Date/Revision:</b> January 02, 2024
+<b>Date/Revision:</b> February 23, 2024
 
 This guide will walk you through the steps of deploying the PDK components to Google Cloud.
 
 ## Reference Architecture
 The installation will be performed on the following hardware:
 
 - 3x e2-standard-16 CPU-based nodes (16 vCPUs, 64GB RAM, 1000GB SSD)
-- 2x n1-standard-8 GPU-based nodes (4 NVIDIA-T4, 8 vCPUs, 30GB RAM, 1000GB SSD)
+- 2x n1-standard-16 GPU-based nodes (4 NVIDIA-T4, 16 vCPUs, 64GB RAM, 1000GB SSD)
 
 The 3 CPU-based nodes will be used to run the services for all 3 products, and the MLDM pipelines. The GPU-based nodes will be used to run MLDE experiments.
 
@@ -21,8 +21,8 @@ The following software versions will be used for this installation:
 - Python: 3.8 and 3.9
 - Kubernetes (K8s): latest supported *(currently 1.27)*
 - Postgres: 13
-- MLDE (Determined.AI): latest *(currently 0.26.7)*
-- MLDM (Pachyderm): latest *(currently 2.8.2)*
+- MLDE (Determined.AI): latest *(currently 0.28.1)*
+- MLDM (Pachyderm): latest *(currently 2.8.4)*
 - KServe: 0.12.0-rc0 (Quickstart Environment)
 
 PS: some of the commands used here are sensitive to the version of the product(s) listed above.
@@ -160,7 +160,7 @@ export GCP_ZONE="us-central1-c"
 export K8S_VERSION="1.27.3-gke.100"
 export KSERVE_MODELS_NAMESPACE="models"
 export CLUSTER_MACHINE_TYPE="e2-standard-16"
-export GPU_MACHINE_TYPE="n1-standard-8"
+export GPU_MACHINE_TYPE="n1-standard-16"
 export SQL_CPU="2"
 export SQL_MEM="7680MB"
 
@@ -320,7 +320,8 @@ gcloud container clusters create ${CLUSTER_NAME} \
   --enable-dataplane-v2 \
  	--workload-pool=${PROJECT_ID}.svc.id.goog \
  	--workload-metadata="GKE_METADATA" \
- 	--node-locations ${GCP_ZONE}
+ 	--node-locations ${GCP_ZONE} \
+  --tags pdk
 ```
 
 This process will take several minutes. The output message will show the cluster configuration. You can also check the status of the provisioning in the Google Cloud Console.
@@ -357,7 +358,8 @@ gcloud container node-pools create "gpu-pool" \
 	--max-surge-upgrade 1 \
 	--max-unavailable-upgrade 0 \
   --scopes=storage-full,cloud-platform \
-	--node-locations ${GCP_ZONE}
+	--node-locations ${GCP_ZONE} \
+  --tags pdk
 ```
 
 This can take several minutes to complete. If it takes more than 1 hour, the client will time out. If that happens, track the progress of the provisioning process through the Google Cloud web console.
@@ -715,7 +717,7 @@ spec:
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
-  name: nfs
+  name: pdk-pvc
 spec:
   accessModes:
     - ReadWriteMany
@@ -747,7 +749,7 @@ spec:
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
-  name: nfs
+  name: pdk-pvc
 spec:
   accessModes:
     - ReadWriteMany
@@ -856,7 +858,7 @@ proxy:
   
 determined:
   enabled: true
-  detVersion: "0.26.7"
+  detVersion: "0.28.1"
   imageRegistry: determinedai
   enterpriseEdition: false
   imagePullSecretName:
@@ -894,7 +896,7 @@ determined:
         volumes:
           - name: pdk-pvc-nfs
             persistentVolumeClaim:
-              claimName: nfs
+              claimName: pdk-pvc
     gpuPodSpec:
       apiVersion: v1
       kind: Pod
@@ -907,7 +909,7 @@ determined:
         volumes:
           - name: pdk-pvc-nfs
             persistentVolumeClaim:
-              claimName: nfs
+              claimName: pdk-pvc
       metadata:
         labels:
           nodegroup-role: gpu-worker
@@ -930,7 +932,7 @@ determined:
             volumes:
               - name: pdk-pvc-nfs
                 persistentVolumeClaim:
-                  claimName: nfs
+                  claimName: pdk-pvc
     - pool_name: gpu-pool
       max_aux_containers_per_agent: 1
       kubernetes_namespace: gpu-pool
@@ -947,7 +949,7 @@ determined:
             volumes:
               - name: pdk-pvc-nfs
                 persistentVolumeClaim:
-                  claimName: nfs
+                  claimName: pdk-pvc
             tolerations:
               - key: "nvidia.com/gpu"
                 operator: "Equal"

diff --git a/deploy/deploy_k8s.md b/deploy/deploy_k8s.md
@@ -5,7 +5,7 @@
 
 # PDK - Pachyderm | Determined | KServe
 ## Deployment Guide for Kubernetes
-<b>Date/Revision:</b> January 02, 2024
+<b>Date/Revision:</b> February 23, 2024
 
 
 This guide will walk you through the steps of deploying the PDK components to a vanilla Kubernetes environment.
@@ -23,8 +23,8 @@ The following software versions will be used for this installation:
 - Python: 3.8 and 3.9
 - Kubernetes (K8s): latest supported *(currently 1.27)*
 - Postgres: 13
-- MLDE (Determined.AI): latest *(currently 0.26.7)*
-- MLDM (Pachyderm): latest *(currently 2.8.2)*
+- MLDE (Determined.AI): latest *(currently 0.28.1)*
+- MLDM (Pachyderm): latest *(currently 2.8.4)*
 - KServe: 0.12.0-rc0 (Quickstart Environment)
 
 PS: some of the commands used here are sensitive to the version of the product(s) listed above.
@@ -610,7 +610,7 @@ spec:
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
-  name: mlde-pvc
+  name: pdk-pvc
 spec:
   accessModes:
     - ReadWriteMany
@@ -639,7 +639,7 @@ spec:
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
-  name: mlde-pvc
+  name: pdk-pvc
   namespace: gpu-pool
 spec:
   accessModes:
@@ -722,7 +722,7 @@ proxy:
 
 determined:
   enabled: true
-  detVersion: "0.26.7"
+  detVersion: "0.28.1"
   imageRegistry: determinedai
   enterpriseEdition: false
   imagePullSecretName:
@@ -779,7 +779,7 @@ determined:
             volumes:
               - name: shared-fs
                 persistentVolumeClaim:
-                  claimName: mlde-pvc
+                  claimName: pdk-pvc
     - pool_name: gpu-pool
       max_aux_containers_per_agent: 1
       kubernetes_namespace: gpu-pool
@@ -796,7 +796,7 @@ determined:
             volumes:
               - name: shared-fs
                 persistentVolumeClaim:
-                  claimName: mlde-pvc
+                  claimName: pdk-pvc
             tolerations:
               - key: "nvidia.com/gpu"
                 operator: "Equal"

diff --git a/deploy/images/example_llm_chatui.png b/deploy/images/example_llm_chatui.png
diff --git a/deploy/images/example_llm_gcp_firewall.png b/deploy/images/example_llm_gcp_firewall.png
diff --git a/deploy/images/example_llm_model.png b/deploy/images/example_llm_model.png