
Commit

add inferenceservice, job and sr
rcarrata committed Sep 27, 2024
1 parent 23023c0 commit aeda745
Showing 4 changed files with 205 additions and 0 deletions.
38 changes: 38 additions & 0 deletions bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml
@@ -0,0 +1,38 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    openshift.io/display-name: granite-7b-instruct
    serving.knative.openshift.io/enablePassthrough: 'true'
    sidecar.istio.io/inject: 'true'
    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
    argocd.argoproj.io/sync-wave: "2"
    argocd.argoproj.io/compare-options: IgnoreExtraneous
    argocd.argoproj.io/sync-options: Prune=false
  name: granite-7b-instruct
  namespace: ic-shared-llm
  labels:
    opendatahub.io/dashboard: 'true'
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        name: vLLM
      name: ''
      resources:
        limits:
          cpu: '6'
          memory: 24Gi
          nvidia.com/gpu: '1'
        requests:
          cpu: '1'
          memory: 8Gi
          nvidia.com/gpu: '1'
      runtime: vllm
      storageUri: oci://quay.io/rh-aiservices-bu/granite-7b-instruct-modelcar:0.1
    tolerations:
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
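
Once the sync completes, the InferenceService can be exercised from the CLI. A minimal sketch, assuming the status URL is published and reachable from where curl runs (the vLLM runtime referenced here serves an OpenAI-compatible API; the exact route host depends on the cluster):

# Wait until the predictor reports Ready
oc get inferenceservice granite-7b-instruct -n ic-shared-llm \
  -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'

# Read the external URL published in the status and list the served model
URL=$(oc get inferenceservice granite-7b-instruct -n ic-shared-llm -o jsonpath='{.status.url}')
curl -sk "$URL/v1/models"
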
49 changes: 49 additions & 0 deletions bootstrap/ic-shared-llm/job-enable-modelcar.yaml
@@ -0,0 +1,49 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: patch-inferenceservice-config
  namespace: ic-shared-llm
  annotations:
    argocd.argoproj.io/sync-wave: "1"
    argocd.argoproj.io/hook: Sync
    argocd.argoproj.io/hook-delete-policy: HookSucceeded
spec:
  backoffLimit: 4
  template:
    spec:
      serviceAccount: modelcar-enable-sa
      serviceAccountName: modelcar-enable-sa
      containers:
        - name: patch-configmap
          image: registry.redhat.io/openshift4/ose-cli:v4.15.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Wait for the default DSCInitialization to reach the "Ready" phase
              echo "Waiting for the operator to be Ready..."
              until [ "$(oc get dsci -n redhat-ods-applications default-dsci -o jsonpath='{.status.phase}')" = "Ready" ]; do
                echo "Operator not ready, retrying in 10s..."
                sleep 10
              done
              echo "Operator is Ready!"
              # Fetch the current storageInitializer config from the KServe configmap
              config=$(oc get configmap inferenceservice-config -n redhat-ods-applications -o jsonpath='{.data.storageInitializer}')
              # Patch only if "enableModelcar" is still disabled
              if echo "$config" | grep '"enableModelcar": false'; then
                echo "Patching configmap to enable modelcar..."
                # Flip the flag with sed and escape the quotes for the JSON patch payload
                newValue=$(echo "$config" | sed 's/"enableModelcar": false/"enableModelcar": true/')
                newValueEscaped=$(echo "$newValue" | sed 's/\"/\\\"/g')
                # Patch the configmap with the new value
                oc patch configmap inferenceservice-config -n redhat-ods-applications --type='json' -p "[{\"op\": \"replace\", \"path\": \"/data/storageInitializer\", \"value\": \"$newValueEscaped\"}]"
              else
                echo "Modelcar is already enabled, no patching needed."
              fi
              # Restart the KServe controller so it picks up the new configuration
              oc delete pod -n redhat-ods-applications -l control-plane=kserve-controller-manager
      restartPolicy: OnFailure
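
If the patch needs to be verified by hand, the same checks the job performs can be run directly against the cluster (a sketch; the grep output format may differ slightly):

# Confirm the flag is now set to true in the KServe storageInitializer config
oc get configmap inferenceservice-config -n redhat-ods-applications \
  -o jsonpath='{.data.storageInitializer}' | grep enableModelcar

# Confirm the controller pod came back after the restart
oc get pods -n redhat-ods-applications -l control-plane=kserve-controller-manager
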
68 changes: 68 additions & 0 deletions bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml
@@ -0,0 +1,68 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: modelcar-enable-sa
  namespace: ic-shared-llm
  annotations:
    argocd.argoproj.io/sync-wave: "0"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: modelcar-enable-patch-role
  namespace: redhat-ods-applications
  annotations:
    argocd.argoproj.io/sync-wave: "0"
rules:
  - apiGroups: ["redhat.com"]
    resources: ["dsci"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["get", "patch"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: modelcar-enable-patch-rolebinding
  namespace: redhat-ods-applications
  annotations:
    argocd.argoproj.io/sync-wave: "0"
subjects:
  - kind: ServiceAccount
    name: modelcar-enable-sa
    namespace: ic-shared-llm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: modelcar-enable-patch-role # Fixed to bind the correct Role
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: modelcar-dsc-read
  annotations:
    argocd.argoproj.io/sync-wave: "0"
rules:
  - apiGroups: ["dscinitialization.opendatahub.io"]
    resources: ["dscinitializations"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: modelcar-dsc-read-binding
  annotations:
    argocd.argoproj.io/sync-wave: "0"
subjects:
  - kind: ServiceAccount
    name: modelcar-enable-sa
    namespace: ic-shared-llm
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: modelcar-dsc-read
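
Before the job runs, the grants above can be sanity-checked by impersonating the service account (a sketch using oc auth can-i; the fully qualified resource name is spelled out to avoid ambiguity):

# Can the job's service account patch the KServe configmap?
oc auth can-i patch configmaps -n redhat-ods-applications \
  --as=system:serviceaccount:ic-shared-llm:modelcar-enable-sa

# Can it read DSCInitializations cluster-wide?
oc auth can-i get dscinitializations.dscinitialization.opendatahub.io \
  --as=system:serviceaccount:ic-shared-llm:modelcar-enable-sa
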
50 changes: 50 additions & 0 deletions bootstrap/ic-shared-llm/service-runtime-vllm-granite-modelcar.yaml
@@ -0,0 +1,50 @@
---
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    opendatahub.io/accelerator-name: migrated-gpu
    opendatahub.io/apiProtocol: REST
    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
    opendatahub.io/template-display-name: vLLM ServingRuntime for KServe
    opendatahub.io/template-name: vllm-runtime
    openshift.io/display-name: vllm
    argocd.argoproj.io/sync-wave: "2"
  name: vllm
  namespace: ic-shared-llm
  labels:
    opendatahub.io/dashboard: 'true'
spec:
  annotations:
    prometheus.io/path: /metrics
    prometheus.io/port: '8080'
  containers:
    - args:
        - '--port=8080'
        - '--model=/mnt/models'
        - '--served-model-name={{.Name}}'
        - '--distributed-executor-backend=mp'
      command:
        - python
        - '-m'
        - vllm.entrypoints.openai.api_server
      env:
        - name: HF_HOME
          value: /tmp/hf_home
      image: 'quay.io/modh/vllm@sha256:b51fde66f162f1a78e8c027320dddf214732d5345953b1599a84fe0f0168c619'
      name: kserve-container
      ports:
        - containerPort: 8080
          protocol: TCP
      volumeMounts:
        - mountPath: /dev/shm
          name: shm
  multiModel: false
  supportedModelFormats:
    - autoSelect: true
      name: vLLM
  volumes:
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: shm
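
The runtime annotations advertise Prometheus metrics on port 8080. One way to eyeball them is to port-forward to a predictor pod; this is a sketch, and the pod label selector is an assumption about how KServe labels the predictor pods:

# Pick a predictor pod for the granite InferenceService (label selector is an assumption)
POD=$(oc get pod -n ic-shared-llm -l serving.kserve.io/inferenceservice=granite-7b-instruct -o name | head -n 1)

# Forward the runtime's HTTP port and scrape the metrics endpoint declared above
oc port-forward -n ic-shared-llm "$POD" 8080:8080 &
curl -s http://localhost:8080/metrics | head
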
