Skip to content

Commit

Permalink
chore(gke): added example files for deployment on GKE
Browse files Browse the repository at this point in the history
  • Loading branch information
tengomucho committed Jan 10, 2025
1 parent c3aed1b commit 5afcc48
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 0 deletions.
51 changes: 51 additions & 0 deletions examples/gke/configs-tgi/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# Deployment that runs one Text Generation Inference (TGI) server pod on a
# GKE TPU node and serves meta-llama/Llama-3.2-1B-Instruct.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tgi-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      # Must match the pod template label below; tgi-service selects on the
      # same label.
      app: tgi-server
  template:
    metadata:
      labels:
        app: tgi-server
        hf.co/model: meta-llama-3-2-1b-instruct
        hf.co/task: text-generation
    spec:
      containers:
        - name: tgi-container
          # NOTE(review): the image name embeds a version (0.2.3) but the tag
          # is the mutable `latest` - consider pinning a tag or digest.
          image: us-central1-docker.pkg.dev/gcp-partnership-412108/deep-learning-images/huggingface-text-generation-inference-tpu.0.2.3.py310:latest
          resources:
            # Requests and limits are kept equal for the TPU resource.
            requests:
              google.com/tpu: 4
            limits:
              google.com/tpu: 4
          env:
            - name: MODEL_ID
              value: meta-llama/Llama-3.2-1B-Instruct
            # Same port number that tgi-service targets (8080).
            # Env values must be strings, hence the quotes on numbers.
            - name: PORT
              value: "8080"
            # MAX_TOTAL_TOKENS is MAX_INPUT_TOKENS + 1 here.
            - name: MAX_INPUT_TOKENS
              value: "16383"
            - name: MAX_TOTAL_TOKENS
              value: "16384"
            - name: MAX_BATCH_PREFILL_TOKENS
              value: "65536"
            - name: MAX_BATCH_SIZE
              value: "4"
            # Read from the `hf_token` key of the `hf-secret` Secret, which
            # must already exist in the same namespace.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-secret
                  key: hf_token
          volumeMounts:
            - mountPath: /data
              name: data
      volumes:
        # Ephemeral scratch volume; contents are lost when the pod is removed.
        - name: data
          emptyDir: {}
      # Schedule only onto nodes carrying these GKE TPU labels.
      nodeSelector:
        cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice
        cloud.google.com/gke-tpu-topology: 2x2
17 changes: 17 additions & 0 deletions examples/gke/configs-tgi/ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# Ingress that routes all HTTP paths to tgi-service on port 8080.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: tgi-ingress
  annotations:
    # Selects the "gce" ingress class for this resource.
    kubernetes.io/ingress.class: "gce"
spec:
  rules:
    # No `host` is set, so the rule is not restricted to a hostname.
    - http:
        paths:
          # Prefix match on "/" covers every request path.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: tgi-service
                port:
                  number: 8080
12 changes: 12 additions & 0 deletions examples/gke/configs-tgi/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Cluster-internal Service in front of the pods labeled `app: tgi-server`.
apiVersion: v1
kind: Service
metadata:
  name: tgi-service
spec:
  selector:
    app: tgi-server
  type: ClusterIP
  ports:
    # Service port 8080 forwards to port 8080 on the selected pods.
    - protocol: TCP
      port: 8080
      targetPort: 8080

0 comments on commit 5afcc48

Please sign in to comment.