Skip to content

Commit 1b6ab70

Browse files
committed
small fixes for pr
Signed-off-by: Kfir Toledo <kfir.toledo@ibm.com>
1 parent a80c163 commit 1b6ab70

File tree

12 files changed

+170
-61
lines changed

12 files changed

+170
-61
lines changed

deploy/components/inference-gateway/deployments.yaml

+3
Original file line number | Diff line number | Diff line change
@@ -48,3 +48,6 @@ spec:
4848
service: inference-extension
4949
initialDelaySeconds: 5
5050
periodSeconds: 10
51+
env:
52+
- name: KVCACHE_INDEXER_REDIS_ADDR
53+
value: ${REDIS_HOST}:${REDIS_PORT}

deploy/components/inference-gateway/inference-models.yaml

+31-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,37 @@ spec:
66
modelName: food-review
77
criticality: Critical
88
poolRef:
9-
name: vllm-llama3-8b-instruct
9+
name: ${POOL_NAME}
1010
targetModels:
1111
- name: food-review
1212
weight: 100
13+
---
14+
apiVersion: inference.networking.x-k8s.io/v1alpha2
15+
kind: InferenceModel
16+
metadata:
17+
name: base-model
18+
spec:
19+
modelName: meta-llama/Llama-3.1-8B-Instruct
20+
criticality: Critical
21+
poolRef:
22+
name: ${POOL_NAME}
23+
---
24+
apiVersion: inference.networking.x-k8s.io/v1alpha2
25+
kind: InferenceModel
26+
metadata:
27+
name: base-model-cpu
28+
spec:
29+
modelName: Qwen/Qwen2.5-1.5B-Instruct
30+
criticality: Critical
31+
poolRef:
32+
name: ${POOL_NAME}
33+
---
34+
apiVersion: inference.networking.x-k8s.io/v1alpha2
35+
kind: InferenceModel
36+
metadata:
37+
name: mistarli
38+
spec:
39+
modelName: mistralai/Mistral-7B-Instruct-v0.2
40+
criticality: Critical
41+
poolRef:
42+
name: ${POOL_NAME}
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
apiVersion: inference.networking.x-k8s.io/v1alpha2
22
kind: InferencePool
33
metadata:
4-
name: vllm-llama3-8b-instruct
4+
name: ${POOL_NAME}
55
spec:
66
targetPortNumber: 8000
77
selector:
8-
app: vllm-llama3-8b-instruct
8+
app: ${POOL_NAME}
99
extensionRef:
1010
name: endpoint-picker

deploy/components/vllm-p2p/deployments/redis-deployment.yaml

+31-3
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
apiVersion: apps/v1
22
kind: Deployment
33
metadata:
4-
name: ${REDIS_NAME}
4+
name: ${REDIS_SVC_NAME}
55
labels:
66
app.kubernetes.io/name: redis
77
app.kubernetes.io/component: redis-lookup-server
@@ -20,8 +20,36 @@ spec:
2020
containers:
2121
- name: lookup-server
2222
image: ${REDIS_IMAGE}:${REDIS_TAG}
23-
imagePullPolicy: Always
23+
imagePullPolicy: IfNotPresent
2424
command:
2525
- redis-server
2626
ports:
27-
- containerPort: ${REDIS_TARGET_PORT}
27+
- name: redis-port
28+
containerPort: ${REDIS_TARGET_PORT}
29+
protocol: TCP
30+
resources:
31+
limits:
32+
cpu: "4"
33+
memory: 10G
34+
requests:
35+
cpu: "4"
36+
memory: 8G
37+
terminationMessagePath: /dev/termination-log
38+
terminationMessagePolicy: File
39+
restartPolicy: Always
40+
terminationGracePeriodSeconds: 30
41+
dnsPolicy: ClusterFirst
42+
securityContext: {}
43+
schedulerName: default-scheduler
44+
strategy:
45+
type: RollingUpdate
46+
rollingUpdate:
47+
maxUnavailable: 25%
48+
maxSurge: 25%
49+
revisionHistoryLimit: 10
50+
progressDeadlineSeconds: 600
51+
# securityContext:
52+
# allowPrivilegeEscalation: false
53+
# capabilities:
54+
# drop:
55+
# - ALL

deploy/components/vllm-p2p/deployments/vllm-deployment.yaml

+77-14
Original file line number | Diff line number | Diff line change
@@ -13,17 +13,24 @@ spec:
1313
app.kubernetes.io/name: vllm
1414
app.kubernetes.io/component: vllm
1515
app.kubernetes.io/model: ${MODEL_LABEL}
16+
app: ${POOL_NAME}
1617
template:
1718
metadata:
1819
labels:
1920
app.kubernetes.io/name: vllm
2021
app.kubernetes.io/component: vllm
2122
app.kubernetes.io/model: ${MODEL_LABEL}
23+
app: ${POOL_NAME}
2224
spec:
25+
# securityContext:
26+
# runAsUser: ${PROXY_UID}
27+
# runAsNonRoot: true
28+
# seccompProfile:
29+
# type: RuntimeDefault
2330
containers:
2431
- name: vllm
2532
image: ${VLLM_IMAGE}:${VLLM_TAG}
26-
imagePullPolicy: Always
33+
imagePullPolicy: IfNotPresent
2734
command:
2835
- /bin/sh
2936
- "-c"
@@ -40,22 +47,78 @@ spec:
4047
ports:
4148
- name: http
4249
containerPort: 8000
43-
- name: lmcache-dist
50+
protocol: TCP
51+
- name: lmcache-dist # Assuming port 80 is used for LMCACHE_DISTRIBUTED_URL
4452
containerPort: 80
53+
protocol: TCP
54+
livenessProbe:
55+
failureThreshold: 3
56+
httpGet:
57+
path: /health
58+
port: 8000
59+
scheme: HTTP
60+
initialDelaySeconds: 15
61+
periodSeconds: 10
62+
successThreshold: 1
63+
timeoutSeconds: 1
64+
startupProbe:
65+
failureThreshold: 60
66+
httpGet:
67+
path: /health
68+
port: 8000
69+
scheme: HTTP
70+
initialDelaySeconds: 15
71+
periodSeconds: 10
72+
successThreshold: 1
73+
timeoutSeconds: 1
4574
env:
46-
- name: HF_TOKEN
47-
valueFrom:
48-
secretKeyRef:
49-
name: ${HF_SECRET_NAME}
50-
key: ${HF_SECRET_KEY}
75+
- name: HF_HOME
76+
value: /data
5177
- name: POD_IP
5278
valueFrom:
5379
fieldRef:
80+
apiVersion: v1
5481
fieldPath: status.podIP
55-
volumeMounts:
56-
- name: model-storage
57-
mountPath: ${VOLUME_MOUNT_PATH}
58-
volumes:
59-
- name: model-storage
60-
persistentVolumeClaim:
61-
claimName: ${PVC_NAME}
82+
- name: HF_TOKEN
83+
valueFrom:
84+
secretKeyRef:
85+
name: vllm-p2p-secrets
86+
key: hf_token_mistral7b
87+
- name: LMCACHE_LOOKUP_URL
88+
value: vllm-p2p-lookup-server-service.kvcache-manager.svc.cluster.local:8100
89+
- name: LMCACHE_ENABLE_DEBUG
90+
value: "True"
91+
- name: LMCACHE_ENABLE_P2P
92+
value: "True"
93+
- name: LMCACHE_LOCAL_CPU
94+
value: "True"
95+
- name: LMCACHE_MAX_LOCAL_CPU_SIZE
96+
value: "20"
97+
- name: LMCACHE_USE_EXPERIMENTAL
98+
value: "True"
99+
- name: VLLM_RPC_TIMEOUT
100+
value: "1000000"
101+
resources:
102+
limits:
103+
nvidia.com/gpu: "1"
104+
requests:
105+
cpu: "10"
106+
memory: 40Gi
107+
nvidia.com/gpu: "1"
108+
terminationMessagePath: /dev/termination-log
109+
terminationMessagePolicy: File
110+
securityContext:
111+
runAsNonRoot: false
112+
restartPolicy: Always
113+
terminationGracePeriodSeconds: 30
114+
dnsPolicy: ClusterFirst
115+
securityContext: {}
116+
schedulerName: default-scheduler
117+
strategy:
118+
type: RollingUpdate
119+
rollingUpdate:
120+
maxUnavailable: 0
121+
maxSurge: "100%"
122+
revisionHistoryLimit: 10
123+
progressDeadlineSeconds: 1200
124+

deploy/components/vllm-p2p/kustomization.yaml

+2-3
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@ resources:
77
- deployments/vllm-deployment.yaml
88
- deployments/redis-deployment.yaml
99
- service/redis-service.yaml
10-
- pvc/volume.yaml
1110
- deployments/secret.yaml
1211

1312
images:
@@ -23,8 +22,8 @@ configMapGenerator:
2322
literals:
2423
- MODEL_NAME=${MODEL_NAME}
2524
- MODEL_LABEL=${MODEL_LABEL}
26-
- POOL_LABEL=${POOL_LABEL}
27-
- REDIS_ENABLED=${REDIS_ENABLED}
25+
- POOL_NAME=${POOL_NAME}
26+
- REDIS_ENABLED="true"
2827

2928
generatorOptions:
3029
disableNameSuffixHash: true

deploy/components/vllm-p2p/pvc/volume.yaml

-18
This file was deleted.

deploy/components/vllm-p2p/service/redis-service.yaml

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
apiVersion: v1
22
kind: Service
33
metadata:
4-
name: ${REDIS_NAME}
5-
namespace: ${NAMESPACE}
4+
name: ${REDIS_SVC_NAME}
65
labels:
76
app.kubernetes.io/name: redis
87
app.kubernetes.io/component: redis-lookup-server

deploy/components/vllm/deployments.yaml

+3-3
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,14 @@ kind: Deployment
33
metadata:
44
name: ${VLLM_DEPLOYMENT_NAME}
55
spec:
6-
replicas: 3
6+
replicas: ${VLLM_REPLICA_COUNT}
77
selector:
88
matchLabels:
9-
app: vllm-llama3-8b-instruct
9+
app: ${POOL_NAME}
1010
template:
1111
metadata:
1212
labels:
13-
app: vllm-llama3-8b-instruct
13+
app: ${POOL_NAME}
1414
spec:
1515
securityContext:
1616
runAsUser: ${PROXY_UID}

deploy/environments/dev/kubernetes-kgateway/gateway-parameters.yaml

+3-3
Original file line number | Diff line number | Diff line change
@@ -3,20 +3,20 @@ kind: GatewayParameters
33
metadata:
44
name: custom-gw-params
55
spec:
6-
kube:
6+
kube:
77
envoyContainer:
88
securityContext:
99
allowPrivilegeEscalation: false
1010
readOnlyRootFilesystem: true
1111
runAsNonRoot: true
1212
runAsUser: "${PROXY_UID}"
1313
service:
14-
type: NodePort
14+
type: LoadBalancer
1515
extraLabels:
1616
gateway: custom
1717
podTemplate:
1818
extraLabels:
1919
gateway: custom
20-
securityContext:
20+
securityContext:
2121
seccompProfile:
2222
type: RuntimeDefault

deploy/environments/dev/kubernetes-vllm/vllm-p2p/kustomization.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,6 @@ resources:
66

77
images:
88
- name: quay.io/vllm-d/vllm-d-dev:0.0.2
9-
newName: ${VLLM_P2P_IMAGE}
10-
newTag: ${VLLM_P2P_TAG}
9+
newName: ${VLLM_IMAGE}
10+
newTag: ${VLLM_TAG}
1111

0 commit comments

Comments
 (0)