Skip to content

Commit 4d32d75

Browse files
committed
Add max-model-len 8192 to avoid KV cache deadlock
1 parent 04a4842 commit 4d32d75

2 files changed

Lines changed: 2 additions & 1 deletion

File tree

platform/base/platform-controller/controller-deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ spec:
21 21
serviceAccountName: platform-controller
22 22
containers:
23 23
- name: manager
24 -
image: ghcr.io/bdchatham/aphex-platform-controller:v1.0.1769558013
24 +
image: ghcr.io/bdchatham/aphex-platform-controller:v1.0.1769559155
25 25
imagePullPolicy: Always
26 26
command:
27 27
- /manager

platform/base/platform-controller/controller/controllers/agent_controller.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,7 @@ func (r *AgentReconciler) reconcileModelServer(ctx context.Context, agent *platf
156 156
"--model", agent.Spec.Model.Name,
157 157
"--host", "0.0.0.0",
158 158
"--port", fmt.Sprintf("%d", port),
159 +
"--max-model-len", "8192",
159 160
},
160 161
Ports: []corev1.ContainerPort{
161 162
{

0 commit comments

Comments (0)