-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-# Override CPU resource request and probe timing values in specific subcharts
-#
-# RESOURCES
-#
-# Resource request matching actual resource usage (with enough slack)
-# is important when service is scaled up, so that right amount of pods
-# get scheduled to right nodes.
-#
-# Because resource usage depends on the used devices, model, data type
-# and SW versions, and this top-level chart has overrides for them,
-# resource requests need to be specified here too.
-#
-# To test service without resource request, use "resources: {}".
-#
-# PROBES
-#
-# Inferencing pods startup / warmup takes *much* longer on CPUs than
-# with acceleration devices, and their responses are also slower,
-# especially when node is running several instances of these services.
-#
-# Kubernetes restarting pod before its startup finishes, or not
-# sending it queries because it's not in ready state due to slow
-# readiness responses, does really NOT help in getting faster responses.
-#
-# => probe timings need to be increased when running on CPU.
-
-tgi:
-  # TODO: add Helm value also for TGI data type option:
-  # https://github.com/opea-project/GenAIExamples/issues/330
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-
-  # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3 @ 32-bit:
-  resources:
-    limits:
-      cpu: 8
-      memory: 70Gi
-    requests:
-      cpu: 6
-      memory: 65Gi
-
-  livenessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    failureThreshold: 24
-    timeoutSeconds: 4
-  readinessProbe:
-    initialDelaySeconds: 16
-    periodSeconds: 8
-    timeoutSeconds: 4
-  startupProbe:
-    initialDelaySeconds: 10
-    periodSeconds: 5
-    failureThreshold: 180
-    timeoutSeconds: 2
-
-teirerank:
-  RERANK_MODEL_ID: "BAAI/bge-reranker-base"
-
-  # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model:
-  resources:
-    limits:
-      cpu: 4
-      memory: 30Gi
-    requests:
-      cpu: 2
-      memory: 25Gi
-
-  livenessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    failureThreshold: 24
-    timeoutSeconds: 4
-  readinessProbe:
-    initialDelaySeconds: 8
-    periodSeconds: 8
-    timeoutSeconds: 4
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 120
-
-tei:
-  EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
-
-  # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model:
-  resources:
-    limits:
-      cpu: 4
-      memory: 4Gi
-    requests:
-      cpu: 2
-      memory: 3Gi
-
-  livenessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 24
-    timeoutSeconds: 2
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 2
-  startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    failureThreshold: 120
+image:
+  repository: opea/chatqna
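
For reference, the probe settings dropped here gave each CPU pod a startup budget of roughly initialDelaySeconds + failureThreshold x periodSeconds before Kubernetes would restart it: about 10 + 180 x 5 = 910 s (~15 minutes) for TGI, and 5 + 120 x 5 = 605 s (~10 minutes) for the TEI embedding and reranking services.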
0 commit comments
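
If similar CPU tuning is still wanted after this change, it can live outside the chart in a user-supplied values file instead. A minimal sketch, using a hypothetical cpu-override.yaml that re-applies the TGI settings removed above (file name and scope are assumptions, not part of this change):

# cpu-override.yaml -- hypothetical user-side override, values taken from the removed file
tgi:
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  resources:
    limits:
      cpu: 8
      memory: 70Gi
    requests:
      cpu: 6
      memory: 65Gi
  startupProbe:
    initialDelaySeconds: 10
    periodSeconds: 5
    failureThreshold: 180   # allows ~15 min of CPU model warmup
    timeoutSeconds: 2

Such a file would be passed at install time, e.g. "helm install chatqna <chart-path> -f cpu-override.yaml", where the release name and chart path are placeholders.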