@@ -17,15 +17,202 @@ limitations under the License.
17
17
package v1alpha1
18
18
19
19
import (
20
+ "time"
21
+
22
+ "github.com/cortexlabs/cortex/pkg/types/status"
23
+ kcore "k8s.io/api/core/v1"
24
+ "k8s.io/apimachinery/pkg/api/resource"
20
25
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26
+ "k8s.io/apimachinery/pkg/util/intstr"
21
27
)
22
28
23
29
// RealtimeAPISpec defines the desired state of RealtimeAPI
24
30
type RealtimeAPISpec struct {
31
+ // Pod configuration
32
+ // +kubebuilder:validation:Required
33
+ Pod PodSpec `json:"pod"`
34
+
35
+ // +kubebuilder:validation:Optional
36
+ // Autoscaling configuration
37
+ Autoscaling AutoscalingSpec `json:"autoscaling"`
38
+
39
+ // +kubebuilder:validation:Optional
40
+ // List of node groups on which this API can run (default: all node groups are eligible)
41
+ NodeGroups []string `json:"node_groups,omitempty"`
42
+
43
+ // +kubebuilder:validation:Optional
44
+ // Deployment strategy to use when replacing existing replicas with new ones
45
+ UpdateStrategy UpdateStratagySpec `json:"update_strategy"`
46
+
47
+ // +kubebuilder:validation:Optional
48
+ // Networking configuration
49
+ Networking NetworkingSpec `json:"networking"`
50
+ }
51
+
52
+ type PodSpec struct {
53
+ // +kubebuilder:validation:Optional
54
+ // +kubebuilder:default=8080
55
+ // Port to which requests will be sent to
56
+ Port int `json:"port"`
57
+
58
+ // +kubebuilder:validation:Optional
59
+ // +kubebuilder:default=1
60
+ // Maximum number of requests that will be concurrently sent into the container
61
+ MaxConcurrency int `json:"max_concurrency"`
62
+
63
+ // +kubebuilder:validation:Optional
64
+ // +kubebuilder:default=100
65
+ // Maximum number of requests per replica which will be queued
66
+ // (beyond max_concurrency) before requests are rejected with error code 503
67
+ MaxQueueLength int `json:"max_queue_length"`
68
+
69
+ // +kubebuilder:validation:Required
70
+ // Configurations for the containers to run
71
+ Containers []ContainerSpec `json:"containers"`
72
+ }
73
+
74
+ type ContainerSpec struct {
75
+ // +kubebuilder:validation:Required
76
+ // Name of the container
77
+ Name string `json:"name"`
78
+
79
+ // +kubebuilder:validation:Required
80
+ // Docker image to use for the container
81
+ Image string `json:"image"`
82
+
83
+ // +kubebuilder:validation:Optional
84
+ // Entrypoint (not executed within a shell)
85
+ Command []string `json:"command,omitempty"`
86
+
87
+ // +kubebuilder:validation:Optional
88
+ // Arguments to the entrypoint
89
+ Args []string `json:"args,omitempty"`
90
+
91
+ // +kubebuilder:validation:Optional
92
+ // Environment variables to set in the container
93
+ Env []kcore.EnvVar `json:"env,omitempty"`
94
+
95
+ // +kubebuilder:validation:Optional
96
+ // Compute resource requests
97
+ Compute * ComputeSpec `json:"compute,omitempty"`
98
+
99
+ // +kubebuilder:validation:Optional
100
+ // Periodic probe of container readiness;
101
+ // traffic will not be sent into the pod unless all containers' readiness probes are succeeding
102
+ ReadinessProbe * kcore.Probe `json:"readiness_probe,omitempty"`
103
+
104
+ // +kubebuilder:validation:Optional
105
+ // Periodic probe of container liveness; container will be restarted if the probe fails
106
+ LivenessProbe * kcore.Probe `json:"liveness_probe,omitempty"`
107
+ }
108
+
109
+ type ComputeSpec struct {
110
+ // +kubebuilder:validation:Optional
111
+ // CPU request for the container; one unit of CPU corresponds to one virtual CPU;
112
+ // fractional requests are allowed, and can be specified as a floating point number or via the "m" suffix
113
+ CPU * resource.Quantity `json:"cpu,omitempty"`
114
+
115
+ // +kubebuilder:validation:Optional
116
+ // GPU request for the container; one unit of GPU corresponds to one virtual GPU
117
+ GPU int `json:"gpu,omitempty"`
118
+
119
+ // +kubebuilder:validation:Optional
120
+ // Memory request for the container;
121
+ // one unit of memory is one byte and can be expressed as an integer or by using one of these suffixes: K, M, G, T
122
+ // (or their power-of two counterparts: Ki, Mi, Gi, Ti)
123
+ Mem * resource.Quantity `json:"mem,omitempty"`
124
+
125
+ // +kubebuilder:validation:Optional
126
+ // Size of shared memory (/dev/shm) for sharing data between multiple processes
127
+ Shm * resource.Quantity `json:"shm,omitempty"`
128
+ }
129
+
130
+ type AutoscalingSpec struct {
131
+ // +kubebuilder:validation:Optional
132
+ // +kubebuilder:default=1
133
+ // Minimum number of replicas
134
+ MinReplicas int `json:"min_replicas,omitempty"`
135
+
136
+ // +kubebuilder:validation:Optional
137
+ // +kubebuilder:default=100
138
+ // Maximum number of replicas
139
+ MaxReplicas int `json:"max_replicas,omitempty"`
140
+
141
+ // +kubebuilder:validation:Optional
142
+ // +kubebuilder:default=1
143
+ // Initial number of replicas
144
+ InitReplicas int `json:"init_replicas,omitempty"`
145
+
146
+ // +kubebuilder:validation:Optional
147
+ // Desired number of in-flight requests per replica (including requests actively being processed as well as queued),
148
+ // which the autoscaler tries to maintain
149
+ TargetInFlight int `json:"target_in_flight,omitempty"`
150
+
151
+ // +kubebuilder:validation:Optional
152
+ // +kubebuilder:default="60s"
153
+ // Duration over which to average the API's in-flight requests per replica
154
+ Window time.Duration `json:"window,omitempty"`
155
+
156
+ // +kubebuilder:validation:Optional
157
+ // +kubebuilder:default="5m"
158
+ // The API will not scale below the highest recommendation made during this period
159
+ DownscaleStabilizationPeriod time.Duration `json:"downscale_stabilization_period,omitempty"`
160
+
161
+ // +kubebuilder:validation:Optional
162
+ // +kubebuilder:default="1m"
163
+ // The API will not scale above the lowest recommendation made during this period
164
+ UpscaleStabilizationPeriod time.Duration `json:"upscale_stabilization_period,omitempty"`
165
+
166
+ // +kubebuilder:validation:Optional
167
+ // +kubebuilder:default="750m"
168
+ // Maximum factor by which to scale down the API on a single scaling event
169
+ MaxDownscaleFactor resource.Quantity `json:"max_downscale_factor,omitempty"`
170
+
171
+ // +kubebuilder:validation:Optional
172
+ // +kubebuilder:default="1500m"
173
+ // Maximum factor by which to scale up the API on a single scaling event
174
+ MaxUpscaleFactor resource.Quantity `json:"max_upscale_factor,omitempty"`
175
+
176
+ // +kubebuilder:validation:Optional
177
+ // +kubebuilder:default="50m"
178
+ // Any recommendation falling within this factor below the current number of replicas will not trigger a
179
+ // scale down event
180
+ DownscaleTolerance resource.Quantity `json:"downscale_tolerance,omitempty"`
181
+
182
+ // +kubebuilder:validation:Optional
183
+ // +kubebuilder:default="50m"
184
+ // Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event
185
+ UpscaleTolerance resource.Quantity `json:"upscale_tolerance,omitempty"`
186
+ }
187
+
188
+ type UpdateStratagySpec struct {
189
+ // +kubebuilder:validation:Optional
190
+ // +kubebuilder:default="25%"
191
+ // Maximum number of replicas that can be scheduled above the desired number of replicas during an update;
192
+ // can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
193
+ // (set to 0 to disable rolling updates)
194
+ MaxSurge intstr.IntOrString `json:"max_surge"`
195
+
196
+ // +kubebuilder:validation:Optional
197
+ // +kubebuilder:default="25%"
198
+ // maximum number of replicas that can be unavailable during an update; can be an absolute number,
199
+ // e.g. 5, or a percentage of desired replicas, e.g. 10%
200
+ MaxUnavailable intstr.IntOrString `json:"max_unavailable"`
201
+ }
202
+
203
// NetworkingSpec holds the networking configuration of the API.
type NetworkingSpec struct {
	// +kubebuilder:validation:Optional
	// Endpoint for the API
	Endpoint string `json:"endpoint,omitempty"`
}
26
208
27
209
// RealtimeAPIStatus defines the observed state of RealtimeAPI
28
210
type RealtimeAPIStatus struct {
211
+ Status status.Code `json:"status"`
212
+ DesiredReplicas int `json:"desired_replicas"`
213
+ CurrentReplicas int `json:"current_replicas"`
214
+ ReadyReplicas int `json:"ready_replicas"`
215
+ Endpoint string `json:"endpoint,omitempty"`
29
216
}
30
217
31
218
//+kubebuilder:object:root=true
0 commit comments