Skip to content

Commit 8cdf9ae

Browse files
authored
Fix IstioD causing the Cortex cluster to no longer be reachable (#2342)
1 parent 3bfcd5b commit 8cdf9ae

21 files changed

+125
-73
lines changed

cli/cmd/cluster.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ var _clusterConfigureCmd = &cobra.Command{
358358
exit.Error(err)
359359
}
360360

361-
k8sClient, err := k8s.New("default", false, restConfig, scheme)
361+
k8sClient, err := k8s.New(consts.DefaultNamespace, false, restConfig, scheme)
362362
if err != nil {
363363
exit.Error(err)
364364
}
@@ -804,7 +804,7 @@ var _clusterHealthCmd = &cobra.Command{
804804
exit.Error(err)
805805
}
806806

807-
k8sClient, err := k8s.New("default", false, restConfig, scheme)
807+
k8sClient, err := k8s.New(consts.DefaultNamespace, false, restConfig, scheme)
808808
if err != nil {
809809
exit.Error(err)
810810
}

manager/install.sh

+12-5
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ function cluster_up() {
3636
create_eks
3737

3838
echo -n "○ updating cluster configuration "
39+
setup_namespaces
3940
setup_configmap
4041
echo ""
4142

@@ -195,6 +196,12 @@ function write_kubeconfig() {
195196
out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi
196197
}
197198

199+
function setup_namespaces() {
200+
# doing a patch to prevent getting the kubectl.kubernetes.io/last-applied-configuration annotation warning
201+
kubectl patch namespace default -p '{"metadata": {"labels": {"istio-discovery": "enabled"}}}' >/dev/null
202+
kubectl apply -f manifests/namespaces.yaml >/dev/null
203+
}
204+
198205
function setup_configmap() {
199206
envsubst < manifests/default_cortex_cli_config.yaml > tmp_cli_config.yaml
200207
kubectl -n=default create configmap 'client-config' \
@@ -227,7 +234,9 @@ function setup_prometheus() {
227234
envsubst < manifests/prometheus-node-exporter.yaml | kubectl apply -f - >/dev/null
228235
envsubst < manifests/prometheus-monitoring.yaml | kubectl apply -f - >/dev/null
229236
python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/prometheus-additional-scrape-configs.yaml.j2 > prometheus-additional-scrape-configs.yaml
230-
kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml
237+
if ! kubectl get secret -n prometheus additional-scrape-configs >/dev/null 2>&1; then
238+
kubectl create secret generic -n prometheus additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml > /dev/null
239+
fi
231240
}
232241

233242
function setup_grafana() {
@@ -360,8 +369,6 @@ function remove_nodegroups() {
360369
}
361370

362371
function setup_istio() {
363-
envsubst < manifests/istio-namespace.yaml | kubectl apply -f - >/dev/null
364-
365372
if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
366373
WEBSITE=localhost
367374
openssl req -subj "/C=US/CN=$WEBSITE" -newkey rsa:2048 -nodes -keyout $WEBSITE.key -x509 -days 3650 -out $WEBSITE.crt >/dev/null 2>&1
@@ -530,8 +537,8 @@ function validate_cortex() {
530537
fi
531538

532539
if [ "$prometheus_ready" == "" ]; then
533-
readyReplicas=$(kubectl get statefulset -n default prometheus-prometheus -o jsonpath='{.status.readyReplicas}' 2> /dev/null)
534-
desiredReplicas=$(kubectl get statefulset -n default prometheus-prometheus -o jsonpath='{.status.replicas}' 2> /dev/null)
540+
readyReplicas=$(kubectl get statefulset -n prometheus prometheus-prometheus -o jsonpath='{.status.readyReplicas}' 2> /dev/null)
541+
desiredReplicas=$(kubectl get statefulset -n prometheus prometheus-prometheus -o jsonpath='{.status.replicas}' 2> /dev/null)
535542

536543
if [ "$readyReplicas" != "" ] && [ "$desiredReplicas" != "" ]; then
537544
if [ "$readyReplicas" == "$desiredReplicas" ]; then

manager/manifests/autoscaler.yaml.j2

+1-1
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ spec:
8282
args:
8383
- "--in-cluster"
8484
- "--port=8000"
85-
- "--prometheus-url=http://prometheus.default:9090"
85+
- "--prometheus-url=http://prometheus.prometheus:9090"
8686
- "--namespace=default"
8787
- "--cluster-config=/configs/cluster/cluster.yaml"
8888
ports:

manager/manifests/event-exporter.yaml

+4-4
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
apiVersion: v1
1616
kind: ServiceAccount
1717
metadata:
18-
namespace: default
18+
namespace: logging
1919
name: event-exporter
2020

2121
---
@@ -30,7 +30,7 @@ roleRef:
3030
name: view
3131
subjects:
3232
- kind: ServiceAccount
33-
namespace: default
33+
namespace: logging
3434
name: event-exporter
3535

3636
---
@@ -39,7 +39,7 @@ apiVersion: v1
3939
kind: ConfigMap
4040
metadata:
4141
name: event-exporter-config
42-
namespace: default
42+
namespace: logging
4343
data:
4444
config.yaml: |
4545
logLevel: error
@@ -61,7 +61,7 @@ apiVersion: apps/v1
6161
kind: Deployment
6262
metadata:
6363
name: event-exporter
64-
namespace: default
64+
namespace: logging
6565
spec:
6666
replicas: 1
6767
selector:

manager/manifests/fluent-bit.yaml.j2

+4-4
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ apiVersion: v1
1616
kind: ServiceAccount
1717
metadata:
1818
name: fluent-bit
19-
namespace: default
19+
namespace: logging
2020
---
2121
apiVersion: rbac.authorization.k8s.io/v1
2222
kind: ClusterRole
@@ -40,13 +40,13 @@ roleRef:
4040
subjects:
4141
- kind: ServiceAccount
4242
name: fluent-bit
43-
namespace: default
43+
namespace: logging
4444
---
4545
apiVersion: v1
4646
kind: ConfigMap
4747
metadata:
4848
name: fluent-bit-config
49-
namespace: default
49+
namespace: logging
5050
labels:
5151
k8s-app: fluent-bit
5252
data:
@@ -186,7 +186,7 @@ apiVersion: apps/v1
186186
kind: DaemonSet
187187
metadata:
188188
name: fluent-bit
189-
namespace: default
189+
namespace: logging
190190
spec:
191191
selector:
192192
matchLabels:

manager/manifests/grafana/grafana.yaml.j2

+1-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ data:
2828
"name": "prometheus",
2929
"orgId": 1,
3030
"type": "prometheus",
31-
"url": "http://prometheus.default:9090",
31+
"url": "http://prometheus.prometheus:9090",
3232
"version": 1,
3333
"isDefault": true
3434
}

manager/manifests/istio.yaml.j2

+23-3
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@ spec:
1818
profile: minimal
1919
hub: {{ env['CORTEX_IMAGE_ISTIO_PROXY_HUB'] }} # this is only used by proxy, since pilot overrides it (proxy doesn't have dedicated hub config)
2020
tag: {{ env['CORTEX_IMAGE_ISTIO_PROXY_TAG'] }} # this is only used by proxy, since pilot overrides it (proxy doesn't have dedicated tag config)
21+
meshConfig:
22+
discoverySelectors:
23+
- matchLabels:
24+
istio-discovery: enabled
2125
components:
2226
pilot: # "pilot" refers to the istiod container
2327
hub: {{ env['CORTEX_IMAGE_ISTIO_PILOT_HUB'] }}
@@ -26,7 +30,23 @@ spec:
2630
resources:
2731
requests:
2832
cpu: 100m # default is 500m
29-
memory: 200Mi # default is 2048Mi == 2Gi
33+
memory: 700Mi # default is 2048Mi == 2Gi
34+
hpaSpec:
35+
minReplicas: 1
36+
maxReplicas: 5
37+
metrics:
38+
- type: Resource
39+
resource:
40+
name: cpu
41+
targetAverageUtilization: 90
42+
- type: Resource
43+
resource:
44+
name: memory
45+
targetAverageUtilization: 90
46+
scaleTargetRef:
47+
apiVersion: apps/v1
48+
kind: Deployment
49+
name: istiod
3050
cni:
3151
enabled: false
3252
ingressGateways:
@@ -71,7 +91,7 @@ spec:
7191
replicaCount: 1
7292
hpaSpec:
7393
minReplicas: 1
74-
maxReplicas: 1 # edit autoscaleEnabled in values if increasing this
94+
maxReplicas: 1
7595
metrics:
7696
- type: Resource
7797
resource:
@@ -124,7 +144,7 @@ spec:
124144
replicaCount: 1
125145
hpaSpec:
126146
minReplicas: 1
127-
maxReplicas: 100 # edit autoscaleEnabled in values if increasing this
147+
maxReplicas: 100
128148
metrics:
129149
- type: Resource
130150
resource:

manager/manifests/istio-namespace.yaml renamed to manager/manifests/namespaces.yaml

+13
Original file line number | Diff line number | Diff line change
@@ -16,3 +16,16 @@ apiVersion: v1
1616
kind: Namespace
1717
metadata:
1818
name: istio-system
19+
---
20+
21+
apiVersion: v1
22+
kind: Namespace
23+
metadata:
24+
name: logging
25+
---
26+
27+
apiVersion: v1
28+
kind: Namespace
29+
metadata:
30+
name: prometheus
31+
---

manager/manifests/prometheus-dcgm-exporter.yaml

+3-8
Original file line number | Diff line number | Diff line change
@@ -12,16 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
apiVersion: v1
16-
kind: Namespace
17-
metadata:
18-
name: monitoring
19-
---
2015
apiVersion: v1
2116
kind: ServiceAccount
2217
metadata:
2318
name: dcgm-exporter
24-
namespace: default
19+
namespace: prometheus
2520
labels:
2621
app.kubernetes.io/name: dcgm-exporter
2722
app.kubernetes.io/instance: dcgm-exporter
@@ -31,7 +26,7 @@ apiVersion: apps/v1
3126
kind: DaemonSet
3227
metadata:
3328
name: dcgm-exporter
34-
namespace: default
29+
namespace: prometheus
3530
labels:
3631
app.kubernetes.io/name: dcgm-exporter
3732
app.kubernetes.io/instance: dcgm-exporter
@@ -106,7 +101,7 @@ apiVersion: monitoring.coreos.com/v1
106101
kind: PodMonitor
107102
metadata:
108103
name: dcgm-exporter
109-
namespace: default
104+
namespace: prometheus
110105
labels:
111106
monitoring.cortex.dev: dcgm-exporter
112107
app.kubernetes.io/name: dcgm-exporter

manager/manifests/prometheus-kube-state-metrics.yaml

+4-4
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ metadata:
1717
labels:
1818
app.kubernetes.io/name: kube-state-metrics
1919
name: kube-state-metrics
20-
namespace: default
20+
namespace: prometheus
2121
---
2222
apiVersion: rbac.authorization.k8s.io/v1
2323
kind: ClusterRole
@@ -180,13 +180,13 @@ roleRef:
180180
subjects:
181181
- kind: ServiceAccount
182182
name: kube-state-metrics
183-
namespace: default
183+
namespace: prometheus
184184
---
185185
apiVersion: apps/v1
186186
kind: Deployment
187187
metadata:
188188
name: kube-state-metrics
189-
namespace: default
189+
namespace: prometheus
190190
labels:
191191
app.kubernetes.io/name: kube-state-metrics
192192
app.kubernetes.io/version: "2.1.0"
@@ -245,7 +245,7 @@ apiVersion: monitoring.coreos.com/v1
245245
kind: PodMonitor
246246
metadata:
247247
name: kube-state-metrics
248-
namespace: default
248+
namespace: prometheus
249249
labels:
250250
name: kube-state-metrics
251251
monitoring.cortex.dev: kube-state-metrics

manager/manifests/prometheus-kubelet-exporter.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ metadata:
1919
k8s-app: kubelet
2020
monitoring.cortex.dev: kubelet-exporter
2121
name: kubelet
22-
namespace: default
22+
namespace: prometheus
2323
spec:
2424
endpoints:
2525
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token

manager/manifests/prometheus-monitoring.yaml

+9-1
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ apiVersion: monitoring.coreos.com/v1
2727
kind: Prometheus
2828
metadata:
2929
name: prometheus
30+
namespace: prometheus
3031
spec:
3132
image: $CORTEX_IMAGE_PROMETHEUS
3233
serviceAccountName: prometheus
@@ -73,6 +74,7 @@ apiVersion: v1
7374
kind: ServiceAccount
7475
metadata:
7576
name: prometheus
77+
namespace: prometheus
7678

7779
---
7880

@@ -114,14 +116,15 @@ roleRef:
114116
subjects:
115117
- kind: ServiceAccount
116118
name: prometheus
117-
namespace: default
119+
namespace: prometheus
118120

119121
---
120122

121123
apiVersion: v1
122124
kind: Service
123125
metadata:
124126
name: prometheus
127+
namespace: prometheus
125128
spec:
126129
type: ClusterIP
127130
ports:
@@ -136,6 +139,7 @@ apiVersion: monitoring.coreos.com/v1
136139
kind: PodMonitor
137140
metadata:
138141
name: istio-stats
142+
namespace: prometheus
139143
labels:
140144
monitoring.cortex.dev: "istio"
141145
spec:
@@ -187,6 +191,7 @@ apiVersion: monitoring.coreos.com/v1
187191
kind: PodMonitor
188192
metadata:
189193
name: proxy-stats
194+
namespace: prometheus
190195
labels:
191196
monitoring.cortex.dev: "proxy"
192197
spec:
@@ -240,6 +245,7 @@ apiVersion: monitoring.coreos.com/v1
240245
kind: PodMonitor
241246
metadata:
242247
name: async-stats
248+
namespace: prometheus
243249
labels:
244250
monitoring.cortex.dev: "dequeuer-async"
245251
spec:
@@ -294,6 +300,7 @@ apiVersion: monitoring.coreos.com/v1
294300
kind: PodMonitor
295301
metadata:
296302
name: prometheus-statsd-exporter
303+
namespace: prometheus
297304
labels:
298305
name: prometheus-statsd-exporter
299306
monitoring.cortex.dev: "statsd-exporter"
@@ -320,6 +327,7 @@ apiVersion: monitoring.coreos.com/v1
320327
kind: ServiceMonitor
321328
metadata:
322329
name: operator
330+
namespace: prometheus
323331
labels:
324332
name: operator
325333
monitoring.cortex.dev: "operator"

0 commit comments

Comments
 (0)