|
| 1 | +# Copyright (c) 2022 Gitpod GmbH. All rights reserved. |
| 2 | +# Licensed under the GNU Affero General Public License (AGPL). |
| 3 | +# See License-AGPL.txt in the project root for license information. |
| 4 | + |
| 5 | +apiVersion: monitoring.coreos.com/v1 |
| 6 | +kind: PrometheusRule |
| 7 | +metadata: |
| 8 | + labels: |
| 9 | + app.kubernetes.io/name: argocd |
| 10 | + app.kubernetes.io/part-of: kube-prometheus |
| 11 | + prometheus: k8s |
| 12 | + role: alert-rules |
| 13 | + name: argocd-monitoring-rules |
| 14 | + namespace: monitoring-satellite |
| 15 | +spec: |
| 16 | + groups: |
| 17 | + - name: argocd-apps |
| 18 | + rules: |
| 19 | + - alert: ArgoCDAppStuckInUnknown |
| 20 | + for: 1h |
| 21 | + annotations: |
| 22 | + description: App {{ $labels.name }} in {{ $labels.label_environment }} is stuck in `Unknown` for 1h. ArgoCD is probably generating errors when trying to compare live and desired manifests. |
| 23 | + summary: App {{ $labels.name }} is stuck in `Unknown` state. |
| 24 | + expr: label_replace(argocd_app_info{sync_status="Unknown"} * on(name) group_left(label_environment, label_team) argocd_app_labels, "team", "$1", "label_team", "(.*)") |
| 25 | + labels: |
| 26 | + severity: warning |
| 27 | + - alert: ArgoCDAppOutOfSyncForTooLong |
| 28 | + for: 1d |
| 29 | + annotations: |
| 30 | + description: App {{ $labels.name }} in {{ $labels.label_environment }} is `OutOfSync` for more than an entire day. The live manifests do not match what is desired in Git! |
| 31 | + summary: App {{ $labels.name }} is stuck in `OutOfSync` state. |
| 32 | + expr: label_replace(argocd_app_info{sync_status="OutOfSync"} * on(name) group_left(label_environment, label_team) argocd_app_labels, "team", "$1", "label_team", "(.*)") |
| 33 | + labels: |
| 34 | + severity: warning |
| 35 | + - alert: ArgoCDAppStuckInProgressing |
| 36 | + for: 1h |
| 37 | + annotations: |
| 38 | + description: App {{ $labels.name }} in {{ $labels.label_environment }} is stuck in `Progressing` for 1h. It is possible that the application is left in a weird state. |
| 39 | + summary: App {{ $labels.name }} is stuck in `Progressing` state. |
| 40 | + expr: label_replace(argocd_app_info{health_status="Progressing"} * on(name) group_left(label_environment, label_team) argocd_app_labels, "team", "$1", "label_team", "(.*)") |
| 41 | + labels: |
| 42 | + severity: warning |
| 43 | + - alert: ArgoCDAppDegraded |
| 44 | + for: 20m |
| 45 | + annotations: |
| 46 | + description: App {{ $labels.name }} in {{ $labels.label_environment }} is stuck in `Degraded`. This means that the synchronization failed and requires investigation. |
| 47 | + summary: App {{ $labels.name }} is stuck in `Degraded` state. |
| 48 | + expr: label_replace(argocd_app_info{health_status="Degraded"} * on(name) group_left(label_environment, label_team) argocd_app_labels, "team", "$1", "label_team", "(.*)") |
| 49 | + labels: |
| 50 | + severity: warning |
0 commit comments