cortexproject · CharlieTLe · Sep 8, 2024 · Aug 19, 2024 · Aug 19, 2024 · Aug 20, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,7 +3,8 @@
 ## master / unreleased
 * [CHANGE] Use cortex v1.17.1
 * [CHANGE] Enable shuffle sharding in compactors
-* [CHANGE] Remove chunks support for dashboards
+* [CHANGE] Remove chunks support for dashboards
+* [CHANGE] Target 3M memory series per ingester instead of 1.5M
 * [CHANGE] Update jsonnet-libs to Fri Jul 19 12:51:49 2024 #57
 * [ENHANCEMENT] Configure `-ingester.client.grpc-compression` to be `snappy-block`
 * [ENHANCEMENT] Support Grafana 11 in Cortex Service Scaling Dashboard

diff --git a/cortex-mixin/alerts/alerts.libsonnet b/cortex-mixin/alerts/alerts.libsonnet
@@ -389,11 +389,11 @@
       rules: [
         {
           alert: 'CortexProvisioningTooManyActiveSeries',
-          // We target each ingester to 1.5M in-memory series. This alert fires if the average
-          // number of series / ingester in a Cortex cluster is > 1.6M for 2h (we compact
+          // We target each ingester to 3.0M in-memory series. This alert fires if the average
+          // number of series / ingester in a Cortex cluster is > 3.2M for 2h (we compact
           // the TSDB head every 2h).
           expr: |||
-            avg by (%s) (cortex_ingester_memory_series) > 1.6e6
+            avg by (%s) (cortex_ingester_memory_series) > 3.2e6
           ||| % [$._config.alert_aggregation_labels],
           'for': '2h',
           labels: {

diff --git a/cortex-mixin/docs/playbooks.md b/cortex-mixin/docs/playbooks.md
@@ -555,13 +555,13 @@ How to **investigate**:
 
 ### CortexProvisioningTooManyActiveSeries
 
-This alert fires if the average number of in-memory series per ingester is above our target (1.5M).
+This alert fires if the average number of in-memory series per ingester is above our target (3.0M).
 
 How to **fix**:
 - Scale up ingesters
   - To find out the Cortex clusters where ingesters should be scaled up and how many minimum replicas are expected:
     ```
-    ceil(sum by(cluster, namespace) (cortex_ingester_memory_series) / 1.5e6) >
+    ceil(sum by(cluster, namespace) (cortex_ingester_memory_series) / 3.0e6) >
     count by(cluster, namespace) (cortex_ingester_memory_series)
     ```
 - After the scale up, the in-memory series are expected to be reduced at the next TSDB head compaction (occurring every 2h)
@@ -595,7 +595,7 @@ How to **fix**:
     kubectl -n <namespace> delete pod ingester-XXX
     ```
   - Restarting an ingester typically reduces the memory allocated by mmap-ed files. After the restart, ingester may allocate this memory again over time, but it may give more time while working on a longer term solution
-- Check the `Cortex / Writes Resources` dashboard to see if the number of series per ingester is above the target (1.5M). If so:
+- Check the `Cortex / Writes Resources` dashboard to see if the number of series per ingester is above the target (3.0M). If so:
   - Scale up ingesters
   - Memory is expected to be reclaimed at the next TSDB head compaction (occurring every 2h)
 

diff --git a/cortex-mixin/recording_rules.libsonnet b/cortex-mixin/recording_rules.libsonnet
@@ -2,7 +2,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
 
 {
   local _config = {
-    max_series_per_ingester: 1.5e6,
+    max_series_per_ingester: 3.0e6,
     max_samples_per_sec_per_ingester: 80e3,
     max_samples_per_sec_per_distributor: 240e3,
     limit_utilisation_target: 0.6,
@@ -148,7 +148,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
             ||| % _config,
           },
           {
-            // Ingester should have 1.5M series in memory
+            // Ingester should have 3.0M series in memory
             record: 'cluster_namespace_deployment_reason:required_replicas:count',
             labels: {
               deployment: 'ingester',
@@ -167,7 +167,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
           },
           {
             // We should be about to cover 60% of our limits,
-            // and ingester can have 1.5M series in memory
+            // and ingester can have 3.0M series in memory
             record: 'cluster_namespace_deployment_reason:required_replicas:count',
             labels: {
               deployment: 'ingester',