@@ -67,6 +67,8 @@ public class AutoForceMergeManager extends AbstractLifecycleComponent {
67
67
private NodeValidator nodeValidator ;
68
68
private ShardValidator shardValidator ;
69
69
private Integer allocatedProcessors ;
70
+ private String nodeId ;
71
+ private final AutoForceMergeMetrics autoForceMergeMetrics ;
70
72
private ResourceTrackerProvider .ResourceTrackers resourceTrackers ;
71
73
private final ForceMergeManagerSettings forceMergeManagerSettings ;
72
74
private final CommonStatsFlags flags = new CommonStatsFlags (CommonStatsFlags .Flag .Segments , CommonStatsFlags .Flag .Translog );
@@ -78,14 +80,16 @@ public AutoForceMergeManager(
78
80
ThreadPool threadPool ,
79
81
MonitorService monitorService ,
80
82
IndicesService indicesService ,
81
- ClusterService clusterService
83
+ ClusterService clusterService ,
84
+ AutoForceMergeMetrics autoForceMergeMetrics
82
85
) {
83
86
this .threadPool = threadPool ;
84
87
this .osService = monitorService .osService ();
85
88
this .fsService = monitorService .fsService ();
86
89
this .jvmService = monitorService .jvmService ();
87
90
this .clusterService = clusterService ;
88
91
this .indicesService = indicesService ;
92
+ this .autoForceMergeMetrics = autoForceMergeMetrics ;
89
93
this .forceMergeManagerSettings = new ForceMergeManagerSettings (clusterService , this ::modifySchedulerInterval );
90
94
this .task = new AsyncForceMergeTask ();
91
95
this .mergingShards = new HashSet <>();
@@ -98,6 +102,7 @@ protected void doStart() {
98
102
this .shardValidator = new ShardValidator ();
99
103
this .allocatedProcessors = OpenSearchExecutors .allocatedProcessors (clusterService .getSettings ());
100
104
this .resourceTrackers = ResourceTrackerProvider .create (threadPool );
105
+ this .nodeId = clusterService .localNode ().getId ();
101
106
}
102
107
103
108
@ Override
@@ -119,20 +124,44 @@ private void modifySchedulerInterval(TimeValue schedulerInterval) {
119
124
}
120
125
121
126
private void triggerForceMerge () {
122
- if (isValidForForceMerge () == false ) {
123
- return ;
127
+ long startTime = System .currentTimeMillis ();
128
+ try {
129
+ if (isValidForForceMerge () == false ) {
130
+ return ;
131
+ }
132
+ executeForceMergeOnShards ();
133
+ } finally {
134
+ autoForceMergeMetrics .recordInHistogram (
135
+ autoForceMergeMetrics .totalSchedulerExecutionTime ,
136
+ (double ) System .currentTimeMillis () - startTime ,
137
+ autoForceMergeMetrics .getTags (nodeId , null )
138
+ );
124
139
}
125
- executeForceMergeOnShards ();
126
140
}
127
141
128
142
private boolean isValidForForceMerge () {
129
143
if (configurationValidator .hasWarmNodes () == false ) {
130
144
resourceTrackers .stop ();
131
145
logger .debug ("No warm nodes found. Skipping Auto Force merge." );
146
+ autoForceMergeMetrics .incrementCounter (
147
+ autoForceMergeMetrics .totalMergesSkipped ,
148
+ 1.0 ,
149
+ autoForceMergeMetrics .getTags (nodeId , null )
150
+ );
132
151
return false ;
133
152
}
134
153
if (nodeValidator .validate ().isAllowed () == false ) {
135
154
logger .debug ("Node capacity constraints are not allowing to trigger auto ForceMerge" );
155
+ autoForceMergeMetrics .incrementCounter (
156
+ autoForceMergeMetrics .skipsFromNodeValidator ,
157
+ 1.0 ,
158
+ autoForceMergeMetrics .getTags (nodeId , null )
159
+ );
160
+ autoForceMergeMetrics .incrementCounter (
161
+ autoForceMergeMetrics .totalMergesSkipped ,
162
+ 1.0 ,
163
+ autoForceMergeMetrics .getTags (nodeId , null )
164
+ );
136
165
return false ;
137
166
}
138
167
return true ;
@@ -157,14 +186,47 @@ private void executeForceMergeOnShards() {
157
186
158
187
private void executeForceMergeForShard (IndexShard shard ) {
159
188
CompletableFuture .runAsync (() -> {
189
+ long startTime = System .currentTimeMillis ();
190
+ String shardId = String .valueOf (shard .shardId ().getId ());
160
191
try {
161
192
mergingShards .add (shard .shardId ().getId ());
193
+ autoForceMergeMetrics .incrementCounter (
194
+ autoForceMergeMetrics .totalMergesTriggered ,
195
+ 1.0 ,
196
+ autoForceMergeMetrics .getTags (nodeId , null )
197
+ );
198
+
199
+ CommonStats preStats = new CommonStats (indicesService .getIndicesQueryCache (), shard , flags );
200
+ if (preStats .getSegments () != null ) {
201
+ autoForceMergeMetrics .incrementCounter (
202
+ autoForceMergeMetrics .segmentCount ,
203
+ (double ) preStats .getSegments ().getCount (),
204
+ autoForceMergeMetrics .getTags (nodeId , shardId )
205
+ );
206
+ autoForceMergeMetrics .incrementCounter (
207
+ autoForceMergeMetrics .shardSize ,
208
+ (double ) preStats .getStore ().getSizeInBytes (),
209
+ autoForceMergeMetrics .getTags (nodeId , shardId )
210
+ );
211
+ }
212
+
162
213
shard .forceMerge (new ForceMergeRequest ().maxNumSegments (forceMergeManagerSettings .getSegmentCount ()));
163
214
logger .debug ("Merging is completed successfully for the shard {}" , shard .shardId ());
215
+
164
216
} catch (Exception e ) {
165
217
logger .error ("Error during force merge for shard {}\n Exception: {}" , shard .shardId (), e );
218
+ autoForceMergeMetrics .incrementCounter (
219
+ autoForceMergeMetrics .totalMergesFailed ,
220
+ 1.0 ,
221
+ autoForceMergeMetrics .getTags (nodeId , null )
222
+ );
166
223
} finally {
167
224
mergingShards .remove (shard .shardId ().getId ());
225
+ autoForceMergeMetrics .recordInHistogram (
226
+ autoForceMergeMetrics .shardForceMergeLatency ,
227
+ (double ) System .currentTimeMillis () - startTime ,
228
+ autoForceMergeMetrics .getTags (nodeId , shardId )
229
+ );
168
230
}
169
231
}, threadPool .executor (ThreadPool .Names .FORCE_MERGE ));
170
232
logger .info ("Successfully triggered force merge for shard {}" , shard .shardId ());
0 commit comments