Skip to content

Commit 22c345a

Browse files
committed
feat: expose MD raid component devices
Expose which component devices are part of an MD RAID device, as well as the most common per-component flags. This will enable a future node_exporter metric showing which component of a RAID array has failed. Signed-off-by: Robin H. Johnson <[email protected]> Signed-off-by: Robin H. Johnson <[email protected]>
1 parent baf5a5a commit 22c345a

File tree

2 files changed

+113
-42
lines changed

2 files changed

+113
-42
lines changed

mdstat.go

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,28 @@ var (
2727
recoveryLinePctRE = regexp.MustCompile(`= (.+)%`)
2828
recoveryLineFinishRE = regexp.MustCompile(`finish=(.+)min`)
2929
recoveryLineSpeedRE = regexp.MustCompile(`speed=(.+)[A-Z]`)
30-
componentDeviceRE = regexp.MustCompile(`(.*)\[\d+\]`)
30+
componentDeviceRE = regexp.MustCompile(`(.*)\[(\d+)\](\([SF]+\))?`)
31+
personalitiesPrefix = "Personalities : "
3132
)
3233

34+
// MDStatComponent describes one component device of an MD array, as listed
// on the array's device line in /proc/mdstat, e.g. "sdb1[1](F)".
type MDStatComponent struct {
35+
// Name of the component device.
36+
Name string
37+
// DescriptorIndex number of component device, e.g. the order in the superblock.
// This is the number shown in square brackets after the device name.
38+
DescriptorIndex int32
39+
// Flags per Linux drivers/md/md.[ch] as of v6.12-rc1
40+
// Subset that are exposed in mdstat
41+
WriteMostly bool // "(W)" suffix
42+
Journal bool // "(J)" suffix
43+
Faulty bool // "Faulty" is what kernel source uses for "(F)"
44+
Spare bool // "(S)" suffix
45+
Replacement bool // "(R)" suffix
46+
// Some additional flags that are NOT exposed in procfs today; they may
47+
// be available via sysfs.
48+
// In_sync, Bitmap_sync, Blocked, WriteErrorSeen, FaultRecorded,
49+
// BlockedBadBlocks, WantReplacement, Candidate, ...
50+
}
51+
3352
// MDStat holds info parsed from /proc/mdstat.
3453
type MDStat struct {
3554
// Name of the device.
@@ -60,8 +79,8 @@ type MDStat struct {
6079
BlocksSyncedFinishTime float64
6180
// current sync speed (in Kilobytes/sec)
6281
BlocksSyncedSpeed float64
63-
// Name of md component devices
64-
Devices []string
82+
// component devices
83+
Devices []MDStatComponent
6584
}
6685

6786
// MDStat parses an mdstat-file (/proc/mdstat) and returns a slice of
@@ -82,38 +101,52 @@ func (fs FS) MDStat() ([]MDStat, error) {
82101
// parseMDStat parses data from mdstat file (/proc/mdstat) and returns a slice of
83102
// structs containing the relevant info.
84103
func parseMDStat(mdStatData []byte) ([]MDStat, error) {
104+
// TODO:
105+
// - parse global hotspares from the "unused devices" line.
85106
mdStats := []MDStat{}
86107
lines := strings.Split(string(mdStatData), "\n")
108+
knownRaidTypes := make(map[string]bool)
87109

88110
for i, line := range lines {
89111
if strings.TrimSpace(line) == "" || line[0] == ' ' ||
90-
strings.HasPrefix(line, "Personalities") ||
91112
strings.HasPrefix(line, "unused") {
92113
continue
93114
}
115+
// Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
116+
if len(knownRaidTypes) == 0 && strings.HasPrefix(line, personalitiesPrefix) {
117+
personalities := strings.Fields(line[len(personalitiesPrefix):])
118+
for _, word := range personalities {
119+
word := word[1 : len(word)-1]
120+
knownRaidTypes[word] = true
121+
}
122+
continue
123+
}
94124

95125
deviceFields := strings.Fields(line)
96126
if len(deviceFields) < 3 {
97127
return nil, fmt.Errorf("%w: Expected 3+ lines, got %q", ErrFileParse, line)
98128
}
99129
mdName := deviceFields[0] // mdx
100-
state := deviceFields[2] // active or inactive
130+
state := deviceFields[2] // active, inactive, broken
101131

102-
mdType := "unknown" // raid1, raid5, etc.
132+
mdType := "unknown" // raid1, raid5, etc.
133+
var deviceStartIndex int
103134
if len(deviceFields) > 3 { // mdType may be in the 3rd or 4th field
104-
if isRaidType(deviceFields[3]) {
135+
if isRaidType(deviceFields[3], knownRaidTypes) {
105136
mdType = deviceFields[3]
106-
} else if len(deviceFields) > 4 && isRaidType(deviceFields[4]) {
137+
deviceStartIndex = 4
138+
} else if len(deviceFields) > 4 && isRaidType(deviceFields[4], knownRaidTypes) {
107139
// if the 3rd field is (...), the 4th field is the mdType
108140
mdType = deviceFields[4]
141+
deviceStartIndex = 5
109142
}
110143
}
111144

112145
if len(lines) <= i+3 {
113146
return nil, fmt.Errorf("%w: Too few lines for md device: %q", ErrFileParse, mdName)
114147
}
115148

116-
// Failed disks have the suffix (F) & Spare disks have the suffix (S).
149+
// Failed (Faulty) disks have the suffix (F) & Spare disks have the suffix (S).
117150
fail := int64(strings.Count(line, "(F)"))
118151
spare := int64(strings.Count(line, "(S)"))
119152
active, total, down, size, err := evalStatusLine(lines[i], lines[i+1])
@@ -163,6 +196,11 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
163196
}
164197
}
165198

199+
devices, err := evalComponentDevices(deviceFields[deviceStartIndex:])
200+
if err != nil {
201+
return nil, fmt.Errorf("error parsing components in md device %q: %w", mdName, err)
202+
}
203+
166204
mdStats = append(mdStats, MDStat{
167205
Name: mdName,
168206
Type: mdType,
@@ -178,7 +216,7 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
178216
BlocksSyncedPct: pct,
179217
BlocksSyncedFinishTime: finish,
180218
BlocksSyncedSpeed: speed,
181-
Devices: evalComponentDevices(deviceFields),
219+
Devices: devices,
182220
})
183221
}
184222

@@ -188,11 +226,13 @@ func parseMDStat(mdStatData []byte) ([]MDStat, error) {
188226
// check if a string's format is like the mdType
189227
// Rule 1: mdType should not be like (...)
190228
// Rule 2: mdType should not be like sda[0]
191-
func isRaidType(mdType string) bool {
192-
return !strings.ContainsAny(mdType, "([")
229+
// isRaidType reports whether mdType is a RAID personality name.
//
// A candidate is rejected outright if it contains '(' or '[', which rules out
// tokens such as "(auto-read-only)" and component devices such as "sda[0]".
// Otherwise it is accepted only if it is present as a key in knownRaidTypes;
// the boolean stored under that key is not consulted.
func isRaidType(mdType string, knownRaidTypes map[string]bool) bool {
	if strings.ContainsAny(mdType, "([") {
		return false
	}
	_, known := knownRaidTypes[mdType]
	return known
}
194233

195234
func evalStatusLine(deviceLine, statusLine string) (active, total, down, size int64, err error) {
235+
// e.g. 523968 blocks super 1.2 [4/4] [UUUU]
196236
statusFields := strings.Fields(statusLine)
197237
if len(statusFields) < 1 {
198238
return 0, 0, 0, 0, fmt.Errorf("%w: Unexpected statusline %q: %w", ErrFileParse, statusLine, err)
@@ -283,17 +323,29 @@ func evalRecoveryLine(recoveryLine string) (blocksSynced int64, blocksToBeSynced
283323
return blocksSynced, blocksToBeSynced, pct, finish, speed, nil
284324
}
285325

286-
func evalComponentDevices(deviceFields []string) []string {
287-
mdComponentDevices := make([]string, 0)
288-
if len(deviceFields) > 3 {
289-
for _, field := range deviceFields[4:] {
290-
match := componentDeviceRE.FindStringSubmatch(field)
291-
if match == nil {
292-
continue
293-
}
294-
mdComponentDevices = append(mdComponentDevices, match[1])
326+
func evalComponentDevices(deviceFields []string) ([]MDStatComponent, error) {
327+
mdComponentDevices := make([]MDStatComponent, 0)
328+
for _, field := range deviceFields {
329+
match := componentDeviceRE.FindStringSubmatch(field)
330+
if match == nil {
331+
continue
295332
}
333+
descriptorIndex, err := strconv.ParseInt(match[2], 10, 32)
334+
if err != nil {
335+
return mdComponentDevices, fmt.Errorf("error parsing int from device %q: %w", match[2], err)
336+
}
337+
mdComponentDevices = append(mdComponentDevices, MDStatComponent{
338+
Name: match[1],
339+
DescriptorIndex: int32(descriptorIndex),
340+
// match may contain one or more of these
341+
// https://github.com/torvalds/linux/blob/7ec462100ef9142344ddbf86f2c3008b97acddbe/drivers/md/md.c#L8376-L8392
342+
Faulty: strings.Contains(match[3], "(F)"),
343+
Spare: strings.Contains(match[3], "(S)"),
344+
Journal: strings.Contains(match[3], "(J)"),
345+
Replacement: strings.Contains(match[3], "(R)"),
346+
WriteMostly: strings.Contains(match[3], "(W)"),
347+
})
296348
}
297349

298-
return mdComponentDevices
350+
return mdComponentDevices, nil
299351
}

mdstat_test.go

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ func TestFS_MDStat(t *testing.T) {
2626
if err != nil {
2727
t.Fatalf("parsing of reference-file failed entirely: %s", err)
2828
}
29+
// TODO: Test cases to capture in future:
30+
// WriteMostly devices
31+
// Journal devices
32+
// Replacement devices
33+
// Global hotspares
2934

3035
refs := map[string]MDStat{
3136
"md127": {
@@ -43,7 +48,7 @@ func TestFS_MDStat(t *testing.T) {
4348
BlocksSyncedPct: 0,
4449
BlocksSyncedFinishTime: 0,
4550
BlocksSyncedSpeed: 0,
46-
Devices: []string{"sdi2", "sdj2"}},
51+
Devices: []MDStatComponent{{Name: "sdi2", DescriptorIndex: 0}, {Name: "sdj2", DescriptorIndex: 1}}},
4752
"md0": {
4853
Name: "md0",
4954
Type: "raid1",
@@ -59,7 +64,7 @@ func TestFS_MDStat(t *testing.T) {
5964
BlocksSyncedPct: 0,
6065
BlocksSyncedFinishTime: 0,
6166
BlocksSyncedSpeed: 0,
62-
Devices: []string{"sdi1", "sdj1"}},
67+
Devices: []MDStatComponent{{Name: "sdi1", DescriptorIndex: 0}, {Name: "sdj1", DescriptorIndex: 1}}},
6368
"md4": {
6469
Name: "md4",
6570
Type: "raid1",
@@ -75,7 +80,7 @@ func TestFS_MDStat(t *testing.T) {
7580
BlocksSyncedPct: 0,
7681
BlocksSyncedFinishTime: 0,
7782
BlocksSyncedSpeed: 0,
78-
Devices: []string{"sda3", "sdb3"}},
83+
Devices: []MDStatComponent{{Name: "sda3", Faulty: true, DescriptorIndex: 0}, {Name: "sdb3", Spare: true, DescriptorIndex: 1}}},
7984
"md6": {
8085
Name: "md6",
8186
Type: "raid1",
@@ -91,7 +96,7 @@ func TestFS_MDStat(t *testing.T) {
9196
BlocksSyncedPct: 8.5,
9297
BlocksSyncedFinishTime: 17,
9398
BlocksSyncedSpeed: 259783,
94-
Devices: []string{"sdb2", "sdc", "sda2"}},
99+
Devices: []MDStatComponent{{Name: "sdb2", DescriptorIndex: 2, Faulty: true}, {Name: "sdc", DescriptorIndex: 1, Spare: true}, {Name: "sda2", DescriptorIndex: 0}}},
95100
"md3": {
96101
Name: "md3",
97102
Type: "raid6",
@@ -107,7 +112,7 @@ func TestFS_MDStat(t *testing.T) {
107112
BlocksSyncedPct: 0,
108113
BlocksSyncedFinishTime: 0,
109114
BlocksSyncedSpeed: 0,
110-
Devices: []string{"sda1", "sdh1", "sdg1", "sdf1", "sde1", "sdd1", "sdc1", "sdb1", "sdd1", "sdd2"}},
115+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 8}, {Name: "sdh1", DescriptorIndex: 7}, {Name: "sdg1", DescriptorIndex: 6}, {Name: "sdf1", DescriptorIndex: 5}, {Name: "sde1", DescriptorIndex: 11}, {Name: "sdd1", DescriptorIndex: 3}, {Name: "sdc1", DescriptorIndex: 10}, {Name: "sdb1", DescriptorIndex: 9}, {Name: "sdd1", DescriptorIndex: 10, Spare: true}, {Name: "sdd2", DescriptorIndex: 11, Spare: true}}},
111116
"md8": {
112117
Name: "md8",
113118
Type: "raid1",
@@ -123,7 +128,7 @@ func TestFS_MDStat(t *testing.T) {
123128
BlocksSyncedPct: 8.5,
124129
BlocksSyncedFinishTime: 17,
125130
BlocksSyncedSpeed: 259783,
126-
Devices: []string{"sdb1", "sda1", "sdc", "sde"}},
131+
Devices: []MDStatComponent{{Name: "sdb1", DescriptorIndex: 1}, {Name: "sda1", DescriptorIndex: 0}, {Name: "sdc", DescriptorIndex: 2, Spare: true}, {Name: "sde", DescriptorIndex: 3, Spare: true}}},
127132
"md7": {
128133
Name: "md7",
129134
Type: "raid6",
@@ -139,7 +144,7 @@ func TestFS_MDStat(t *testing.T) {
139144
BlocksSyncedPct: 0,
140145
BlocksSyncedFinishTime: 0,
141146
BlocksSyncedSpeed: 0,
142-
Devices: []string{"sdb1", "sde1", "sdd1", "sdc1"}},
147+
Devices: []MDStatComponent{{Name: "sdb1", DescriptorIndex: 0}, {Name: "sde1", DescriptorIndex: 3}, {Name: "sdd1", DescriptorIndex: 2}, {Name: "sdc1", DescriptorIndex: 1, Faulty: true}}},
143148
"md9": {
144149
Name: "md9",
145150
Type: "raid1",
@@ -155,7 +160,7 @@ func TestFS_MDStat(t *testing.T) {
155160
BlocksSyncedPct: 0,
156161
BlocksSyncedFinishTime: 0,
157162
BlocksSyncedSpeed: 0,
158-
Devices: []string{"sdc2", "sdd2", "sdb2", "sda2", "sde", "sdf", "sdg"}},
163+
Devices: []MDStatComponent{{Name: "sdc2", DescriptorIndex: 2}, {Name: "sdd2", DescriptorIndex: 3}, {Name: "sdb2", DescriptorIndex: 1}, {Name: "sda2", DescriptorIndex: 0}, {Name: "sde", DescriptorIndex: 4, Faulty: true}, {Name: "sdf", DescriptorIndex: 5, Faulty: true}, {Name: "sdg", DescriptorIndex: 6, Spare: true}}},
159164
"md10": {
160165
Name: "md10",
161166
Type: "raid0",
@@ -171,7 +176,7 @@ func TestFS_MDStat(t *testing.T) {
171176
BlocksSyncedPct: 0,
172177
BlocksSyncedFinishTime: 0,
173178
BlocksSyncedSpeed: 0,
174-
Devices: []string{"sda1", "sdb1"}},
179+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 0}, {Name: "sdb1", DescriptorIndex: 1}}},
175180
"md11": {
176181
Name: "md11",
177182
Type: "raid1",
@@ -187,7 +192,7 @@ func TestFS_MDStat(t *testing.T) {
187192
BlocksSyncedPct: 0,
188193
BlocksSyncedFinishTime: 0,
189194
BlocksSyncedSpeed: 0,
190-
Devices: []string{"sdb2", "sdc2", "sdc3", "hda", "ssdc2"}},
195+
Devices: []MDStatComponent{{Name: "sdb2", DescriptorIndex: 0}, {Name: "sdc2", DescriptorIndex: 1}, {Name: "sdc3", DescriptorIndex: 2, Faulty: true}, {Name: "hda", DescriptorIndex: 4, Spare: true}, {Name: "ssdc2", DescriptorIndex: 3, Spare: true}}},
191196
"md12": {
192197
Name: "md12",
193198
Type: "raid0",
@@ -203,7 +208,7 @@ func TestFS_MDStat(t *testing.T) {
203208
BlocksSyncedPct: 0,
204209
BlocksSyncedFinishTime: 0,
205210
BlocksSyncedSpeed: 0,
206-
Devices: []string{"sdc2", "sdd2"}},
211+
Devices: []MDStatComponent{{Name: "sdc2", DescriptorIndex: 0}, {Name: "sdd2", DescriptorIndex: 1}}},
207212
"md120": {
208213
Name: "md120",
209214
Type: "linear",
@@ -219,7 +224,7 @@ func TestFS_MDStat(t *testing.T) {
219224
BlocksSyncedPct: 0,
220225
BlocksSyncedFinishTime: 0,
221226
BlocksSyncedSpeed: 0,
222-
Devices: []string{"sda1", "sdb1"}},
227+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 1}, {Name: "sdb1", DescriptorIndex: 0}}},
223228
"md126": {
224229
Name: "md126",
225230
Type: "raid0",
@@ -235,7 +240,7 @@ func TestFS_MDStat(t *testing.T) {
235240
BlocksSyncedPct: 0,
236241
BlocksSyncedFinishTime: 0,
237242
BlocksSyncedSpeed: 0,
238-
Devices: []string{"sdb", "sdc"}},
243+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 1}, {Name: "sdc", DescriptorIndex: 0}}},
239244
"md219": {
240245
Name: "md219",
241246
Type: "unknown",
@@ -251,7 +256,7 @@ func TestFS_MDStat(t *testing.T) {
251256
BlocksSyncedPct: 0,
252257
BlocksSyncedFinishTime: 0,
253258
BlocksSyncedSpeed: 0,
254-
Devices: []string{"sdc", "sda"}},
259+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 2, Spare: true}, {Name: "sdc", DescriptorIndex: 1, Spare: true}, {Name: "sda", DescriptorIndex: 0, Spare: true}}},
255260
"md00": {
256261
Name: "md00",
257262
Type: "raid0",
@@ -267,7 +272,7 @@ func TestFS_MDStat(t *testing.T) {
267272
BlocksSyncedPct: 0,
268273
BlocksSyncedFinishTime: 0,
269274
BlocksSyncedSpeed: 0,
270-
Devices: []string{"xvdb"}},
275+
Devices: []MDStatComponent{{Name: "xvdb", DescriptorIndex: 0}}},
271276
"md101": {
272277
Name: "md101",
273278
Type: "raid0",
@@ -283,7 +288,7 @@ func TestFS_MDStat(t *testing.T) {
283288
BlocksSyncedPct: 0,
284289
BlocksSyncedFinishTime: 0,
285290
BlocksSyncedSpeed: 0,
286-
Devices: []string{"sdb", "sdd", "sdc"}},
291+
Devices: []MDStatComponent{{Name: "sdb", DescriptorIndex: 2}, {Name: "sdd", DescriptorIndex: 1}, {Name: "sdc", DescriptorIndex: 0}}},
287292
"md201": {
288293
Name: "md201",
289294
Type: "raid1",
@@ -299,7 +304,7 @@ func TestFS_MDStat(t *testing.T) {
299304
BlocksSyncedPct: 5.7,
300305
BlocksSyncedFinishTime: 0.2,
301306
BlocksSyncedSpeed: 114176,
302-
Devices: []string{"sda3", "sdb3"}},
307+
Devices: []MDStatComponent{{Name: "sda3", DescriptorIndex: 0}, {Name: "sdb3", DescriptorIndex: 1}}},
303308
"md42": {
304309
Name: "md42",
305310
ActivityState: "reshaping",
@@ -314,7 +319,7 @@ func TestFS_MDStat(t *testing.T) {
314319
BlocksSyncedPct: 56.1,
315320
BlocksSyncedFinishTime: 1868.1,
316321
BlocksSyncedSpeed: 7640,
317-
Devices: []string{"sda1", "sdd1", "sde1"}},
322+
Devices: []MDStatComponent{{Name: "sda1", DescriptorIndex: 3, Spare: true}, {Name: "sdd1", DescriptorIndex: 0}, {Name: "sde1", DescriptorIndex: 1}}},
318323
}
319324

320325
if want, have := len(refs), len(mdStats); want != have {
@@ -329,18 +334,32 @@ func TestFS_MDStat(t *testing.T) {
329334
}
330335

331336
func TestInvalidMdstat(t *testing.T) {
332-
invalidMount := [][]byte{[]byte(`
337+
invalidMount := [][]byte{
338+
// Test invalid Personality and format
339+
[]byte(`
333340
Personalities : [invalid]
334341
md3 : invalid
335342
314159265 blocks 64k chunks
336343
337344
unused devices: <none>
338345
`),
346+
// Test extra blank line
339347
[]byte(`
340348
md12 : active raid0 sdc2[0] sdd2[1]
341349
342350
3886394368 blocks super 1.2 512k chunks
343-
`)}
351+
`),
352+
// test for impossible component state
353+
[]byte(`
354+
md127 : active raid1 sdi2[0] sdj2[1](Z)
355+
312319552 blocks [2/2] [UU]
356+
`),
357+
// test for malformed component state
358+
[]byte(`
359+
md127 : active raid1 sdi2[0] sdj2[X]
360+
312319552 blocks [2/2] [UU]
361+
`),
362+
}
344363

345364
for _, invalid := range invalidMount {
346365
_, err := parseMDStat(invalid)

0 commit comments

Comments
 (0)