From 66aa9a8c6a01791e33584d31dbc06deeb89ba8c3 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sat, 22 Feb 2025 17:04:12 +0200 Subject: [PATCH 1/3] pkg/cdi: add test case for EMFILE crash (avoidance). Signed-off-by: Krisztian Litkey --- pkg/cdi/cache_linux_test.go | 142 ++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 pkg/cdi/cache_linux_test.go diff --git a/pkg/cdi/cache_linux_test.go b/pkg/cdi/cache_linux_test.go new file mode 100644 index 00000000..18efb320 --- /dev/null +++ b/pkg/cdi/cache_linux_test.go @@ -0,0 +1,142 @@ +/* + Copyright © The CDI Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cdi + +import ( + "fmt" + "os" + "strconv" + "strings" + "syscall" + "testing" + + oci "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/require" +) + +func TestTooManyOpenFiles(t *testing.T) { + em, err := triggerEmfile() + require.NoError(t, err) + require.NotNil(t, em) + defer func() { + require.NoError(t, em.undo()) + }() + + _, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0) + require.Equal(t, syscall.EMFILE, err) + + cache := newCache( + WithAutoRefresh(true), + ) + require.NotNil(t, cache) + + // try to trigger original crash with a nil fsnotify.Watcher + _, _ = cache.InjectDevices(&oci.Spec{}, "vendor1.com/device=dev1") +} + +type emfile struct { + limit syscall.Rlimit + fds []int + undone bool +} + +// getFdTableSize reads the process' FD table size. +func getFdTableSize() (uint64, error) { + status, err := os.ReadFile("/proc/self/status") + if err != nil { + return 0, err + } + + const fdSizeTag = "FDSize:" + + for _, line := range strings.Split(string(status), "\n") { + if strings.HasPrefix(line, fdSizeTag) { + value := strings.TrimSpace(strings.TrimPrefix(line, fdSizeTag)) + size, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return 0, err + } + return size, nil + } + } + + return 0, fmt.Errorf("tag %s not found in /proc/self/status", fdSizeTag) +} + +// triggerEmfile exhausts the file descriptors of the process and triggers +// a failure with an EMFILE for any syscall that needs to create a new fd. +// On success, the returned emfile's undo() method can be used to undo the +// exhausted table and restore everything to a working state. +func triggerEmfile() (*emfile, error) { + // We exhaust our file descriptors by + // - checking the size of our current fd table + // - setting our soft RLIMIT_NOFILE limit to the table size + // - ensuring the fd table is full by creating new fd's + // + // We also save our original RLIMIT_NOFILE limit and any fd's we + // might need to create, so we can eventually restore everything + // to its original state. + + fdsize, err := getFdTableSize() + if err != nil { + return nil, err + } + + em := &emfile{} + + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &em.limit); err != nil { + return nil, err + } + + limit := em.limit + limit.Cur = fdsize + + if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limit); err != nil { + return nil, err + } + + for i := uint64(0); i < fdsize; i++ { + fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0) + if err != nil { + return em, nil + } + em.fds = append(em.fds, fd) + } + + return nil, fmt.Errorf("failed to trigger EMFILE") +} + +// undo restores the process' state to its pre-EMFILE condition. +func (em *emfile) undo() error { + if em == nil || em.undone { + return nil + } + + // we restore the process' state to pre-EMFILE condition by + // - restoring our saved RLIMIT_NOFILE + // - closing any extra file descriptors we might have created + + if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &em.limit); err != nil { + return err + } + for _, fd := range em.fds { + syscall.Close(fd) + } + em.undone = true + + return nil +} From d0ccbf896310ab6fb8c0bf493bdf6796dc41db33 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sat, 22 Feb 2025 16:31:57 +0200 Subject: [PATCH 2/3] pkg/cdi: don't crash if we fail to create fsnotify watch. Don't crash in update() if we fail to create an fsnotify watch. This can happen if we have too many open files. In this case we now record a failure for all configured spec directories and in update we always trigger a refresh. If the process if ever able to create new file descriptors the cache becomes functional but in a 'always implicitly fully refreshed' mode instead of auto- refreshed. It's not entirely clear what is the best option to deal with a failed watch creation. Being out of file descriptors typically results in a cascading chain of errors which the process does not usually survive. This fix aims for minimal footprint. On failed watch creation it does not render the cache fully unusable. If the process is ever able to create new file descriptors again the cache also becomes functional, but instead of autorefreshed mode it will be in an 'always implicitly fully refreshed' mode. Signed-off-by: Krisztian Litkey --- pkg/cdi/cache.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/cdi/cache.go b/pkg/cdi/cache.go index c2f7fe34..9afa4b18 100644 --- a/pkg/cdi/cache.go +++ b/pkg/cdi/cache.go @@ -564,6 +564,14 @@ func (w *watch) update(dirErrors map[string]error, removed ...string) bool { update bool ) + // If we failed to create an fsnotify.Watcher we have a nil watcher here + // (but with autoRefresh left on). One known case when this can happen is + // if we have too many open files. In that case we always return true and + // force a refresh. + if w.watcher == nil { + return true + } + for dir, ok = range w.tracked { if ok { continue From 693c957e3c57c82abb89e800a3fc3efe036254b9 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Mon, 24 Feb 2025 11:48:32 +0200 Subject: [PATCH 3/3] pkg/cdi: add test case for post-EMFILE recovery. Test that the cache indeed recovers functionally if the process ever recovers from not being able to create new file descriptors. Signed-off-by: Krisztian Litkey --- pkg/cdi/cache_linux_test.go | 95 +++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/pkg/cdi/cache_linux_test.go b/pkg/cdi/cache_linux_test.go index 18efb320..9eda4bd6 100644 --- a/pkg/cdi/cache_linux_test.go +++ b/pkg/cdi/cache_linux_test.go @@ -19,6 +19,7 @@ package cdi import ( "fmt" "os" + "path/filepath" "strconv" "strings" "syscall" @@ -48,6 +49,100 @@ func TestTooManyOpenFiles(t *testing.T) { _, _ = cache.InjectDevices(&oci.Spec{}, "vendor1.com/device=dev1") } +func TestRecoveryAfterTooManyOpenFiles(t *testing.T) { + var ( + etcDir = map[string]string{ + "vendor1.yaml": ` +cdiVersion: "0.3.0" +kind: "vendor1.com/device" +containerEdits: + env: + - VENDOR1_SPEC_VAR1=VAL1 +devices: + - name: "dev1" + containerEdits: + env: + - "VENDOR1_VAR1=VAL1" + deviceNodes: + - path: "/dev/vendor1-dev1" + type: b + major: 10 + minor: 1 +`, + } + + devices = []string{ + "vendor1.com/device=dev1", + } + + ociSpec = &oci.Spec{} + + resultingSpec = &oci.Spec{ + Process: &oci.Process{ + Env: []string{ + "VENDOR1_SPEC_VAR1=VAL1", + "VENDOR1_VAR1=VAL1", + }, + }, + Linux: &oci.Linux{ + Devices: []oci.LinuxDevice{ + { + Path: "/dev/vendor1-dev1", + Type: "b", + Major: 10, + Minor: 1, + }, + }, + Resources: &oci.LinuxResources{ + Devices: []oci.LinuxDeviceCgroup{ + { + Allow: true, + Type: "b", + Major: int64ptr(10), + Minor: int64ptr(1), + Access: "rwm", + }, + }, + }, + }, + } + ) + + dir, err := createSpecDirs(t, etcDir, nil) + require.NoError(t, err, "failed to create test directory") + + // trigger EMFILE for fd creation: exhaust our file descriptor table + em, err := triggerEmfile() + require.NoError(t, err) + require.NotNil(t, em) + defer func() { + require.NoError(t, em.undo()) + }() + + _, err = syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0) + require.Equal(t, syscall.EMFILE, err) + + cache := newCache( + WithSpecDirs( + filepath.Join(dir, "etc"), + ), + WithAutoRefresh(true), + ) + require.NotNil(t, cache) + + // try to trigger original crash with a nil fsnotify.Watcher + _, _ = cache.InjectDevices(&oci.Spec{}, devices...) + + // undo EMFILE for fd creation + require.NoError(t, em.undo()) + + // verify that injection works again + unresolved, err := cache.InjectDevices(ociSpec, devices...) + require.NoError(t, err) + require.Nil(t, unresolved) + require.Equal(t, resultingSpec, ociSpec) +} + type emfile struct { limit syscall.Rlimit fds []int