From cdd3c3f98acdd8f5ff2a740bb0b17245963c07b9 Mon Sep 17 00:00:00 2001
From: Li Qiang
Date: Wed, 30 Jul 2025 23:04:20 +0800
Subject: [PATCH] runsc: Allow map host user to non-root user in rootless mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix #9918.

Currently, the rootless mode (runsc is called by non-root user) is not
working well with the filesystem if we use a non-root user in runsc
container. This is because the runsc is mapping the host non-root user
to root-user in container. In some cases we need to map the host
non-root user to runsc container non-root user (with the same uid).

After this patch, the following filesystem operations work well.

test@test-virtual-machine:~/test$ ./runsc -ignore-cgroups --network host run abc
id
uid=1000(test) gid=1000(test) groups=1000(test)
touch /tmp/runsctest
echo aaa > /tmp/runsctest
ls -lh /tmp/runsctest
-rw-r--r-- 1 test test 4 Jun 29 18:46 /tmp/runsctest
exit
test@test-virtual-machine:~/test$ ls -lh /tmp/runsctest
-rw-r--r-- 1 test test 4 6月 29 18:46 /tmp/runsctest
test@test-virtual-machine:~/test$ cat /tmp/runsctest
---
 runsc/cmd/gofer.go           | 36 ++++++++++++++++++++++++---
 runsc/container/container.go | 16 +++++++++++-
 runsc/sandbox/sandbox.go     | 47 +++++++++++++++++++++++++++++++++++-
 3 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index ebafd45e47..e8d7f0cb4e 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -17,6 +17,7 @@ package cmd
 import (
 	"context"
+	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
 	"os"
@@ -64,6 +65,8 @@ var goferCaps = &specs.LinuxCapabilities{
 	Bounding:  caps,
 	Effective: caps,
 	Permitted: caps,
+	Inheritable: caps,
+	Ambient: caps,
 }
 
 var goferUdsOpenCaps = &specs.LinuxCapabilities{
@@ -818,6 +821,26 @@ func waitForFD(fd int, fdName string) error {
 	return nil
 }
 
+// waitForID reads the 8-byte message written by sandbox.SendIDToSandbox from
+// fd and returns the UID and GID it carries (two big-endian uint32 values).
+// fd is wrapped in an *os.File named fdName and closed before returning.
+func waitForID(fd int, fdName string) (uint32, uint32, error) {
+	log.Debugf("Waiting on %s %d...", fdName, fd)
+	f := os.NewFile(uintptr(fd), fdName)
+	defer f.Close()
+
+	// A single Read may return fewer than 8 bytes; io.ReadFull keeps reading
+	// and reports io.EOF / io.ErrUnexpectedEOF if the writer goes away early.
+	var buf [8]byte
+	if _, err := io.ReadFull(f, buf[:]); err != nil {
+		return 0, 0, fmt.Errorf("reading UID/GID from %s: %w", fdName, err)
+	}
+
+	uid := binary.BigEndian.Uint32(buf[0:4])
+	gid := binary.BigEndian.Uint32(buf[4:8])
+	return uid, gid, nil
+}
+
 // spawnProcMounter executes the /proc unmounter process.
 // It returns a function to wait on the proc unmounter process, which
 // should be called (via defer) in case of errors in order to clean up the
@@ -872,17 +895,22 @@ func (g *goferSyncFDs) syncUsernsForRootless() {
 //
 // Postcondition: All callers must re-exec themselves after this returns.
 func syncUsernsForRootless(fd int) {
-	if err := waitForFD(fd, "userns sync FD"); err != nil {
-		util.Fatalf("failed to sync on userns FD: %v", err)
+	// The parent sends the UID/GID this process must assume. In
+	// createSandboxProcess that happens right after the ID mappings are
+	// written, so a successful read also means the mappings are in place.
+	uid, gid, err := waitForID(fd, "userns sync FD")
+	if err != nil {
+		util.Fatalf("failed to receive UID/GID on userns FD: %v", err)
 	}
 
+	// Each ID is 0 when the spec maps container root (see sandbox.SendIDToSandbox).
 	// SETUID changes UID on the current system thread, so we have
 	// to re-execute current binary.
 	runtime.LockOSThread()
-	if _, _, errno := unix.RawSyscall(unix.SYS_SETUID, 0, 0, 0); errno != 0 {
+	if _, _, errno := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0); errno != 0 {
 		util.Fatalf("failed to set UID: %v", errno)
 	}
-	if _, _, errno := unix.RawSyscall(unix.SYS_SETGID, 0, 0, 0); errno != 0 {
+	if _, _, errno := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0); errno != 0 {
 		util.Fatalf("failed to set GID: %v", errno)
 	}
 }
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 89ef3429f3..f97278d32e 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -1392,6 +1392,13 @@ func (c *Container) createGoferProcess(conf *config.Config, mountHints *boot.Pod
 		{Type: specs.UTSNamespace},
 	}
 
+	var gSyncFile *os.File
+	defer func() {
+		if gSyncFile != nil {
+			gSyncFile.Close()
+		}
+	}()
+
 	rootlessEUID := unix.Geteuid() != 0
 	// Setup any uid/gid mappings, and create or join the configured user
 	// namespace so the gofer's view of the filesystem aligns with the
@@ -1413,7 +1420,7 @@ func (c *Container) createGoferProcess(conf *config.Config, mountHints *boot.Pod
 		if err != nil {
 			return nil, nil, nil, nil, err
 		}
-		defer syncFile.Close()
+		gSyncFile = syncFile
 	}
 
 	// Create synchronization FD for chroot.
@@ -1459,6 +1466,13 @@ func (c *Container) createGoferProcess(conf *config.Config, mountHints *boot.Pod
 		return nil, nil, nil, nil, fmt.Errorf("creating gofer filestore files: %w", err)
 	}
 
+	if rootlessEUID {
+		chrootSyncSandEnd.Close()
+		if err := sandbox.SendIDToSandbox(gSyncFile, c.Spec); err != nil {
+			return nil, nil, nil, nil, err
+		}
+	}
+
 	return sandEnds, goferFilestores, devSandEnd, mountsSand, nil
 }
 
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 134dd12598..4725cf3755 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -17,6 +17,7 @@ package sandbox
 import (
 	"context"
+	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -1030,6 +1031,13 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
 	// configured.
 	rootlessEUID := unix.Geteuid() != 0
 	setUserMappings := false
+	var gSyncFile *os.File
+	defer func() {
+		if gSyncFile != nil {
+			gSyncFile.Close()
+		}
+	}()
+
 	if conf.Network == config.NetworkHost || conf.DirectFS {
 		if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
 			log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
@@ -1039,7 +1047,7 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
 				if err != nil {
 					return err
 				}
-				defer syncFile.Close()
+				gSyncFile = syncFile
 				setUserMappings = true
 			} else {
 				specutils.SetUIDGIDMappings(cmd, args.Spec)
@@ -1284,6 +1292,9 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
 		if err := SetUserMappings(args.Spec, cmd.Process.Pid); err != nil {
 			return err
 		}
+		if err := SendIDToSandbox(gSyncFile, args.Spec); err != nil {
+			return err
+		}
 	}
 
 	s.child = true
@@ -1293,6 +1304,40 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
 	return nil
 }
 
+// rootMappedInContainer reports whether idMappings maps container ID 0 (root).
+func rootMappedInContainer(idMappings []specs.LinuxIDMapping) bool {
+	for _, m := range idMappings {
+		if m.ContainerID == 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// SendIDToSandbox writes to syncFile the UID and GID that the sandbox or
+// gofer process must switch to, as two big-endian uint32 values. Each ID is 0
+// when the spec maps container root, else the matching spec.Process.User ID.
+func SendIDToSandbox(syncFile *os.File, spec *specs.Spec) error {
+	if spec == nil || spec.Linux == nil || spec.Process == nil {
+		return errors.New("write uid&gid to sandbox error: spec has no linux or process section")
+	}
+	var uid, gid uint32
+	if !rootMappedInContainer(spec.Linux.UIDMappings) {
+		uid = spec.Process.User.UID
+	}
+	if !rootMappedInContainer(spec.Linux.GIDMappings) {
+		gid = spec.Process.User.GID
+	}
+
+	var buf [8]byte
+	binary.BigEndian.PutUint32(buf[0:4], uid)
+	binary.BigEndian.PutUint32(buf[4:8], gid)
+	if _, err := syncFile.Write(buf[:]); err != nil {
+		return fmt.Errorf("write uid&gid to sandbox error: %w", err)
+	}
+	return nil
+}
+
 // Wait waits for the containerized process to exit, and returns its WaitStatus.
 func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) {
 	log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID)