From 61bd4e9dda5a0435f85e25615286f66724f5f0fd Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 18 Jan 2022 17:42:47 -0800 Subject: [PATCH 01/18] WIP: looping version of unix remove_dir_all --- library/std/src/sys/unix/fs.rs | 99 +++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 5b2199c2b7fa4..1842499862d0c 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1672,55 +1672,90 @@ mod remove_dir_impl { } } - fn remove_dir_all_recursive(parent_fd: Option, p: &Path) -> io::Result<()> { - let pcstr = cstr(p)?; - - // entry is expected to be a directory, open as such - let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; - - // open the directory passing ownership of the fd - let (dir, fd) = fdreaddir(fd)?; - for child in dir { - let child = child?; - match is_dir(&child) { - Some(true) => { - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - Some(false) => { - cvt(unsafe { unlinkat(fd, child.name_cstr().as_ptr(), 0) })?; - } - None => match cvt(unsafe { unlinkat(fd, child.name_cstr().as_ptr(), 0) }) { + fn unlink_direntry(ent: &DirEntry, parent_fd: RawFd) -> io::Result { + match is_dir(&ent) { + Some(true) => Ok(false), + Some(false) => { + cvt(unsafe { unlinkat(parent_fd, ent.name_cstr().as_ptr(), 0) })?; + Ok(true) + } + None => { + match cvt(unsafe { unlinkat(parent_fd, ent.name_cstr().as_ptr(), 0) }) { // type unknown - try to unlink Err(err) if err.raw_os_error() == Some(libc::EISDIR) || err.raw_os_error() == Some(libc::EPERM) => { - // if the file is a directory unlink fails with EISDIR on Linux and EPERM everyhwere else - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - result => { - result?; + // if the file is a directory unlink fails with EISDIR on Linux + // and EPERM everyhwere else + Ok(false) } - }, + result => result.map(|_| true), + } } } + } - // unlink the directory after removing its contents - cvt(unsafe { - unlinkat(parent_fd.unwrap_or(libc::AT_FDCWD), pcstr.as_ptr(), libc::AT_REMOVEDIR) - })?; - Ok(()) + fn remove_dir_all_loop(p: &Path) -> io::Result<()> { + use crate::ffi::CString; + + struct State { + dir: ReadDir, + fd: RawFd, + parent_fd: Option, + pcstr: CString, + } + + impl State { + fn new(parent_fd: Option, pcstr: CString) -> io::Result { + // entry is expected to be a directory, open as such + let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; + + // open the directory passing ownership of the fd + let (dir, fd) = fdreaddir(fd)?; + + Ok(Self { dir, fd, parent_fd, pcstr }) + } + } + + let mut parents = Vec::::new(); + let mut current = State::new(None, cstr(p)?)?; + loop { + while let Some(child) = current.dir.next() { + let child = child?; + if !unlink_direntry(&child, current.fd)? { + // Descend into this child directory + let parent = current; + current = State::new(Some(parent.fd), child.name_cstr().into())?; + parents.push(parent); + } + } + + // unlink the directory after removing its contents + cvt(unsafe { + unlinkat( + current.parent_fd.unwrap_or(libc::AT_FDCWD), + current.pcstr.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; + + match parents.pop() { + Some(parent) => current = parent, + None => return Ok(()), + } + } } pub fn remove_dir_all(p: &Path) -> io::Result<()> { - // We cannot just call remove_dir_all_recursive() here because that would not delete a passed - // symlink. No need to worry about races, because remove_dir_all_recursive() does not recurse + // We cannot just call remove_dir_all_loop() here because that would not delete a passed + // symlink. No need to worry about races, because remove_dir_all_loop() does not descend // into symlinks. let attr = lstat(p)?; if attr.file_type().is_symlink() { crate::fs::remove_file(p) } else { - remove_dir_all_recursive(None, p) + remove_dir_all_loop(p) } } } From 7cf62c75902e649fb50f8bb47ad44117065a9ba1 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 08:13:56 +0100 Subject: [PATCH 02/18] Temporarily remove special-cased Macos x86-64 implementation. --- library/std/src/sys/unix/fs.rs | 118 --------------------------------- 1 file changed, 118 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 1842499862d0c..cb0cad9e0682b 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1474,124 +1474,6 @@ mod remove_dir_impl { pub use crate::sys_common::fs::remove_dir_all; } -// Dynamically choose implementation Macos x86-64: modern for 10.10+, fallback for older versions -#[cfg(all(target_os = "macos", target_arch = "x86_64"))] -mod remove_dir_impl { - use super::{cstr, lstat, Dir, InnerReadDir, ReadDir}; - use crate::ffi::CStr; - use crate::io; - use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; - use crate::os::unix::prelude::{OwnedFd, RawFd}; - use crate::path::{Path, PathBuf}; - use crate::sync::Arc; - use crate::sys::weak::weak; - use crate::sys::{cvt, cvt_r}; - use libc::{c_char, c_int, DIR}; - - pub fn openat_nofollow_dironly(parent_fd: Option, p: &CStr) -> io::Result { - weak!(fn openat(c_int, *const c_char, c_int) -> c_int); - let fd = cvt_r(|| unsafe { - openat.get().unwrap()( - parent_fd.unwrap_or(libc::AT_FDCWD), - p.as_ptr(), - libc::O_CLOEXEC | libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_DIRECTORY, - ) - })?; - Ok(unsafe { OwnedFd::from_raw_fd(fd) }) - } - - fn fdreaddir(dir_fd: OwnedFd) -> io::Result<(ReadDir, RawFd)> { - weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64"); - let ptr = unsafe { fdopendir.get().unwrap()(dir_fd.as_raw_fd()) }; - if ptr.is_null() { - return Err(io::Error::last_os_error()); - } - let dirp = Dir(ptr); - // file descriptor is automatically closed by libc::closedir() now, so give up ownership - let new_parent_fd = dir_fd.into_raw_fd(); - // a valid root is not needed because we do not call any functions involving the full path - // of the DirEntrys. - let dummy_root = PathBuf::new(); - Ok(( - ReadDir { - inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), - end_of_stream: false, - }, - new_parent_fd, - )) - } - - fn remove_dir_all_recursive(parent_fd: Option, p: &Path) -> io::Result<()> { - weak!(fn unlinkat(c_int, *const c_char, c_int) -> c_int); - - let pcstr = cstr(p)?; - - // entry is expected to be a directory, open as such - let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; - - // open the directory passing ownership of the fd - let (dir, fd) = fdreaddir(fd)?; - for child in dir { - let child = child?; - match child.entry.d_type { - libc::DT_DIR => { - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - libc::DT_UNKNOWN => { - match cvt(unsafe { unlinkat.get().unwrap()(fd, child.name_cstr().as_ptr(), 0) }) - { - // type unknown - try to unlink - Err(err) if err.raw_os_error() == Some(libc::EPERM) => { - // if the file is a directory unlink fails with EPERM - remove_dir_all_recursive(Some(fd), Path::new(&child.file_name()))?; - } - result => { - result?; - } - } - } - _ => { - // not a directory -> unlink - cvt(unsafe { unlinkat.get().unwrap()(fd, child.name_cstr().as_ptr(), 0) })?; - } - } - } - - // unlink the directory after removing its contents - cvt(unsafe { - unlinkat.get().unwrap()( - parent_fd.unwrap_or(libc::AT_FDCWD), - pcstr.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; - Ok(()) - } - - fn remove_dir_all_modern(p: &Path) -> io::Result<()> { - // We cannot just call remove_dir_all_recursive() here because that would not delete a passed - // symlink. No need to worry about races, because remove_dir_all_recursive() does not recurse - // into symlinks. - let attr = lstat(p)?; - if attr.file_type().is_symlink() { - crate::fs::remove_file(p) - } else { - remove_dir_all_recursive(None, p) - } - } - - pub fn remove_dir_all(p: &Path) -> io::Result<()> { - weak!(fn openat(c_int, *const c_char, c_int) -> c_int); - if openat.get().is_some() { - // openat() is available with macOS 10.10+, just like unlinkat() and fdopendir() - remove_dir_all_modern(p) - } else { - // fall back to classic implementation - crate::sys_common::fs::remove_dir_all(p) - } - } -} - // Modern implementation using openat(), unlinkat() and fdopendir() #[cfg(not(any( all(target_os = "macos", target_arch = "x86_64"), From d2cfba13265a3a62dee390f386dd902c91331833 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 08:15:41 +0100 Subject: [PATCH 03/18] Readd Macos x86-64 remove_dir_all() special case using dynamic symbols. --- library/std/src/sys/unix/fs.rs | 55 ++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index cb0cad9e0682b..26ae920f500df 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1475,11 +1475,7 @@ mod remove_dir_impl { } // Modern implementation using openat(), unlinkat() and fdopendir() -#[cfg(not(any( - all(target_os = "macos", target_arch = "x86_64"), - target_os = "redox", - target_os = "espidf" -)))] +#[cfg(not(any(target_os = "redox", target_os = "espidf")))] mod remove_dir_impl { use super::{cstr, lstat, Dir, DirEntry, InnerReadDir, ReadDir}; use crate::ffi::CStr; @@ -1489,8 +1485,39 @@ mod remove_dir_impl { use crate::path::{Path, PathBuf}; use crate::sync::Arc; use crate::sys::{cvt, cvt_r}; + + #[cfg(not(all(target_os = "macos", target_arch = "x86_64"),))] use libc::{fdopendir, openat, unlinkat}; + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + mod macos_weak { + use crate::sys::weak::weak; + use libc::{c_char, c_int, DIR}; + + pub unsafe fn openat(dirfd: c_int, pathname: *const c_char, flags: c_int) -> c_int { + weak!(fn openat(c_int, *const c_char, c_int) -> c_int); + openat.get().unwrap()(dirfd, pathname, flags) + } + + pub unsafe fn fdopendir(fd: c_int) -> *mut DIR { + weak!(fn fdopendir(c_int) -> *mut DIR, "fdopendir$INODE64"); + fdopendir.get().unwrap()(fd) + } + + pub unsafe fn unlinkat(dirfd: c_int, pathname: *const c_char, flags: c_int) -> c_int { + weak!(fn unlinkat(c_int, *const c_char, c_int) -> c_int); + unlinkat.get().unwrap()(dirfd, pathname, flags) + } + + pub fn has_openat() -> bool { + weak!(fn openat(c_int, *const c_char, c_int) -> c_int); + openat.get().is_some() + } + } + + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + use macos_weak::{fdopendir, openat, unlinkat}; + pub fn openat_nofollow_dironly(parent_fd: Option, p: &CStr) -> io::Result { let fd = cvt_r(|| unsafe { openat( @@ -1629,7 +1656,7 @@ mod remove_dir_impl { } } - pub fn remove_dir_all(p: &Path) -> io::Result<()> { + fn remove_dir_all_modern(p: &Path) -> io::Result<()> { // We cannot just call remove_dir_all_loop() here because that would not delete a passed // symlink. No need to worry about races, because remove_dir_all_loop() does not descend // into symlinks. @@ -1640,4 +1667,20 @@ mod remove_dir_impl { remove_dir_all_loop(p) } } + + #[cfg(not(all(target_os = "macos", target_arch = "x86_64")))] + pub fn remove_dir_all(p: &Path) -> io::Result<()> { + remove_dir_all_modern(p) + } + + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + pub fn remove_dir_all(p: &Path) -> io::Result<()> { + if macos_weak::has_openat() { + // openat() is available with macOS 10.10+, just like unlinkat() and fdopendir() + remove_dir_all_modern(p) + } else { + // fall back to classic implementation + crate::sys_common::fs::remove_dir_all(p) + } + } } From 93e708b27813c0818c349c0ffcc19a7bbfb9ad64 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 09:44:23 +0100 Subject: [PATCH 04/18] Use limited Readdir cache and use openat(dirf, "..", O_NOFOLLOW) to go up. --- library/std/src/sys/unix/fs.rs | 158 +++++++++++++++++++++++++-------- 1 file changed, 121 insertions(+), 37 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 26ae920f500df..3d8d2dd009588 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1478,13 +1478,15 @@ mod remove_dir_impl { #[cfg(not(any(target_os = "redox", target_os = "espidf")))] mod remove_dir_impl { use super::{cstr, lstat, Dir, DirEntry, InnerReadDir, ReadDir}; - use crate::ffi::CStr; + use crate::ffi::{CStr, CString}; use crate::io; + use crate::mem; use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; use crate::os::unix::prelude::{OwnedFd, RawFd}; use crate::path::{Path, PathBuf}; use crate::sync::Arc; use crate::sys::{cvt, cvt_r}; + use alloc::collections::VecDeque; #[cfg(not(all(target_os = "macos", target_arch = "x86_64"),))] use libc::{fdopendir, openat, unlinkat}; @@ -1518,6 +1520,23 @@ mod remove_dir_impl { #[cfg(all(target_os = "macos", target_arch = "x86_64"))] use macos_weak::{fdopendir, openat, unlinkat}; + #[cfg(not(any( + target_os = "linux", + target_os = "emscripten", + target_os = "l4re", + target_os = "android" + )))] + use libc::fstat as fstat64; + #[cfg(any( + target_os = "linux", + target_os = "emscripten", + target_os = "l4re", + target_os = "android" + ))] + use libc::fstat64; + + const MAX_OPEN_FDS: usize = 32; + pub fn openat_nofollow_dironly(parent_fd: Option, p: &CStr) -> io::Result { let fd = cvt_r(|| unsafe { openat( @@ -1605,55 +1624,120 @@ mod remove_dir_impl { } } - fn remove_dir_all_loop(p: &Path) -> io::Result<()> { - use crate::ffi::CString; + struct CachedReadDir { + readdir: ReadDir, + raw_fd: RawFd, + } - struct State { - dir: ReadDir, - fd: RawFd, - parent_fd: Option, - pcstr: CString, - } + struct DirComponent { + name: CString, + ino: u64, + } - impl State { - fn new(parent_fd: Option, pcstr: CString) -> io::Result { - // entry is expected to be a directory, open as such - let fd = openat_nofollow_dironly(parent_fd, &pcstr)?; + fn readdir_open_path(path: &CStr) -> io::Result<(DirComponent, CachedReadDir)> { + let dir_fd = openat_nofollow_dironly(None, path)?; - // open the directory passing ownership of the fd - let (dir, fd) = fdreaddir(fd)?; + // use fstat() to get the inode of the directory + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; + let (readdir, raw_fd) = fdreaddir(dir_fd)?; + Ok(( + DirComponent { name: CString::new("")?, ino: stat.st_ino }, + CachedReadDir { readdir, raw_fd }, + )) + } - Ok(Self { dir, fd, parent_fd, pcstr }) - } - } + fn readdir_open_child( + readdir: &CachedReadDir, + child: &DirEntry, + ) -> io::Result<(DirComponent, CachedReadDir)> { + let dir_fd = openat_nofollow_dironly(Some(readdir.raw_fd), child.name_cstr())?; + let (readdir, raw_fd) = fdreaddir(dir_fd)?; + Ok(( + DirComponent { name: child.name_cstr().into(), ino: child.entry.d_ino }, + CachedReadDir { readdir, raw_fd }, + )) + } - let mut parents = Vec::::new(); - let mut current = State::new(None, cstr(p)?)?; + fn readdir_reopen_parent( + dir: &CachedReadDir, + expected_parent_dir: &DirComponent, + ) -> io::Result { + let parent_dir_fd = openat_nofollow_dironly(Some(dir.raw_fd), unsafe { + CStr::from_bytes_with_nul_unchecked(b"..\0") + })?; + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(parent_dir_fd.as_raw_fd(), &mut stat) })?; + // Make sure that the reopened parent directory has the same inode as when we visited it descending + // the directory tree. More detailed risk analysis TBD. + if expected_parent_dir.ino != stat.st_ino { + return Err(io::Error::new( + io::ErrorKind::Uncategorized, + "parent directory inode does not match", + )); + } + let (readdir, raw_fd) = fdreaddir(parent_dir_fd)?; + Ok(CachedReadDir { readdir, raw_fd }) + } + + fn remove_dir_all_loop(root: &Path) -> io::Result<()> { + // all ancestor names and inodes from the deletion root directory to the parent of the currently processed + // directory + let mut parent_dir_components = Vec::new(); + // cache of up to MAX_OPEN_FDS ancestor ReadDirs and associated file descriptors + let mut readdir_cache = VecDeque::with_capacity(MAX_OPEN_FDS); + // the directory name, inode pair and ReadDir currently being processed + let (mut current_dir_component, mut current_readdir) = readdir_open_path(&cstr(root)?)?; loop { - while let Some(child) = current.dir.next() { + while let Some(child) = current_readdir.readdir.next() { let child = child?; - if !unlink_direntry(&child, current.fd)? { + if !unlink_direntry(&child, current_readdir.raw_fd)? { // Descend into this child directory - let parent = current; - current = State::new(Some(parent.fd), child.name_cstr().into())?; - parents.push(parent); + + let (child_dir_compoment, child_readdir) = + readdir_open_child(¤t_readdir, &child)?; + parent_dir_components.push(current_dir_component); + + // avoid growing the cache over capacity + if readdir_cache.len() == readdir_cache.capacity() { + readdir_cache.pop_front(); + } + readdir_cache.push_back(current_readdir); + + current_readdir = child_readdir; + current_dir_component = child_dir_compoment; } } - // unlink the directory after removing its contents - cvt(unsafe { - unlinkat( - current.parent_fd.unwrap_or(libc::AT_FDCWD), - current.pcstr.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; - - match parents.pop() { - Some(parent) => current = parent, - None => return Ok(()), + match parent_dir_components.pop() { + Some(parent) => { + // Going back up... + let parent_readdir = match readdir_cache.pop_back() { + Some(readdir) => readdir, + None => { + // not cached -> reopen + readdir_reopen_parent(¤t_readdir, &parent)? + } + }; + // unlink the directory after having removed its contents + cvt(unsafe { + unlinkat( + parent_readdir.raw_fd, + current_dir_component.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; + + current_dir_component = parent; + current_readdir = parent_readdir; + } + None => break, } } + + // unlink root dir + cvt(unsafe { unlinkat(libc::AT_FDCWD, cstr(root)?.as_ptr(), libc::AT_REMOVEDIR) })?; + Ok(()) } fn remove_dir_all_modern(p: &Path) -> io::Result<()> { From 424189ded226eca12726cc402151fdb103bed457 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 12:55:57 +0100 Subject: [PATCH 05/18] replace RawFd with BorrowedFd --- library/std/src/sys/unix/fs.rs | 51 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 3d8d2dd009588..337463bea3737 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1482,7 +1482,7 @@ mod remove_dir_impl { use crate::io; use crate::mem; use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; - use crate::os::unix::prelude::{OwnedFd, RawFd}; + use crate::os::unix::prelude::{BorrowedFd, OwnedFd, RawFd}; use crate::path::{Path, PathBuf}; use crate::sync::Arc; use crate::sys::{cvt, cvt_r}; @@ -1537,10 +1537,13 @@ mod remove_dir_impl { const MAX_OPEN_FDS: usize = 32; - pub fn openat_nofollow_dironly(parent_fd: Option, p: &CStr) -> io::Result { + pub fn openat_nofollow_dironly( + parent_fd: Option>, + p: &CStr, + ) -> io::Result { let fd = cvt_r(|| unsafe { openat( - parent_fd.unwrap_or(libc::AT_FDCWD), + parent_fd.map(|fd| fd.as_raw_fd()).unwrap_or(libc::AT_FDCWD), p.as_ptr(), libc::O_CLOEXEC | libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_DIRECTORY, ) @@ -1548,7 +1551,7 @@ mod remove_dir_impl { Ok(unsafe { OwnedFd::from_raw_fd(fd) }) } - fn fdreaddir(dir_fd: OwnedFd) -> io::Result<(ReadDir, RawFd)> { + fn fdreaddir(dir_fd: OwnedFd) -> io::Result { let ptr = unsafe { fdopendir(dir_fd.as_raw_fd()) }; if ptr.is_null() { return Err(io::Error::last_os_error()); @@ -1559,8 +1562,8 @@ mod remove_dir_impl { // a valid root is not needed because we do not call any functions involving the full path // of the DirEntrys. let dummy_root = PathBuf::new(); - Ok(( - ReadDir { + Ok(CachedReadDir { + readdir: ReadDir { inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), #[cfg(not(any( target_os = "android", @@ -1572,8 +1575,8 @@ mod remove_dir_impl { )))] end_of_stream: false, }, - new_parent_fd, - )) + raw_fd: new_parent_fd, + }) } #[cfg(any( @@ -1600,15 +1603,15 @@ mod remove_dir_impl { } } - fn unlink_direntry(ent: &DirEntry, parent_fd: RawFd) -> io::Result { + fn unlink_direntry(ent: &DirEntry, parent_fd: BorrowedFd<'_>) -> io::Result { match is_dir(&ent) { Some(true) => Ok(false), Some(false) => { - cvt(unsafe { unlinkat(parent_fd, ent.name_cstr().as_ptr(), 0) })?; + cvt(unsafe { unlinkat(parent_fd.as_raw_fd(), ent.name_cstr().as_ptr(), 0) })?; Ok(true) } None => { - match cvt(unsafe { unlinkat(parent_fd, ent.name_cstr().as_ptr(), 0) }) { + match cvt(unsafe { unlinkat(parent_fd.as_raw_fd(), ent.name_cstr().as_ptr(), 0) }) { // type unknown - try to unlink Err(err) if err.raw_os_error() == Some(libc::EISDIR) @@ -1629,6 +1632,12 @@ mod remove_dir_impl { raw_fd: RawFd, } + impl CachedReadDir { + fn as_fd(&self) -> BorrowedFd<'_> { + unsafe { BorrowedFd::borrow_raw_fd(self.raw_fd) } + } + } + struct DirComponent { name: CString, ino: u64, @@ -1640,22 +1649,19 @@ mod remove_dir_impl { // use fstat() to get the inode of the directory let mut stat = unsafe { mem::zeroed() }; cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; - let (readdir, raw_fd) = fdreaddir(dir_fd)?; - Ok(( - DirComponent { name: CString::new("")?, ino: stat.st_ino }, - CachedReadDir { readdir, raw_fd }, - )) + let cached_readdir = fdreaddir(dir_fd)?; + Ok((DirComponent { name: CString::new("")?, ino: stat.st_ino }, cached_readdir)) } fn readdir_open_child( readdir: &CachedReadDir, child: &DirEntry, ) -> io::Result<(DirComponent, CachedReadDir)> { - let dir_fd = openat_nofollow_dironly(Some(readdir.raw_fd), child.name_cstr())?; - let (readdir, raw_fd) = fdreaddir(dir_fd)?; + let dir_fd = openat_nofollow_dironly(Some(readdir.as_fd()), child.name_cstr())?; + let cached_readdir = fdreaddir(dir_fd)?; Ok(( DirComponent { name: child.name_cstr().into(), ino: child.entry.d_ino }, - CachedReadDir { readdir, raw_fd }, + cached_readdir, )) } @@ -1663,7 +1669,7 @@ mod remove_dir_impl { dir: &CachedReadDir, expected_parent_dir: &DirComponent, ) -> io::Result { - let parent_dir_fd = openat_nofollow_dironly(Some(dir.raw_fd), unsafe { + let parent_dir_fd = openat_nofollow_dironly(Some(dir.as_fd()), unsafe { CStr::from_bytes_with_nul_unchecked(b"..\0") })?; let mut stat = unsafe { mem::zeroed() }; @@ -1676,8 +1682,7 @@ mod remove_dir_impl { "parent directory inode does not match", )); } - let (readdir, raw_fd) = fdreaddir(parent_dir_fd)?; - Ok(CachedReadDir { readdir, raw_fd }) + fdreaddir(parent_dir_fd) } fn remove_dir_all_loop(root: &Path) -> io::Result<()> { @@ -1691,7 +1696,7 @@ mod remove_dir_impl { loop { while let Some(child) = current_readdir.readdir.next() { let child = child?; - if !unlink_direntry(&child, current_readdir.raw_fd)? { + if !unlink_direntry(&child, current_readdir.as_fd())? { // Descend into this child directory let (child_dir_compoment, child_readdir) = From 65b740cffa3c5dffd2e5b9997f887a704e2d1b7d Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 13:06:14 +0100 Subject: [PATCH 06/18] Store and compare with stat.st_dev as well when going back up via .. --- library/std/src/sys/unix/fs.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 337463bea3737..75dcca7e68bcf 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1487,6 +1487,7 @@ mod remove_dir_impl { use crate::sync::Arc; use crate::sys::{cvt, cvt_r}; use alloc::collections::VecDeque; + use libc::dev_t; #[cfg(not(all(target_os = "macos", target_arch = "x86_64"),))] use libc::{fdopendir, openat, unlinkat}; @@ -1640,17 +1641,21 @@ mod remove_dir_impl { struct DirComponent { name: CString, + dev: dev_t, ino: u64, } fn readdir_open_path(path: &CStr) -> io::Result<(DirComponent, CachedReadDir)> { let dir_fd = openat_nofollow_dironly(None, path)?; - // use fstat() to get the inode of the directory + // use fstat() to get dev, inode of the directory let mut stat = unsafe { mem::zeroed() }; cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; let cached_readdir = fdreaddir(dir_fd)?; - Ok((DirComponent { name: CString::new("")?, ino: stat.st_ino }, cached_readdir)) + Ok(( + DirComponent { name: CString::new("")?, dev: stat.st_dev, ino: stat.st_ino }, + cached_readdir, + )) } fn readdir_open_child( @@ -1658,9 +1663,11 @@ mod remove_dir_impl { child: &DirEntry, ) -> io::Result<(DirComponent, CachedReadDir)> { let dir_fd = openat_nofollow_dironly(Some(readdir.as_fd()), child.name_cstr())?; + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; let cached_readdir = fdreaddir(dir_fd)?; Ok(( - DirComponent { name: child.name_cstr().into(), ino: child.entry.d_ino }, + DirComponent { name: child.name_cstr().into(), dev: stat.st_dev, ino: stat.st_ino }, cached_readdir, )) } @@ -1676,7 +1683,7 @@ mod remove_dir_impl { cvt(unsafe { fstat64(parent_dir_fd.as_raw_fd(), &mut stat) })?; // Make sure that the reopened parent directory has the same inode as when we visited it descending // the directory tree. More detailed risk analysis TBD. - if expected_parent_dir.ino != stat.st_ino { + if expected_parent_dir.dev != stat.st_dev || expected_parent_dir.ino != stat.st_ino { return Err(io::Error::new( io::ErrorKind::Uncategorized, "parent directory inode does not match", From 12b456e329242050da7e0108a66cb267faf90f0f Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 30 Jan 2022 23:09:10 +0100 Subject: [PATCH 07/18] fix for some UNIXes: parent directory needs to be reopened after deletion of child. --- library/std/src/sys/unix/fs.rs | 35 +++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 75dcca7e68bcf..54cc1e600eb0f 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1672,10 +1672,10 @@ mod remove_dir_impl { )) } - fn readdir_reopen_parent( + fn reopen_parent( dir: &CachedReadDir, expected_parent_dir: &DirComponent, - ) -> io::Result { + ) -> io::Result { let parent_dir_fd = openat_nofollow_dironly(Some(dir.as_fd()), unsafe { CStr::from_bytes_with_nul_unchecked(b"..\0") })?; @@ -1689,7 +1689,7 @@ mod remove_dir_impl { "parent directory inode does not match", )); } - fdreaddir(parent_dir_fd) + Ok(parent_dir_fd) } fn remove_dir_all_loop(root: &Path) -> io::Result<()> { @@ -1725,20 +1725,29 @@ mod remove_dir_impl { Some(parent) => { // Going back up... let parent_readdir = match readdir_cache.pop_back() { - Some(readdir) => readdir, + Some(readdir) => { + cvt(unsafe { + unlinkat( + readdir.as_fd().as_raw_fd(), + current_dir_component.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; + readdir + } None => { // not cached -> reopen - readdir_reopen_parent(¤t_readdir, &parent)? + let parent_dir = reopen_parent(¤t_readdir, &parent)?; + cvt(unsafe { + unlinkat( + parent_dir.as_raw_fd(), + current_dir_component.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; + fdreaddir(parent_dir)? } }; - // unlink the directory after having removed its contents - cvt(unsafe { - unlinkat( - parent_readdir.raw_fd, - current_dir_component.name.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; current_dir_component = parent; current_readdir = parent_readdir; From 4641c5685f11e5f6252008af3b4866dcd2facd87 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 31 Jan 2022 09:47:58 +0100 Subject: [PATCH 08/18] Add ui test for calling remove_dir_all() on deep directory hierarchies. --- .../stdlib-unit-tests/remove-dir-all-deep.rs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs diff --git a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs new file mode 100644 index 0000000000000..1cba9e18c90a3 --- /dev/null +++ b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs @@ -0,0 +1,31 @@ +// run-pass + +use std::env::{current_dir, set_current_dir}; +use std::fs::{create_dir, remove_dir_all}; +use std::path::Path; + +pub fn main() { + let saved_cwd = current_dir().unwrap(); + if !Path::exists(Path::new("tmpdir")) { + create_dir("tmpdir").unwrap(); + } + set_current_dir("tmpdir").unwrap(); + let depth = if cfg!(target_os = "windows") { + // On Windows the absolute path length is limited. + 8192 + } else if cfg!(target_os = "macos") { + // On Macos increasing depth leads to a superlinear slowdown + // and - if one digs deep enough - unremovable directories. + 1024 + } else { + 65536 + }; + for _ in 0..depth { + if !Path::exists(Path::new("a")) { + create_dir("a").unwrap(); + } + set_current_dir("a").unwrap(); + } + set_current_dir(saved_cwd).unwrap(); + remove_dir_all("tmpdir").unwrap(); +} From 752330f04b019f5ff5a8c7b6e885f2eed683c166 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 31 Jan 2022 10:18:34 +0100 Subject: [PATCH 09/18] test: safe depths for all platforms. --- .../ui/stdlib-unit-tests/remove-dir-all-deep.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs index 1cba9e18c90a3..0f296bd708f47 100644 --- a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs +++ b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs @@ -10,15 +10,18 @@ pub fn main() { create_dir("tmpdir").unwrap(); } set_current_dir("tmpdir").unwrap(); - let depth = if cfg!(target_os = "windows") { - // On Windows the absolute path length is limited. - 8192 + let depth = if cfg!(target_os = "linux") { + // Should work on all Linux filesystems. + 65536 } else if cfg!(target_os = "macos") { - // On Macos increasing depth leads to a superlinear slowdown - // and - if one digs deep enough - unremovable directories. + // On Macos increasing depth leads to a superlinear slowdown. + 1024 + } else if cfg!(unix) { + // Should be no problem on other UNIXes either. 1024 } else { - 65536 + // "Safe" fallback for other platforms. + 64 }; for _ in 0..depth { if !Path::exists(Path::new("a")) { From bc9f33cf9ee81cc6cdd3e5902bfa2a0801136120 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 06:27:34 +0100 Subject: [PATCH 10/18] WIP: lazy read dir datatype --- library/std/src/sys/unix/fs.rs | 217 ++++++++++++++++++--------------- 1 file changed, 117 insertions(+), 100 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 54cc1e600eb0f..687da0d6a47b1 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1482,7 +1482,7 @@ mod remove_dir_impl { use crate::io; use crate::mem; use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; - use crate::os::unix::prelude::{BorrowedFd, OwnedFd, RawFd}; + use crate::os::unix::prelude::{AsFd, BorrowedFd, OwnedFd, RawFd}; use crate::path::{Path, PathBuf}; use crate::sync::Arc; use crate::sys::{cvt, cvt_r}; @@ -1552,34 +1552,6 @@ mod remove_dir_impl { Ok(unsafe { OwnedFd::from_raw_fd(fd) }) } - fn fdreaddir(dir_fd: OwnedFd) -> io::Result { - let ptr = unsafe { fdopendir(dir_fd.as_raw_fd()) }; - if ptr.is_null() { - return Err(io::Error::last_os_error()); - } - let dirp = Dir(ptr); - // file descriptor is automatically closed by libc::closedir() now, so give up ownership - let new_parent_fd = dir_fd.into_raw_fd(); - // a valid root is not needed because we do not call any functions involving the full path - // of the DirEntrys. - let dummy_root = PathBuf::new(); - Ok(CachedReadDir { - readdir: ReadDir { - inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), - #[cfg(not(any( - target_os = "android", - target_os = "linux", - target_os = "solaris", - target_os = "illumos", - target_os = "fuchsia", - target_os = "redox", - )))] - end_of_stream: false, - }, - raw_fd: new_parent_fd, - }) - } - #[cfg(any( target_os = "solaris", target_os = "illumos", @@ -1628,14 +1600,87 @@ mod remove_dir_impl { } } - struct CachedReadDir { - readdir: ReadDir, - raw_fd: RawFd, + enum LazyReadDir { + Fd(Option), // never None except when fdreaddir() fails + OpenReadDir(ReadDir, RawFd), } - impl CachedReadDir { + impl LazyReadDir { + fn from_path(path: &CStr) -> io::Result { + let fd = openat_nofollow_dironly(None, path)?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn get_child(&self, child: &DirEntry) -> io::Result { + let fd = openat_nofollow_dironly(Some(self.as_fd()), child.name_cstr())?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn get_parent(self: &LazyReadDir) -> io::Result { + let fd = openat_nofollow_dironly(Some(self.as_fd()), unsafe { + CStr::from_bytes_with_nul_unchecked(b"..\0") + })?; + Ok(LazyReadDir::Fd(Some(fd))) + } + + fn ensure_open(&mut self) -> io::Result<()> { + if let LazyReadDir::Fd(fd_opt) = self { + let fd = fd_opt.take().unwrap(); + let ptr = unsafe { fdopendir(fd.as_raw_fd()) }; + if ptr.is_null() { + return Err(io::Error::last_os_error()); + } + let dirp = Dir(ptr); + // file descriptor is automatically closed by libc::closedir() now, so give up ownership + let new_parent_fd = fd.into_raw_fd(); + // a valid root is not needed because we do not call any functions involving the full path + // of the DirEntrys. + let dummy_root = PathBuf::new(); + *self = LazyReadDir::OpenReadDir( + ReadDir { + inner: Arc::new(InnerReadDir { dirp, root: dummy_root }), + #[cfg(not(any( + target_os = "android", + target_os = "linux", + target_os = "solaris", + target_os = "illumos", + target_os = "fuchsia", + target_os = "redox", + )))] + end_of_stream: false, + }, + new_parent_fd, + ); + } + Ok(()) + } + } + + impl AsFd for LazyReadDir { fn as_fd(&self) -> BorrowedFd<'_> { - unsafe { BorrowedFd::borrow_raw_fd(self.raw_fd) } + match self { + LazyReadDir::Fd(Some(fd)) => fd.as_fd(), + LazyReadDir::Fd(None) => { + panic!("LazyReadDir::as_fd() called, but no fd present") + } + LazyReadDir::OpenReadDir(_, fd) => unsafe { BorrowedFd::borrow_raw_fd(*fd) }, + } + } + } + + impl Iterator for LazyReadDir { + type Item = io::Result; + + fn next(&mut self) -> Option> { + if let Err(err) = self.ensure_open() { + return Some(Err(err)); + } + match self { + LazyReadDir::OpenReadDir(rd, _) => rd.next(), + _ => { + unreachable!(); + } + } } } @@ -1645,51 +1690,26 @@ mod remove_dir_impl { ino: u64, } - fn readdir_open_path(path: &CStr) -> io::Result<(DirComponent, CachedReadDir)> { - let dir_fd = openat_nofollow_dironly(None, path)?; - - // use fstat() to get dev, inode of the directory - let mut stat = unsafe { mem::zeroed() }; - cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; - let cached_readdir = fdreaddir(dir_fd)?; - Ok(( - DirComponent { name: CString::new("")?, dev: stat.st_dev, ino: stat.st_ino }, - cached_readdir, - )) - } - - fn readdir_open_child( - readdir: &CachedReadDir, - child: &DirEntry, - ) -> io::Result<(DirComponent, CachedReadDir)> { - let dir_fd = openat_nofollow_dironly(Some(readdir.as_fd()), child.name_cstr())?; - let mut stat = unsafe { mem::zeroed() }; - cvt(unsafe { fstat64(dir_fd.as_raw_fd(), &mut stat) })?; - let cached_readdir = fdreaddir(dir_fd)?; - Ok(( - DirComponent { name: child.name_cstr().into(), dev: stat.st_dev, ino: stat.st_ino }, - cached_readdir, - )) - } + impl DirComponent { + fn new(name: &CStr, fd: BorrowedFd<'_>) -> io::Result { + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; + Ok(DirComponent { name: name.to_owned(), dev: stat.st_dev, ino: stat.st_ino }) + } - fn reopen_parent( - dir: &CachedReadDir, - expected_parent_dir: &DirComponent, - ) -> io::Result { - let parent_dir_fd = openat_nofollow_dironly(Some(dir.as_fd()), unsafe { - CStr::from_bytes_with_nul_unchecked(b"..\0") - })?; - let mut stat = unsafe { mem::zeroed() }; - cvt(unsafe { fstat64(parent_dir_fd.as_raw_fd(), &mut stat) })?; - // Make sure that the reopened parent directory has the same inode as when we visited it descending - // the directory tree. More detailed risk analysis TBD. - if expected_parent_dir.dev != stat.st_dev || expected_parent_dir.ino != stat.st_ino { - return Err(io::Error::new( - io::ErrorKind::Uncategorized, - "parent directory inode does not match", - )); + fn verify_dev_ino(&self, fd: BorrowedFd<'_>) -> io::Result<()> { + let mut stat = unsafe { mem::zeroed() }; + cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; + // Make sure that the reopened parent directory has the same inode as when we visited it descending + // the directory tree. More detailed risk analysis TBD. + if self.dev != stat.st_dev || self.ino != stat.st_ino { + return Err(io::Error::new( + io::ErrorKind::Uncategorized, + "parent directory inode does not match", + )); + } + Ok(()) } - Ok(parent_dir_fd) } fn remove_dir_all_loop(root: &Path) -> io::Result<()> { @@ -1699,15 +1719,20 @@ mod remove_dir_impl { // cache of up to MAX_OPEN_FDS ancestor ReadDirs and associated file descriptors let mut readdir_cache = VecDeque::with_capacity(MAX_OPEN_FDS); // the directory name, inode pair and ReadDir currently being processed - let (mut current_dir_component, mut current_readdir) = readdir_open_path(&cstr(root)?)?; + let mut current_readdir = LazyReadDir::from_path(&cstr(root)?)?; + let mut current_dir_component = DirComponent::new( + unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }, + current_readdir.as_fd(), + )?; loop { - while let Some(child) = current_readdir.readdir.next() { + while let Some(child) = current_readdir.next() { let child = child?; if !unlink_direntry(&child, current_readdir.as_fd())? { // Descend into this child directory - let (child_dir_compoment, child_readdir) = - readdir_open_child(¤t_readdir, &child)?; + let child_readdir = current_readdir.get_child(&child)?; + let child_dir_compoment = + DirComponent::new(&child.name_cstr(), child_readdir.as_fd())?; parent_dir_components.push(current_dir_component); // avoid growing the cache over capacity @@ -1725,29 +1750,21 @@ mod remove_dir_impl { Some(parent) => { // Going back up... let parent_readdir = match readdir_cache.pop_back() { - Some(readdir) => { - cvt(unsafe { - unlinkat( - readdir.as_fd().as_raw_fd(), - current_dir_component.name.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; - readdir - } + Some(readdir) => readdir, None => { // not cached -> reopen - let parent_dir = reopen_parent(¤t_readdir, &parent)?; - cvt(unsafe { - unlinkat( - parent_dir.as_raw_fd(), - current_dir_component.name.as_ptr(), - libc::AT_REMOVEDIR, - ) - })?; - fdreaddir(parent_dir)? + let parent_readdir = current_readdir.get_parent()?; + parent.verify_dev_ino(parent_readdir.as_fd())?; + parent_readdir } }; + cvt(unsafe { + unlinkat( + parent_readdir.as_fd().as_raw_fd(), + current_dir_component.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + })?; current_dir_component = parent; current_readdir = parent_readdir; From a6953a5838d685e5338b5010f1e256c0fce53dc3 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 09:16:47 +0100 Subject: [PATCH 11/18] refill complete cache if empty when going up --- library/std/src/sys/unix/fs.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 687da0d6a47b1..847d83dbda210 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1700,12 +1700,12 @@ mod remove_dir_impl { fn verify_dev_ino(&self, fd: BorrowedFd<'_>) -> io::Result<()> { let mut stat = unsafe { mem::zeroed() }; cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; - // Make sure that the reopened parent directory has the same inode as when we visited it descending + // Make sure that the reopened directory has the same inode as when we visited it descending // the directory tree. More detailed risk analysis TBD. if self.dev != stat.st_dev || self.ino != stat.st_ino { return Err(io::Error::new( io::ErrorKind::Uncategorized, - "parent directory inode does not match", + "directory inode does not match", )); } Ok(()) @@ -1752,10 +1752,24 @@ mod remove_dir_impl { let parent_readdir = match readdir_cache.pop_back() { Some(readdir) => readdir, None => { - // not cached -> reopen + // cache is empty + + // reopen direct parent let parent_readdir = current_readdir.get_parent()?; parent.verify_dev_ino(parent_readdir.as_fd())?; - parent_readdir + readdir_cache.push_front(parent_readdir); + + // refill cache and verify ancestors + for ancester_component in parent_dir_components + .iter() + .rev() + .take(readdir_cache.capacity() - 1) + { + let parent_readdir = readdir_cache.front().unwrap().get_parent()?; + ancester_component.verify_dev_ino(parent_readdir.as_fd())?; + readdir_cache.push_front(parent_readdir); + } + readdir_cache.pop_back().unwrap() } }; cvt(unsafe { From a6b94a4b6e7cac95d09c54622d5fac9d7d7239fe Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 10:15:06 +0100 Subject: [PATCH 12/18] cosmetics --- library/std/src/sys/unix/fs.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 847d83dbda210..3e1b75af0e5a4 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1577,7 +1577,7 @@ mod remove_dir_impl { } fn unlink_direntry(ent: &DirEntry, parent_fd: BorrowedFd<'_>) -> io::Result { - match is_dir(&ent) { + match is_dir(ent) { Some(true) => Ok(false), Some(false) => { cvt(unsafe { unlinkat(parent_fd.as_raw_fd(), ent.name_cstr().as_ptr(), 0) })?; @@ -1611,12 +1611,12 @@ mod remove_dir_impl { Ok(LazyReadDir::Fd(Some(fd))) } - fn get_child(&self, child: &DirEntry) -> io::Result { - let fd = openat_nofollow_dironly(Some(self.as_fd()), child.name_cstr())?; + fn get_child(&self, child_name: &CStr) -> io::Result { + let fd = openat_nofollow_dironly(Some(self.as_fd()), child_name)?; Ok(LazyReadDir::Fd(Some(fd))) } - fn get_parent(self: &LazyReadDir) -> io::Result { + fn get_parent(&self) -> io::Result { let fd = openat_nofollow_dironly(Some(self.as_fd()), unsafe { CStr::from_bytes_with_nul_unchecked(b"..\0") })?; @@ -1691,7 +1691,7 @@ mod remove_dir_impl { } impl DirComponent { - fn new(name: &CStr, fd: BorrowedFd<'_>) -> io::Result { + fn new(name: &CStr, fd: BorrowedFd<'_>) -> io::Result { let mut stat = unsafe { mem::zeroed() }; cvt(unsafe { fstat64(fd.as_raw_fd(), &mut stat) })?; Ok(DirComponent { name: name.to_owned(), dev: stat.st_dev, ino: stat.st_ino }) @@ -1730,9 +1730,9 @@ mod remove_dir_impl { if !unlink_direntry(&child, current_readdir.as_fd())? { // Descend into this child directory - let child_readdir = current_readdir.get_child(&child)?; + let child_readdir = current_readdir.get_child(child.name_cstr())?; let child_dir_compoment = - DirComponent::new(&child.name_cstr(), child_readdir.as_fd())?; + DirComponent::new(child.name_cstr(), child_readdir.as_fd())?; parent_dir_components.push(current_dir_component); // avoid growing the cache over capacity From 20542dc8cc1518e55583361ca94b318fca943f54 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 10:42:02 +0100 Subject: [PATCH 13/18] CI filesystem is apparently slow, 64K deep hierarchy takes <2sec. locally --- src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs index 0f296bd708f47..6a821aa039e8c 100644 --- a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs +++ b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs @@ -12,7 +12,7 @@ pub fn main() { set_current_dir("tmpdir").unwrap(); let depth = if cfg!(target_os = "linux") { // Should work on all Linux filesystems. - 65536 + 4096 } else if cfg!(target_os = "macos") { // On Macos increasing depth leads to a superlinear slowdown. 1024 From f591d57b122479b81f8f28720a8c7127dc878322 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 12:54:24 +0100 Subject: [PATCH 14/18] correct limit for readdir cache --- library/std/src/sys/unix/fs.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 3e1b75af0e5a4..f47a79a0f2335 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1536,7 +1536,8 @@ mod remove_dir_impl { ))] use libc::fstat64; - const MAX_OPEN_FDS: usize = 32; + // VecDeque allocates space for 2^n elements if the capacity is 2^n-1. + const MAX_OPEN_FDS: usize = 31; pub fn openat_nofollow_dironly( parent_fd: Option>, @@ -1760,6 +1761,7 @@ mod remove_dir_impl { readdir_cache.push_front(parent_readdir); // refill cache and verify ancestors + let mut count = 0; for ancester_component in parent_dir_components .iter() .rev() @@ -1768,7 +1770,9 @@ mod remove_dir_impl { let parent_readdir = readdir_cache.front().unwrap().get_parent()?; ancester_component.verify_dev_ino(parent_readdir.as_fd())?; readdir_cache.push_front(parent_readdir); + count += 1; } + eprintln!("Refilled cache with {} entries", count); readdir_cache.pop_back().unwrap() } }; From 266aaaf7e28da288a3b198e1c07d326f55c21d24 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 2 Feb 2022 12:56:36 +0100 Subject: [PATCH 15/18] remove debug output (oops) --- library/std/src/sys/unix/fs.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index f47a79a0f2335..6100b8090fe02 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1761,7 +1761,6 @@ mod remove_dir_impl { readdir_cache.push_front(parent_readdir); // refill cache and verify ancestors - let mut count = 0; for ancester_component in parent_dir_components .iter() .rev() @@ -1770,9 +1769,7 @@ mod remove_dir_impl { let parent_readdir = readdir_cache.front().unwrap().get_parent()?; ancester_component.verify_dev_ino(parent_readdir.as_fd())?; readdir_cache.push_front(parent_readdir); - count += 1; } - eprintln!("Refilled cache with {} entries", count); readdir_cache.pop_back().unwrap() } }; From f0bc8a60b5baec1ad8bfebf511272d7f8411864e Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sat, 5 Feb 2022 06:45:15 +0100 Subject: [PATCH 16/18] better inode reuse protection --- library/std/src/sys/unix/fs.rs | 41 +++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index 6100b8090fe02..eadd827105f02 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1655,6 +1655,13 @@ mod remove_dir_impl { } Ok(()) } + + fn is_open(&self) -> bool { + match self { + LazyReadDir::OpenReadDir(_, _) => true, + _ => false, + } + } } impl AsFd for LazyReadDir { @@ -1750,29 +1757,19 @@ mod remove_dir_impl { match parent_dir_components.pop() { Some(parent) => { // Going back up... + + // Get parent directory readdir let parent_readdir = match readdir_cache.pop_back() { Some(readdir) => readdir, None => { - // cache is empty - - // reopen direct parent + // cache is empty - reopen parent let parent_readdir = current_readdir.get_parent()?; parent.verify_dev_ino(parent_readdir.as_fd())?; - readdir_cache.push_front(parent_readdir); - - // refill cache and verify ancestors - for ancester_component in parent_dir_components - .iter() - .rev() - .take(readdir_cache.capacity() - 1) - { - let parent_readdir = readdir_cache.front().unwrap().get_parent()?; - ancester_component.verify_dev_ino(parent_readdir.as_fd())?; - readdir_cache.push_front(parent_readdir); - } - readdir_cache.pop_back().unwrap() + parent_readdir } }; + + // Remove now empty directory cvt(unsafe { unlinkat( parent_readdir.as_fd().as_raw_fd(), @@ -1783,6 +1780,18 @@ mod remove_dir_impl { current_dir_component = parent; current_readdir = parent_readdir; + + // If we don't have readdir open for the current directory that means we got the file descriptor + // via openat(dirfd, ".."). To make sure the that the previous child directory was not moved + // somewhere else and its parent just happens to have the same reused (dev, inode) + // pair, that we found decending, we check the parent directory (dev, inode) as well. + if !current_readdir.is_open() && readdir_cache.is_empty() { + if let Some(parent) = parent_dir_components.last() { + let parent_readdir = current_readdir.get_parent()?; + parent.verify_dev_ino(parent_readdir.as_fd())?; + readdir_cache.push_back(parent_readdir); + } + } } None => break, } From aec1f4576de9cfc14b354ada534d6348e08426ec Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sat, 5 Feb 2022 06:53:14 +0100 Subject: [PATCH 17/18] expand test to include an empty dir/file in each directory --- src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs index 6a821aa039e8c..47d9194504667 100644 --- a/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs +++ b/src/test/ui/stdlib-unit-tests/remove-dir-all-deep.rs @@ -1,7 +1,7 @@ // run-pass use std::env::{current_dir, set_current_dir}; -use std::fs::{create_dir, remove_dir_all}; +use std::fs::{create_dir, remove_dir_all, File}; use std::path::Path; pub fn main() { @@ -25,6 +25,8 @@ pub fn main() { }; for _ in 0..depth { if !Path::exists(Path::new("a")) { + create_dir("empty_dir").unwrap(); + File::create("empty_file").unwrap(); create_dir("a").unwrap(); } set_current_dir("a").unwrap(); From dafa41fb03de226e0221b7c08c8362edd27c896b Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sat, 5 Feb 2022 08:43:26 +0100 Subject: [PATCH 18/18] include parent of deletion root dir in reused inode check --- library/std/src/sys/unix/fs.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/library/std/src/sys/unix/fs.rs b/library/std/src/sys/unix/fs.rs index eadd827105f02..5e73710f7a611 100644 --- a/library/std/src/sys/unix/fs.rs +++ b/library/std/src/sys/unix/fs.rs @@ -1732,6 +1732,10 @@ mod remove_dir_impl { unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }, current_readdir.as_fd(), )?; + let root_parent_component = DirComponent::new( + unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }, + current_readdir.get_parent()?.as_fd(), + )?; loop { while let Some(child) = current_readdir.next() { let child = child?; @@ -1790,6 +1794,10 @@ mod remove_dir_impl { let parent_readdir = current_readdir.get_parent()?; parent.verify_dev_ino(parent_readdir.as_fd())?; readdir_cache.push_back(parent_readdir); + } else { + // verify parent of the root directory + let parent_readdir = current_readdir.get_parent()?; + root_parent_component.verify_dev_ino(parent_readdir.as_fd())?; } } }