From c161fd218c4694219c5bbdb0d9b0e3f12890bffb Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 18 Dec 2024 20:58:32 +0900 Subject: [PATCH 1/9] FileManagement: Simplify emulated file lookup To locate whether a path is in the emulated list, EmulatedFDManager::OpenAt() attempts to resolve the path. realpath() ends up calling readlinkat() on every path component, which is a lot of syscalls for every open() variant syscall. It also makes interaction with the rootfs complex and error-prone. There's a much easier way to do this: We just open the file without emulation and check its real path via get_fdpath(). This is just one readlink() syscall per open, instead of one per path component. If the file turns out to be emulated (uncommon case), we swap out the fds. This also decouples EmulatedFDManager from guest path resolution entirely, so it will never fall out of sync with the RootFS logic. --- .../EmulatedFiles/EmulatedFiles.cpp | 56 +------------- .../EmulatedFiles/EmulatedFiles.h | 2 +- .../LinuxSyscalls/FileManagement.cpp | 77 ++++++++++++------- .../LinuxSyscalls/FileManagement.h | 2 + 4 files changed, 56 insertions(+), 81 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp index c84c9eaa2b..ae3bc9d73e 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp @@ -582,67 +582,17 @@ EmulatedFDManager::EmulatedFDManager(FEXCore::Context::Context* ctx) EmulatedFDManager::~EmulatedFDManager() {} -int32_t EmulatedFDManager::OpenAt(int dirfs, const char* pathname, int flags, uint32_t mode) { - char Tmp[PATH_MAX]; - const char* Path {}; - +int32_t EmulatedFDManager::Open(const char* pathname, int flags, uint32_t mode) { auto Creator = FDReadCreators.end(); if (pathname) { Creator = FDReadCreators.find(pathname); - Path = pathname; } if 
(Creator == FDReadCreators.end()) { - if (((pathname && pathname[0] != '/') || // If pathname exists then it must not be absolute - !pathname) && - dirfs != AT_FDCWD) { - // Passed in a dirfd that isn't magic FDCWD - // We need to get the path from the fd now - auto PathLength = FEX::get_fdpath(dirfs, Tmp); - if (PathLength != -1) { - if (pathname) { - Tmp[PathLength] = '/'; - PathLength += 1; - strncpy(&Tmp[PathLength], pathname, PATH_MAX - PathLength); - } else { - Tmp[PathLength] = '\0'; - } - Path = Tmp; - } else if (pathname) { - Path = pathname; - } - } else { - if (!pathname || pathname[0] == 0) { - return -1; - } - - Path = pathname; - } - - bool exists = access(Path, F_OK) == 0; - bool RealPathExists = false; - - if (exists) { - // If realpath fails then the temporary buffer is in an undefined state. - // Need to use another temporary just in-case realpath doesn't succeed. - char ExistsTempPath[PATH_MAX]; - char* RealPath = realpath(Path, ExistsTempPath); - if (RealPath) { - RealPathExists = true; - Creator = FDReadCreators.find(RealPath); - } - } - - if (!RealPathExists) { - Creator = FDReadCreators.find(FHU::Filesystem::LexicallyNormal(Path)); - } - - if (Creator == FDReadCreators.end()) { - return -1; - } + return -1; } - return Creator->second(CTX, dirfs, Path, flags, mode); + return Creator->second(CTX, AT_FDCWD, pathname, flags, mode); } int32_t EmulatedFDManager::ProcAuxv(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) { diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.h index a1ce479afe..8542323417 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.h @@ -23,7 +23,7 @@ class EmulatedFDManager { public: EmulatedFDManager(FEXCore::Context::Context* ctx); ~EmulatedFDManager(); - int32_t 
OpenAt(int dirfs, const char* pathname, int flags, uint32_t mode); + int32_t Open(const char* pathname, int flags, uint32_t mode); private: FEXCore::Context::Context* CTX; diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index d4ccaa5a4a..7260610ec5 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -515,23 +515,48 @@ static bool ShouldSkipOpenInEmu(int flags) { return false; } +bool FileManager::ReplaceEmuFd(int fd, int flags, uint32_t mode) { + char Tmp[PATH_MAX + 1]; + + if (fd < 0) { + return false; + } + + // Get the path of the file we just opened + auto PathLength = FEX::get_fdpath(fd, Tmp); + if (PathLength == -1) { + return false; + } + Tmp[PathLength] = '\0'; + + // And try to open via EmuFD + auto EmuFd = EmuFD.Open(Tmp, flags, mode); + if (EmuFd == -1) { + return false; + } + + // If we succeeded, swap out the fd. dup2() always clears FD_CLOEXEC on the target, so use dup3() to keep the guest's O_CLOEXEC request. + ::dup3(EmuFd, fd, flags & O_CLOEXEC); + ::close(EmuFd); + return true; +} + uint64_t FileManager::Open(const char* pathname, int flags, uint32_t mode) { auto NewPath = GetSelf(pathname); const char* SelfPath = NewPath ? 
NewPath->data() : nullptr; int fd = -1; if (!ShouldSkipOpenInEmu(flags)) { - fd = EmuFD.OpenAt(AT_FDCWD, SelfPath, flags, mode); - if (fd == -1) { - FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, true, TmpFilename); - if (Path.first != -1) { - fd = ::openat(Path.first, Path.second, flags, mode); - } + FDPathTmpData TmpFilename; + auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, true, TmpFilename); + if (Path.first != -1) { + fd = ::openat(Path.first, Path.second, flags, mode); + } else { + fd = ::open(SelfPath, flags, mode); } - } - if (fd == -1) { + ReplaceEmuFd(fd, flags, mode); + } else { fd = ::open(SelfPath, flags, mode); } @@ -756,17 +781,16 @@ uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, i int32_t fd = -1; if (!ShouldSkipOpenInEmu(flags)) { - fd = EmuFD.OpenAt(dirfs, SelfPath, flags, mode); - if (fd == -1) { - FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); - if (Path.first != -1) { - fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); - } + FDPathTmpData TmpFilename; + auto Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); + if (Path.first != -1) { + fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); + } else { + fd = ::syscall(SYSCALL_DEF(openat), dirfs, SelfPath, flags, mode); } - } - if (fd == -1) { + ReplaceEmuFd(fd, flags, mode); + } else { fd = ::syscall(SYSCALL_DEF(openat), dirfs, SelfPath, flags, mode); } @@ -780,17 +804,16 @@ uint64_t FileManager::Openat2(int dirfs, const char* pathname, FEX::HLE::open_ho int32_t fd = -1; if (!ShouldSkipOpenInEmu(how->flags)) { - fd = EmuFD.OpenAt(dirfs, SelfPath, how->flags, how->mode); - if (fd == -1) { - FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); - if (Path.first != -1) { - fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, how, usize); - } + FDPathTmpData TmpFilename; + auto 
Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); + if (Path.first != -1) { + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, how, usize); + } else { + fd = ::syscall(SYSCALL_DEF(openat2), dirfs, SelfPath, how, usize); } - } - if (fd == -1) { + ReplaceEmuFd(fd, how->flags, how->mode); + } else { fd = ::syscall(SYSCALL_DEF(openat2), dirfs, SelfPath, how, usize); } diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h index aecf9a874c..1b0576ae37 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h @@ -88,6 +88,8 @@ class FileManager final { using FDPathTmpData = std::array; std::pair GetEmulatedFDPath(int dirfd, const char* pathname, bool FollowSymlink, FDPathTmpData& TmpFilename); + bool ReplaceEmuFd(int fd, int flags, uint32_t mode); + #if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED void TrackFEXFD(int FD) noexcept { std::lock_guard lk(FEXTrackingFDMutex); From 4ddd98708fb7ba0d5c6b0d6a9ccaf8e27529f3e8 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 18 Dec 2024 19:43:10 +0900 Subject: [PATCH 2/9] FileManagement: Handle readlink /proc/self/fd/* properly If the guest reads a RootFS path from /proc/self/fd/*, we should return it with the RootFS prefix stripped. 
--- .../LinuxSyscalls/FileManagement.cpp | 105 ++++++++++++++++-- .../LinuxSyscalls/FileManagement.h | 2 + 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index 7260610ec5..6035ab8026 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -339,6 +339,85 @@ FileManager::~FileManager() { close(RootFSFD); } +size_t FileManager::GetRootFSPrefixLen(const char* pathname, size_t len, bool AliasedOnly) { + if (len < 2 || // If no pathname or root + pathname[0] != '/') { // If we are getting root + return 0; + } + + const auto& RootFSPath = LDPath(); + if (RootFSPath.empty()) { // If RootFS doesn't exist + return 0; + } + + auto RootFSLen = RootFSPath.length(); + if (RootFSPath.ends_with("/")) { + RootFSLen -= 1; + } + + if (RootFSLen > len) { + return 0; + } + + if (memcmp(pathname, RootFSPath.c_str(), RootFSLen) || (len > RootFSLen && pathname[RootFSLen] != '/')) { + return 0; // If the path is not within the RootFS + } + + if (AliasedOnly) { + fextl::string Path(pathname, len); // Need to nul-terminate so copy + + struct stat HostStat {}; + struct stat RootFSStat {}; + if (lstat(Path.c_str(), &RootFSStat)) { + LogMan::Msg::DFmt("GetRootFSPrefixLen: lstat on RootFS path failed: {}", std::string_view(pathname, len)); + return 0; // RootFS path does not exist? + } + if (lstat(Path.c_str() + RootFSLen, &HostStat)) { + return 0; // Host path does not exist or not accessible + } + // Note: We do not check st_dev, since the RootFS might be + // an overlayfs mount that changes it. This means there could + // be false positives. 
However, since we check the size too, + // this is highly unlikely (an overlaid file would need to + // have the same exact size and coincidentally the same + // inode number as on the host, which is implausible for things + // like binaries and libraries). + if (RootFSStat.st_size != HostStat.st_size || RootFSStat.st_ino != HostStat.st_ino || RootFSStat.st_mode != HostStat.st_mode) { + return 0; // Host path is a different file + } + } + + return RootFSLen; +} + +ssize_t FileManager::StripRootFSPrefix(char* pathname, ssize_t len, bool leaky) { + if (len < 0) { + return len; + } + + auto Prefix = GetRootFSPrefixLen(pathname, len, false); + if (Prefix == 0) { + return len; + } + + if (Prefix == len) { + if (leaky) { + // Getting the root, without a trailing /. This is a hack pressure-vessel uses to get the FEX RootFS, + // so we have to leak it here... + LogMan::Msg::DFmt("Leaking RootFS path for pressure-vessel"); + return len; + } else { + ::strcpy(pathname, "/"); + return 1; + } + } + + ::memmove(pathname, pathname + Prefix, len - Prefix); + pathname[len - Prefix] = '\0'; + + return len - Prefix; +} + fextl::string FileManager::GetEmulatedPath(const char* pathname, bool FollowSymlink) { if (!pathname || // If no pathname pathname[0] != '/' || // If relative @@ -683,11 +762,9 @@ uint64_t FileManager::Readlink(const char* pathname, char* buf, size_t bufsiz) { FDPathTmpData TmpFilename; auto Path = GetEmulatedFDPath(AT_FDCWD, pathname, false, TmpFilename); + uint64_t Result = -1; if (Path.first != -1) { - uint64_t Result = ::readlinkat(Path.first, Path.second, buf, bufsiz); - if (Result != -1) { - return Result; - } + Result = ::readlinkat(Path.first, Path.second, buf, bufsiz); if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink @@ -695,8 +772,12 @@ uint64_t FileManager::Readlink(const char* pathname, char* buf, size_t bufsiz) { return -errno; } } + if (Result == -1) { + Result = ::readlink(pathname, buf, bufsiz); + } - return 
::readlink(pathname, buf, bufsiz); + // We might have read a /proc/self/fd/* link. If so, strip the RootFS prefix from it. + return StripRootFSPrefix(buf, Result, true); } uint64_t FileManager::Chmod(const char* pathname, mode_t mode) { @@ -758,11 +839,10 @@ uint64_t FileManager::Readlinkat(int dirfd, const char* pathname, char* buf, siz FDPathTmpData TmpFilename; auto NewPath = GetEmulatedFDPath(dirfd, pathname, false, TmpFilename); + uint64_t Result = -1; + if (NewPath.first != -1) { - uint64_t Result = ::readlinkat(NewPath.first, NewPath.second, buf, bufsiz); - if (Result != -1) { - return Result; - } + Result = ::readlinkat(NewPath.first, NewPath.second, buf, bufsiz); if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink @@ -771,7 +851,12 @@ uint64_t FileManager::Readlinkat(int dirfd, const char* pathname, char* buf, siz } } - return ::readlinkat(dirfd, pathname, buf, bufsiz); + if (Result == -1) { + Result = ::readlinkat(dirfd, pathname, buf, bufsiz); + } + + // We might have read a /proc/self/fd/* link. If so, strip the RootFS prefix from it. 
+ return StripRootFSPrefix(buf, Result, true); } uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, int flags, uint32_t mode) { diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h index 1b0576ae37..607fbf6866 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h @@ -142,6 +142,8 @@ class FileManager final { #endif bool RootFSPathExists(const char* Filepath); + size_t GetRootFSPrefixLen(const char* pathname, size_t len, bool AliasedOnly); + ssize_t StripRootFSPrefix(char* pathname, ssize_t len, bool leaky); struct ThunkDBObject { fextl::string LibraryName; From 4e7d0e6be0c16c10358621fb66f4d76cb3a3f4aa Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 18 Dec 2024 21:56:22 +0900 Subject: [PATCH 3/9] FileManagement: Fix path resolution for symlinks to the root If there's a symlink to / within the RootFS, don't attempt to follow it, since that will end up trying to look up the empty string within the RootFS (which is not legal). Just return the symlink. --- Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index 6035ab8026..ec26460737 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -517,8 +517,8 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char // Get the symlink of RootFS FD + stripped subpath. 
auto SymlinkSize = FEX::HLE::GetSymlink(RootFSFD, &SubPath[1], CurrentTmp, PATH_MAX - 1); - if (SymlinkSize > 0 && CurrentTmp[0] == '/') { - // If the symlink is absolute: + if (SymlinkSize > 1 && CurrentTmp[0] == '/') { + // If the symlink is absolute and not the root: // 1) Zero terminate it. // 2) Set the path as our current subpath. // 3) Switch to the next temporary index. (We don't want to overwrite the current one on the next loop iteration). From 4658b24f9a3cadfd848d3e09cc327829aed1506b Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Thu, 19 Dec 2024 22:59:56 +0900 Subject: [PATCH 4/9] FileManagement: Handle RootFS symlinks into RootFS properly If a RootFS symlink links to an absolute path within the RootFS, we need to strip the RootFS prefix. This would not normally happen with a plain RootFS, but it can happen if /proc is mounted within the RootFS. --- Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index ec26460737..f68a849138 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -517,6 +517,9 @@ std::pair FileManager::GetEmulatedFDPath(int dirfd, const char // Get the symlink of RootFS FD + stripped subpath. auto SymlinkSize = FEX::HLE::GetSymlink(RootFSFD, &SubPath[1], CurrentTmp, PATH_MAX - 1); + // This might be a /proc symlink into the RootFS, so strip it in that case. + SymlinkSize = StripRootFSPrefix(CurrentTmp, SymlinkSize, false); + if (SymlinkSize > 1 && CurrentTmp[0] == '/') { // If the symlink is absolute and not the root: // 1) Zero terminate it. 
From 9433ae340504b18481898e839fa97313b1127acd Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 18 Dec 2024 18:47:24 +0900 Subject: [PATCH 5/9] Syscalls: Handle execve of native binaries with merged RootFS With a merged RootFS, all binaries are executed through the RootFS. When executing a binary that is actually a native binary, we want to do so outside the RootFS. Handle this by stripping the RootFS prefix in that case. --- .../LinuxSyscalls/FileManagement.cpp | 15 +++++++++++++++ .../LinuxEmulation/LinuxSyscalls/FileManagement.h | 1 + .../LinuxEmulation/LinuxSyscalls/Syscalls.cpp | 11 +++++++++++ 3 files changed, 27 insertions(+) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index f68a849138..046ddd93e8 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -418,6 +418,21 @@ ssize_t FileManager::StripRootFSPrefix(char* pathname, ssize_t len, bool leaky) return len - Prefix; } +fextl::string FileManager::GetHostPath(fextl::string& Path, bool AliasedOnly) { + auto Prefix = GetRootFSPrefixLen(Path.c_str(), Path.length(), AliasedOnly); + + if (Prefix == 0) { + return {}; + } + + auto ret = Path.substr(Prefix); + if (ret.empty()) { // Getting the root + ret = "/"; + } + + return ret; +} + fextl::string FileManager::GetEmulatedPath(const char* pathname, bool FollowSymlink) { if (!pathname || // If no pathname pathname[0] != '/' || // If relative diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h index 607fbf6866..77d50d44e3 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h @@ -85,6 +85,7 @@ class FileManager final { bool IsRootFSFD(int dirfd, uint64_t inode); fextl::string GetEmulatedPath(const char* pathname, bool 
FollowSymlink = false); + fextl::string GetHostPath(fextl::string& Path, bool AliasedOnly); using FDPathTmpData = std::array; std::pair GetEmulatedFDPath(int dirfd, const char* pathname, bool FollowSymlink, FDPathTmpData& TmpFilename); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp index ac3c8fe781..1e209db5cd 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp @@ -354,6 +354,17 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname EnvpPtr = const_cast(EnvpArgs.data()); } + if (!IsFDExec && (IsShebang || IsOtherELF)) { + // With a merged RootFS, the entire real filesystem is visible through the rootfs + // prefix. If we are executing a non-emulated binary, we should do so through the host + // path. + + auto Path = SyscallHandler->FM.GetHostPath(Filename, true); + if (!Path.empty() && FHU::Filesystem::Exists(Path)) { + Filename = std::move(Path); + } + } + if (IsBinfmtCompatible || IsOtherELF) { Result = ::syscall(SYS_execveat, Args.dirfd, Filename.c_str(), argv, EnvpPtr, Args.flags); CloseSeccompFD(); From 3a5eeb570059393325cde32aeb0e8b3500520eca Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Fri, 20 Dec 2024 01:26:09 +0900 Subject: [PATCH 6/9] Syscalls: Fix multiple shebang handling issues - Parse the shebang line properly (use FHU::ParseArgumentsFromString which is the same code the loader uses) - Make native-interpreter shebang files work by deferring to the kernel in that case (previously, they'd get executed through the loader and it would choke on the architecture of the interpreter) - Do not use the RootFS-prepended path when executing shebang files. The loader will prepend that anyway when looking it up, but it needs the bare guest path so it can pass it as an argument to the interpreter, which (since it's emulated) will do the lookup through the RootFS. 
--- .../LinuxEmulation/LinuxSyscalls/Syscalls.cpp | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp index 1e209db5cd..4cc4d432b2 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp @@ -9,6 +9,7 @@ desc: Glue logic, brk allocations #include "CodeLoader.h" +#include "FEXHeaderUtils/StringArgumentParser.h" #include "Linux/Utils/ELFContainer.h" #include "Linux/Utils/ELFParser.h" @@ -139,30 +140,20 @@ template uint64_t GetDentsEmulation(int, FEX::HLE::x64::linux_dirent*, ui template uint64_t GetDentsEmulation(int, FEX::HLE::x32::linux_dirent_32*, uint32_t); -static bool IsShebangFile(std::span Data) { +static fextl::string GetShebangInterpFile(std::span Data) { // File isn't large enough to even contain a shebang. if (Data.size() <= 2) { - return false; + return {}; } // Handle shebang files. if (Data[0] == '#' && Data[1] == '!') { fextl::string InterpreterLine {Data.begin() + 2, // strip off "#!" 
prefix std::find(Data.begin(), Data.end(), '\n')}; - fextl::vector ShebangArguments {}; - - // Shebang line can have a single argument - fextl::istringstream InterpreterSS(InterpreterLine); - fextl::string Argument; - while (std::getline(InterpreterSS, Argument, ' ')) { - if (Argument.empty()) { - continue; - } - ShebangArguments.push_back(std::move(Argument)); - } + fextl::vector ShebangArguments = FHU::ParseArgumentsFromString(InterpreterLine); // Executable argument - fextl::string& ShebangProgram = ShebangArguments[0]; + fextl::string ShebangProgram(ShebangArguments[0]); // If the filename is absolute then prepend the rootfs // If it is relative then don't append the rootfs @@ -170,13 +161,15 @@ static bool IsShebangFile(std::span Data) { ShebangProgram = FEX::HLE::_SyscallHandler->RootFSPath() + ShebangProgram; } - return FHU::Filesystem::Exists(ShebangProgram); + if (FHU::Filesystem::Exists(ShebangProgram)) { + return ShebangProgram; + } } - return false; + return {}; } -static bool IsShebangFD(int FD) { +static fextl::string GetShebangInterpFD(int FD) { // We don't know the state of the FD coming in since this might be a guest tracked FD. // Need to be extra careful here not to adjust file offsets and status flags. // @@ -187,19 +180,19 @@ static bool IsShebangFD(int FD) { const auto ChunkSize = 257l; const auto ReadSize = pread(FD, &Header.at(0), ChunkSize, 0); - return IsShebangFile(std::span(Header.data(), ReadSize)); + return GetShebangInterpFile(std::span(Header.data(), ReadSize)); } -static bool IsShebangFilename(const fextl::string& Filename) { +static fextl::string GetShebangInterpFilename(const fextl::string& Filename) { // Open the Filename to determine if it is a shebang file. 
int FD = open(Filename.c_str(), O_RDONLY | O_CLOEXEC); if (FD == -1) { - return false; + return {}; } - bool IsShebang = IsShebangFD(FD); + auto Interp = GetShebangInterpFD(FD); close(FD); - return IsShebang; + return Interp; } uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* const* argv, char* const* envp, ExecveAtArgs Args) { @@ -208,18 +201,19 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname fextl::string RootFS = SyscallHandler->RootFSPath(); ELFLoader::ELFContainer::ELFType Type {}; + ELFLoader::ELFContainer::ELFType InterpreterType {}; // AT_EMPTY_PATH is only used if the pathname is empty. const bool IsFDExec = (Args.flags & AT_EMPTY_PATH) && strlen(pathname) == 0; fextl::string FDExecEnv; fextl::string FDSeccompEnv; - bool IsShebang {}; + fextl::string ShebangInterpreter {}; if (IsFDExec) { Type = ELFLoader::ELFContainer::GetELFType(Args.dirfd); - IsShebang = IsShebangFD(Args.dirfd); + ShebangInterpreter = GetShebangInterpFD(Args.dirfd); } else { // For absolute paths, check the rootfs first (if available) if (pathname[0] == '/') { @@ -253,7 +247,12 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname Type = ELFLoader::ELFContainer::GetELFType(Filename); - IsShebang = IsShebangFilename(Filename); + ShebangInterpreter = GetShebangInterpFilename(Filename); + } + + const bool IsShebang = !ShebangInterpreter.empty(); + if (IsShebang) { + InterpreterType = ELFLoader::ELFContainer::GetELFType(ShebangInterpreter); } if (!IsShebang && Type == ELFLoader::ELFContainer::ELFType::TYPE_NONE) { @@ -306,6 +305,10 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname // - FEXServer FD inheritance (unshare(CLONE_NEWNET)) const bool NeedsEnvpCopy = (IsFDExec && !(IsBinfmtCompatible || IsOtherELF)) || HasSeccomp; + // We are trying to execute a shebang handled by a different architecture interpreter (e.g. /usr/bin/python from the host FS). 
+ // In this case we just defer to the kernel. + const bool IsForeignShebang = (IsShebang && InterpreterType == ELFLoader::ELFContainer::ELFType::TYPE_OTHER_ELF); + if (NeedsEnvpCopy) { if (envp) { auto OldEnvp = envp; @@ -354,7 +357,7 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname EnvpPtr = const_cast(EnvpArgs.data()); } - if (!IsFDExec && (IsShebang || IsOtherELF)) { + if (!IsFDExec && (IsForeignShebang || IsOtherELF || !IsBinfmtCompatible)) { // With a merged RootFS, the entire real filesystem is visible through the rootfs // prefix. If we are executing a non-emulated binary, we should do so through the host // path. @@ -365,13 +368,25 @@ uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname } } - if (IsBinfmtCompatible || IsOtherELF) { + if (IsBinfmtCompatible || IsOtherELF || IsForeignShebang) { Result = ::syscall(SYS_execveat, Args.dirfd, Filename.c_str(), argv, EnvpPtr, Args.flags); CloseSeccompFD(); CloseFDExecFD(); SYSCALL_ERRNO(); } + // If we are executing an emulated interpreter shebang file through the loader, + // we need to strip the RootFS prefix. The loader will pass this filename to the + // interpreter as-is, which will access it using RootFS redirection. + // Note that unlike above, the prefix is stripped unconditionally (AliasedOnly=false), + // and the script path need not exist in the host. + if (IsShebang) { + auto Path = SyscallHandler->FM.GetHostPath(Filename, false); + if (!Path.empty()) { + Filename = std::move(Path); + } + } + // We don't have an interpreter installed or we are executing a non-ELF executable // We now need to munge the arguments fextl::vector ExecveArgs {}; From b078a41a02168bef50ebccc6f156f578af1c7a49 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Fri, 20 Dec 2024 01:51:14 +0900 Subject: [PATCH 7/9] FileManagement: Fix return val of readlink* The wrappers handle errno, we just need to return -1 on errors. 
--- Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index 046ddd93e8..ff1d312128 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -787,7 +787,7 @@ uint64_t FileManager::Readlink(const char* pathname, char* buf, size_t bufsiz) { if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink // This is expected behaviour - return -errno; + return -1; } } if (Result == -1) { @@ -865,7 +865,7 @@ uint64_t FileManager::Readlinkat(int dirfd, const char* pathname, char* buf, siz if (Result == -1 && errno == EINVAL) { // This means that the file wasn't a symlink // This is expected behaviour - return -errno; + return -1; } } From 3e99e814bc137ccf9d7c3ea05010ddf8525def18 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Fri, 20 Dec 2024 01:55:17 +0900 Subject: [PATCH 8/9] FileManagement: Use openat2() with RESOLVE_IN_ROOT for RootFS open ops This avoids having to do the symlink chasing in GetEmulatedFDPath, since the kernel does it for us. On top of that, with a merged RootFS setup, this will correctly handle symlinks from user directories into the RootFS, fixing wine on Fedora. 
--- .../LinuxSyscalls/FileManagement.cpp | 58 ++++++++++++++++--- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index ff1d312128..f94c5cd4dd 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -29,6 +29,7 @@ desc: Rootfs overlay logic #include #include #include +#include #include #include #include @@ -645,10 +646,23 @@ uint64_t FileManager::Open(const char* pathname, int flags, uint32_t mode) { if (!ShouldSkipOpenInEmu(flags)) { FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, true, TmpFilename); + auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename); if (Path.first != -1) { - fd = ::openat(Path.first, Path.second, flags, mode); - } else { + FEX::HLE::open_how how = { + .flags = (uint64_t)flags, + .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0, // openat2() is stricter about this + .resolve = (Path.first == AT_FDCWD) ? 0u : RESOLVE_IN_ROOT, // AT_FDCWD means it's a thunk and not via RootFS + }; + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); + if (fd == -1 && errno == EXDEV) { + // This means a magic symlink (/proc/foo) was involved. In this case we + // just punt and do the access without RESOLVE_IN_ROOT. + fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); + } + } + + // Open through RootFS failed (probably nonexistent), so open directly. 
+ if (fd == -1) { fd = ::open(SelfPath, flags, mode); } @@ -885,10 +899,23 @@ uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, i if (!ShouldSkipOpenInEmu(flags)) { FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); + auto Path = GetEmulatedFDPath(dirfs, SelfPath, false, TmpFilename); if (Path.first != -1) { - fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); - } else { + FEX::HLE::open_how how = { + .flags = (uint64_t)flags, + .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0, // openat2() is stricter about this, + .resolve = (Path.first == AT_FDCWD) ? 0u : RESOLVE_IN_ROOT, // AT_FDCWD means it's a thunk and not via RootFS + }; + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); + if (fd == -1 && errno == EXDEV) { + // This means a magic symlink (/proc/foo) was involved. In this case we + // just punt and do the access without RESOLVE_IN_ROOT. + fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); + } + } + + // Open through RootFS failed (probably nonexistent), so open directly. + if (fd == -1) { fd = ::syscall(SYSCALL_DEF(openat), dirfs, SelfPath, flags, mode); } @@ -908,10 +935,23 @@ uint64_t FileManager::Openat2(int dirfs, const char* pathname, FEX::HLE::open_ho if (!ShouldSkipOpenInEmu(how->flags)) { FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(dirfs, SelfPath, true, TmpFilename); - if (Path.first != -1) { + auto Path = GetEmulatedFDPath(dirfs, SelfPath, false, TmpFilename); + if (Path.first != -1 && !(how->resolve & RESOLVE_IN_ROOT)) { + // AT_FDCWD means it's a thunk and not via RootFS + if (Path.first != AT_FDCWD) { + how->resolve |= RESOLVE_IN_ROOT; + } fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, how, usize); - } else { + how->resolve &= ~RESOLVE_IN_ROOT; // Undo the temporary flag (it was not set on entry) so the guest's other resolve bits survive for the fallbacks below + if (fd == -1 && errno == EXDEV) { + // This means a magic symlink (/proc/foo) was involved. 
In this case we + // just punt and do the access without RESOLVE_IN_ROOT. + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, how, usize); + } + } + + // Open through RootFS failed (probably nonexistent), so open directly. + if (fd == -1) { fd = ::syscall(SYSCALL_DEF(openat2), dirfs, SelfPath, how, usize); } From 3fe265078743551689bafb380376c6057bf57721 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Fri, 20 Dec 2024 05:22:52 +0900 Subject: [PATCH 9/9] FileManagement: Gate new openat2() codepaths on recent enough kernel --- .../LinuxSyscalls/FileManagement.cpp | 18 ++++++++++++------ .../LinuxSyscalls/FileManagement.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp index f94c5cd4dd..82ba52839f 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp @@ -333,6 +333,8 @@ FileManager::FileManager(FEXCore::Context::Context* ctx) ProcFSDev = Buffer.st_dev; } + uint32_t KernelVersion = FEX::HLE::SyscallHandler::CalculateHostKernelVersion(); + HasOpenat2 = KernelVersion >= FEX::HLE::SyscallHandler::KernelVersion(5, 8, 0); UpdatePID(::getpid()); } @@ -646,15 +648,17 @@ uint64_t FileManager::Open(const char* pathname, int flags, uint32_t mode) { if (!ShouldSkipOpenInEmu(flags)) { FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename); + auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, !HasOpenat2, TmpFilename); if (Path.first != -1) { FEX::HLE::open_how how = { .flags = (uint64_t)flags, .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0, // openat2() is stricter about this .resolve = (Path.first == AT_FDCWD) ? 
0u : RESOLVE_IN_ROOT, // AT_FDCWD means it's a thunk and not via RootFS }; - fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); - if (fd == -1 && errno == EXDEV) { + if (HasOpenat2) { + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); + } + if (fd == -1 && (!HasOpenat2 || errno == EXDEV)) { // This means a magic symlink (/proc/foo) was involved. In this case we // just punt and do the access without RESOLVE_IN_ROOT. fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); @@ -899,15 +903,17 @@ uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, i if (!ShouldSkipOpenInEmu(flags)) { FDPathTmpData TmpFilename; - auto Path = GetEmulatedFDPath(dirfs, SelfPath, false, TmpFilename); + auto Path = GetEmulatedFDPath(dirfs, SelfPath, !HasOpenat2, TmpFilename); if (Path.first != -1) { FEX::HLE::open_how how = { .flags = (uint64_t)flags, .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0, // openat2() is stricter about this, .resolve = (Path.first == AT_FDCWD) ? 0u : RESOLVE_IN_ROOT, // AT_FDCWD means it's a thunk and not via RootFS }; - fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); - if (fd == -1 && errno == EXDEV) { + if (HasOpenat2) { + fd = ::syscall(SYSCALL_DEF(openat2), Path.first, Path.second, &how, sizeof(how)); + } + if (fd == -1 && (!HasOpenat2 || errno == EXDEV)) { // This means a magic symlink (/proc/foo) was involved. In this case we // just punt and do the access without RESOLVE_IN_ROOT. 
fd = ::syscall(SYSCALL_DEF(openat), Path.first, Path.second, flags, mode); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h index 77d50d44e3..7644bc612f 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h @@ -170,5 +170,6 @@ class FileManager final { int64_t RootFSFDInode = 0; int64_t ProcFDInode = 0; dev_t ProcFSDev; + bool HasOpenat2; }; } // namespace FEX::HLE