diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index ce4e8c1fbe16e..6cc6c2bfd2b6b 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -124,8 +124,8 @@ class FileManager : public RefCountedBase { std::unique_ptr StatCache; std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status, - bool isFile, - std::unique_ptr *F); + bool isFile, std::unique_ptr *F, + bool IsText = true); /// Add all ancestors of the given path (pointing to either a file /// or a directory) as virtual directories. @@ -230,7 +230,8 @@ class FileManager : public RefCountedBase { /// the failure to find this file. llvm::Expected getFileRef(StringRef Filename, bool OpenFile = false, - bool CacheFailure = true); + bool CacheFailure = true, + bool IsText = true); /// Get the FileEntryRef for stdin, returning an error if stdin cannot be /// read. @@ -290,23 +291,28 @@ class FileManager : public RefCountedBase { /// Open the specified file as a MemoryBuffer, returning a new /// MemoryBuffer if successful, otherwise returning null. + /// The IsText parameter controls whether the file should be opened as a text + /// or binary file, and should be set to false if the file contents should be + /// treated as binary. llvm::ErrorOr> getBufferForFile(FileEntryRef Entry, bool isVolatile = false, bool RequiresNullTerminator = true, - std::optional MaybeLimit = std::nullopt); + std::optional MaybeLimit = std::nullopt, + bool IsText = true); llvm::ErrorOr> getBufferForFile(StringRef Filename, bool isVolatile = false, bool RequiresNullTerminator = true, - std::optional MaybeLimit = std::nullopt) const { + std::optional MaybeLimit = std::nullopt, + bool IsText = true) const { return getBufferForFileImpl(Filename, /*FileSize=*/MaybeLimit.value_or(-1), - isVolatile, RequiresNullTerminator); + isVolatile, RequiresNullTerminator, IsText); } private: llvm::ErrorOr> getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile, - bool RequiresNullTerminator) const; + bool RequiresNullTerminator, bool IsText) const; DirectoryEntry *&getRealDirEntry(const llvm::vfs::Status &Status); diff --git a/clang/include/clang/Basic/FileSystemStatCache.h b/clang/include/clang/Basic/FileSystemStatCache.h index 5a003a748178d..73c256a016971 100644 --- a/clang/include/clang/Basic/FileSystemStatCache.h +++ b/clang/include/clang/Basic/FileSystemStatCache.h @@ -48,10 +48,10 @@ class FileSystemStatCache { /// success for directories (not files). On a successful file lookup, the /// implementation can optionally fill in \p F with a valid \p File object and /// the client guarantees that it will close it. - static std::error_code - get(StringRef Path, llvm::vfs::Status &Status, bool isFile, - std::unique_ptr *F, - FileSystemStatCache *Cache, llvm::vfs::FileSystem &FS); + static std::error_code get(StringRef Path, llvm::vfs::Status &Status, + bool isFile, std::unique_ptr *F, + FileSystemStatCache *Cache, + llvm::vfs::FileSystem &FS, bool IsText = true); protected: // FIXME: The pointer here is a non-owning/optional reference to the diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index 6097b85a03064..2876c290a26b1 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -212,8 +212,10 @@ FileManager::getFile(StringRef Filename, bool openFile, bool CacheFailure) { return llvm::errorToErrorCode(Result.takeError()); } -llvm::Expected -FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) { +llvm::Expected FileManager::getFileRef(StringRef Filename, + bool openFile, + bool CacheFailure, + bool IsText) { ++NumFileLookups; // See if there is already an entry in the map. @@ -259,7 +261,7 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) { std::unique_ptr F; llvm::vfs::Status Status; auto statError = getStatValue(InterndFileName, Status, true, - openFile ? &F : nullptr); + openFile ? &F : nullptr, IsText); if (statError) { // There's no real file at the given path. if (CacheFailure) @@ -531,7 +533,7 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) { llvm::ErrorOr> FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, bool RequiresNullTerminator, - std::optional MaybeLimit) { + std::optional MaybeLimit, bool IsText) { const FileEntry *Entry = &FE.getFileEntry(); // If the content is living on the file entry, return a reference to it. if (Entry->Content) @@ -558,21 +560,21 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, // Otherwise, open the file. return getBufferForFileImpl(Filename, FileSize, isVolatile, - RequiresNullTerminator); + RequiresNullTerminator, IsText); } llvm::ErrorOr> FileManager::getBufferForFileImpl(StringRef Filename, int64_t FileSize, - bool isVolatile, - bool RequiresNullTerminator) const { + bool isVolatile, bool RequiresNullTerminator, + bool IsText) const { if (FileSystemOpts.WorkingDir.empty()) return FS->getBufferForFile(Filename, FileSize, RequiresNullTerminator, - isVolatile); + isVolatile, IsText); SmallString<128> FilePath(Filename); FixupRelativePath(FilePath); return FS->getBufferForFile(FilePath, FileSize, RequiresNullTerminator, - isVolatile); + isVolatile, IsText); } /// getStatValue - Get the 'stat' information for the specified path, @@ -580,20 +582,22 @@ FileManager::getBufferForFileImpl(StringRef Filename, int64_t FileSize, /// if the path points to a virtual file or does not exist, or returns /// false if it's an existent real file. If FileDescriptor is NULL, /// do directory look-up instead of file look-up. -std::error_code -FileManager::getStatValue(StringRef Path, llvm::vfs::Status &Status, - bool isFile, std::unique_ptr *F) { +std::error_code FileManager::getStatValue(StringRef Path, + llvm::vfs::Status &Status, + bool isFile, + std::unique_ptr *F, + bool IsText) { // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be // absolute! if (FileSystemOpts.WorkingDir.empty()) - return FileSystemStatCache::get(Path, Status, isFile, F, - StatCache.get(), *FS); + return FileSystemStatCache::get(Path, Status, isFile, F, StatCache.get(), + *FS, IsText); SmallString<128> FilePath(Path); FixupRelativePath(FilePath); return FileSystemStatCache::get(FilePath.c_str(), Status, isFile, F, - StatCache.get(), *FS); + StatCache.get(), *FS, IsText); } std::error_code diff --git a/clang/lib/Basic/FileSystemStatCache.cpp b/clang/lib/Basic/FileSystemStatCache.cpp index 415a4e2025df8..183eea0986637 100644 --- a/clang/lib/Basic/FileSystemStatCache.cpp +++ b/clang/lib/Basic/FileSystemStatCache.cpp @@ -30,11 +30,12 @@ void FileSystemStatCache::anchor() {} /// success for directories (not files). On a successful file lookup, the /// implementation can optionally fill in FileDescriptor with a valid /// descriptor and the client guarantees that it will close it. -std::error_code -FileSystemStatCache::get(StringRef Path, llvm::vfs::Status &Status, - bool isFile, std::unique_ptr *F, - FileSystemStatCache *Cache, - llvm::vfs::FileSystem &FS) { +std::error_code FileSystemStatCache::get(StringRef Path, + llvm::vfs::Status &Status, bool isFile, + std::unique_ptr *F, + FileSystemStatCache *Cache, + llvm::vfs::FileSystem &FS, + bool IsText) { bool isForDir = !isFile; std::error_code RetCode; @@ -58,7 +59,8 @@ FileSystemStatCache::get(StringRef Path, llvm::vfs::Status &Status, // // Because of this, check to see if the file exists with 'open'. If the // open succeeds, use fstat to get the stat info. - auto OwnedFile = FS.openFileForRead(Path); + auto OwnedFile = + IsText ? FS.openFileForRead(Path) : FS.openFileForReadBinary(Path); if (!OwnedFile) { // If the open fails, our "stat" fails. diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp index 00bf880726ee3..b04f67a4b2ed3 100644 --- a/clang/lib/Lex/HeaderMap.cpp +++ b/clang/lib/Lex/HeaderMap.cpp @@ -54,7 +54,10 @@ std::unique_ptr HeaderMap::Create(FileEntryRef FE, FileManager &FM) { unsigned FileSize = FE.getSize(); if (FileSize <= sizeof(HMapHeader)) return nullptr; - auto FileBuffer = FM.getBufferForFile(FE); + auto FileBuffer = + FM.getBufferForFile(FE, /*IsVolatile=*/false, + /*RequiresNullTerminator=*/true, + /*MaybeList=*/std::nullopt, /*IsText=*/false); if (!FileBuffer || !*FileBuffer) return nullptr; bool NeedsByteSwap; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 4e77df9ec444c..8e7d80aa8911c 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1080,8 +1080,8 @@ Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, FileManager &FM = this->getFileManager(); if (llvm::sys::path::is_absolute(Filename)) { // lookup path or immediately fail - llvm::Expected ShouldBeEntry = - FM.getFileRef(Filename, OpenFile); + llvm::Expected ShouldBeEntry = FM.getFileRef( + Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); return llvm::expectedToOptional(std::move(ShouldBeEntry)); } @@ -1107,8 +1107,8 @@ Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, StringRef FullFileDir = LookupFromFile->tryGetRealPathName(); if (!FullFileDir.empty()) { SeparateComponents(LookupPath, FullFileDir, Filename, true); - llvm::Expected ShouldBeEntry = - FM.getFileRef(LookupPath, OpenFile); + llvm::Expected ShouldBeEntry = FM.getFileRef( + LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); if (ShouldBeEntry) return llvm::expectedToOptional(std::move(ShouldBeEntry)); llvm::consumeError(ShouldBeEntry.takeError()); @@ -1123,8 +1123,8 @@ Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, StringRef WorkingDir = WorkingDirEntry.getName(); if (!WorkingDir.empty()) { SeparateComponents(LookupPath, WorkingDir, Filename, false); - llvm::Expected ShouldBeEntry = - FM.getFileRef(LookupPath, OpenFile); + llvm::Expected ShouldBeEntry = FM.getFileRef( + LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); if (ShouldBeEntry) return llvm::expectedToOptional(std::move(ShouldBeEntry)); llvm::consumeError(ShouldBeEntry.takeError()); @@ -1135,8 +1135,8 @@ Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, for (const auto &Entry : PPOpts->EmbedEntries) { LookupPath.clear(); SeparateComponents(LookupPath, Entry, Filename, false); - llvm::Expected ShouldBeEntry = - FM.getFileRef(LookupPath, OpenFile); + llvm::Expected ShouldBeEntry = FM.getFileRef( + LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); if (ShouldBeEntry) return llvm::expectedToOptional(std::move(ShouldBeEntry)); llvm::consumeError(ShouldBeEntry.takeError()); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5c4f8d0e9c46c..954bdf207d172 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5333,7 +5333,9 @@ std::string ASTReader::getOriginalSourceFile( const PCHContainerReader &PCHContainerRdr, DiagnosticsEngine &Diags) { // Open the AST file. auto Buffer = FileMgr.getBufferForFile(ASTFileName, /*IsVolatile=*/false, - /*RequiresNullTerminator=*/false); + /*RequiresNullTerminator=*/false, + /*MaybeLimit=*/std::nullopt, + /*IsText=*/false); if (!Buffer) { Diags.Report(diag::err_fe_unable_to_read_pch_file) << ASTFileName << Buffer.getError().message(); diff --git a/clang/test/Preprocessor/embed_zos.c b/clang/test/Preprocessor/embed_zos.c new file mode 100644 index 0000000000000..564a65f42afcd --- /dev/null +++ b/clang/test/Preprocessor/embed_zos.c @@ -0,0 +1,109 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t/media && cp %S/Inputs/media/art.txt %t/media/ +// RUN: chtag -r %t/media/art.txt +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%t -verify +// expected-no-diagnostics + +// REQUIRES: shell, system-zos + +const char data[] = { +#embed +}; +const char data2[] = { +#embed +, 0 +}; +const char data3[] = { +#embed suffix(, 0) +}; +const char data4[] = { +#embed suffix(,) +0 +}; +static_assert(sizeof(data) == 274); +static_assert(' ' == data[0]); +static_assert('_' == data[11]); +static_assert('\n' == data[273]); +static_assert(sizeof(data2) == 275); +static_assert(' ' == data2[0]); +static_assert('_' == data2[11]); +static_assert('\n' == data2[273]); +static_assert('\0' == data2[274]); +static_assert(sizeof(data3) == 275); +static_assert(' ' == data3[0]); +static_assert('_' == data3[11]); +static_assert('\n' == data3[273]); +static_assert('\0' == data3[274]); +static_assert(sizeof(data4) == 275); +static_assert(' ' == data4[0]); +static_assert('_' == data4[11]); +static_assert('\n' == data4[273]); +static_assert('\0' == data4[274]); + +const signed char data5[] = { +#embed +}; +const signed char data6[] = { +#embed +, 0 +}; +const signed char data7[] = { +#embed suffix(, 0) +}; +const signed char data8[] = { +#embed suffix(,) +0 +}; +static_assert(sizeof(data5) == 274); +static_assert(' ' == data5[0]); +static_assert('_' == data5[11]); +static_assert('\n' == data5[273]); +static_assert(sizeof(data6) == 275); +static_assert(' ' == data6[0]); +static_assert('_' == data6[11]); +static_assert('\n' == data6[273]); +static_assert('\0' == data6[274]); +static_assert(sizeof(data7) == 275); +static_assert(' ' == data7[0]); +static_assert('_' == data7[11]); +static_assert('\n' == data7[273]); +static_assert('\0' == data7[274]); +static_assert(sizeof(data8) == 275); +static_assert(' ' == data8[0]); +static_assert('_' == data8[11]); +static_assert('\n' == data8[273]); +static_assert('\0' == data8[274]); + +const unsigned char data9[] = { +#embed +}; +const unsigned char data10[] = { +0, +#embed +}; +const unsigned char data11[] = { +#embed prefix(0,) +}; +const unsigned char data12[] = { +0 +#embed prefix(,) +}; +static_assert(sizeof(data9) == 274); +static_assert(' ' == data9[0]); +static_assert('_' == data9[11]); +static_assert('\n' == data9[273]); +static_assert(sizeof(data10) == 275); +static_assert(' ' == data10[1]); +static_assert('_' == data10[12]); +static_assert('\n' == data10[274]); +static_assert('\0' == data10[0]); +static_assert(sizeof(data11) == 275); +static_assert(' ' == data11[1]); +static_assert('_' == data11[12]); +static_assert('\n' == data11[274]); +static_assert('\0' == data11[0]); +static_assert(sizeof(data12) == 275); +static_assert(' ' == data12[1]); +static_assert('_' == data12[12]); +static_assert('\n' == data12[274]); +static_assert('\0' == data12[0]); diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 2531c075f262d..1358e880942a1 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -271,15 +271,28 @@ class FileSystem : public llvm::ThreadSafeRefCountedBase, /// Get the status of the entry at \p Path, if one exists. virtual llvm::ErrorOr status(const Twine &Path) = 0; - /// Get a \p File object for the file at \p Path, if one exists. + /// Get a \p File object for the text file at \p Path, if one exists. virtual llvm::ErrorOr> openFileForRead(const Twine &Path) = 0; + /// Get a \p File object for the binary file at \p Path, if one exists. + /// Some non-ascii based file systems perform encoding conversions + /// when reading as a text file, and this function should be used if + /// a file's bytes should be read as-is. On most filesystems, this + /// is the same behaviour as openFileForRead. + virtual llvm::ErrorOr> + openFileForReadBinary(const Twine &Path) { + return openFileForRead(Path); + } + /// This is a convenience method that opens a file, gets its content and then /// closes the file. + /// The IsText parameter is used to distinguish whether the file should be + /// opened as a binary or text file. llvm::ErrorOr> getBufferForFile(const Twine &Name, int64_t FileSize = -1, - bool RequiresNullTerminator = true, bool IsVolatile = false); + bool RequiresNullTerminator = true, bool IsVolatile = false, + bool IsText = true); /// Get a directory_iterator for \p Dir. /// \note The 'end' iterator is directory_iterator(). diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 928c0b5a24ed6..4feb41554fc3c 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -117,8 +117,9 @@ FileSystem::~FileSystem() = default; ErrorOr> FileSystem::getBufferForFile(const llvm::Twine &Name, int64_t FileSize, - bool RequiresNullTerminator, bool IsVolatile) { - auto F = openFileForRead(Name); + bool RequiresNullTerminator, bool IsVolatile, + bool IsText) { + auto F = IsText ? openFileForRead(Name) : openFileForReadBinary(Name); if (!F) return F.getError(); @@ -279,6 +280,8 @@ class RealFileSystem : public FileSystem { ErrorOr status(const Twine &Path) override; ErrorOr> openFileForRead(const Twine &Path) override; + ErrorOr> + openFileForReadBinary(const Twine &Path) override; directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; llvm::ErrorOr getCurrentWorkingDirectory() const override; @@ -302,6 +305,17 @@ class RealFileSystem : public FileSystem { return Storage; } + ErrorOr> + openFileForReadWithFlags(const Twine &Name, sys::fs::OpenFlags Flags) { + SmallString<256> RealName, Storage; + Expected FDOrErr = sys::fs::openNativeFileForRead( + adjustPath(Name, Storage), Flags, &RealName); + if (!FDOrErr) + return errorToErrorCode(FDOrErr.takeError()); + return std::unique_ptr( + new RealFile(*FDOrErr, Name.str(), RealName.str())); + } + struct WorkingDirectory { // The current working directory, without symlinks resolved. (echo $PWD). SmallString<128> Specified; @@ -324,13 +338,12 @@ ErrorOr RealFileSystem::status(const Twine &Path) { ErrorOr> RealFileSystem::openFileForRead(const Twine &Name) { - SmallString<256> RealName, Storage; - Expected FDOrErr = sys::fs::openNativeFileForRead( - adjustPath(Name, Storage), sys::fs::OF_None, &RealName); - if (!FDOrErr) - return errorToErrorCode(FDOrErr.takeError()); - return std::unique_ptr( - new RealFile(*FDOrErr, Name.str(), RealName.str())); + return openFileForReadWithFlags(Name, sys::fs::OF_Text); +} + +ErrorOr> +RealFileSystem::openFileForReadBinary(const Twine &Name) { + return openFileForReadWithFlags(Name, sys::fs::OF_None); } llvm::ErrorOr RealFileSystem::getCurrentWorkingDirectory() const {