From 95ccd3caa43399413a0e84956aeded646757ec40 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 20 Jun 2024 23:46:24 -0700 Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5-bogner [skip ci] --- clang/lib/AST/ODRHash.cpp | 2 +- clang/lib/Serialization/ASTReader.cpp | 10 ++++++---- clang/lib/Serialization/ASTWriter.cpp | 7 +++++-- llvm/include/llvm/ADT/FoldingSet.h | 23 +++++++++++++++++++---- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp index 1249531eab09f..fbfe92318dc5e 100644 --- a/clang/lib/AST/ODRHash.cpp +++ b/clang/lib/AST/ODRHash.cpp @@ -251,7 +251,7 @@ unsigned ODRHash::CalculateHash() { assert(I == Bools.rend()); Bools.clear(); - return ID.ComputeHash(); + return ID.computeStableHash(); } namespace { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 552a3af546e75..c0b7db72fb603 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1201,7 +1201,7 @@ unsigned DeclarationNameKey::getHash() const { break; } - return ID.ComputeHash(); + return ID.computeStableHash(); } ModuleFile * @@ -2033,7 +2033,10 @@ const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) { } unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) { - return llvm::hash_combine(ikey.Size, ikey.ModTime); + uint8_t buf[sizeof(ikey.Size) + sizeof(ikey.ModTime)]; + memcpy(buf, &ikey.Size, sizeof(ikey.Size)); + memcpy(buf + sizeof(ikey.Size), &ikey.ModTime, sizeof(ikey.ModTime)); + return llvm::xxh3_64bits(buf); } HeaderFileInfoTrait::internal_key_type @@ -2640,8 +2643,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { return OriginalChange; } - // FIXME: hash_value is 
not guaranteed to be stable! - auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer()); + auto ContentHash = xxh3_64bits(MemBuffOrError.get()->getBuffer()); if (StoredContentHash == static_cast<uint64_t>(ContentHash)) return Change{Change::None}; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 0297e20e9116f..e00bacfd940b9 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1782,7 +1782,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, .ValidateASTInputFilesContent) { auto MemBuff = Cache->getBufferIfLoaded(); if (MemBuff) - ContentHash = hash_value(MemBuff->getBuffer()); + ContentHash = xxh3_64bits(MemBuff->getBuffer()); else PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content) << Entry.File.getName(); @@ -1987,7 +1987,10 @@ namespace { // The hash is based only on size/time of the file, so that the reader can // match even when symlinking or excess path elements ("foo/../", "../") // change the form of the name. However, complete path is still the key. 
- return llvm::hash_combine(key.Size, key.ModTime); + uint8_t buf[sizeof(key.Size) + sizeof(key.ModTime)]; + memcpy(buf, &key.Size, sizeof(key.Size)); + memcpy(buf + sizeof(key.Size), &key.ModTime, sizeof(key.ModTime)); + return llvm::xxh3_64bits(buf); } std::pair<unsigned, unsigned> diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h index f82eabd5044b2..3c2eaade57e47 100644 --- a/llvm/include/llvm/ADT/FoldingSet.h +++ b/llvm/include/llvm/ADT/FoldingSet.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/xxhash.h" #include <cassert> #include <cstddef> #include <type_traits> @@ -294,12 +295,19 @@ class FoldingSetNodeIDRef { FoldingSetNodeIDRef() = default; FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} - /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, - /// used to lookup the node in the FoldingSetBase. + // Compute a strong hash value used to lookup the node in the FoldingSetBase. + // The hash value is not guaranteed to be deterministic across processes. unsigned ComputeHash() const { return static_cast<unsigned>(hash_combine_range(Data, Data + Size)); } + // Compute a deterministic hash value across processes that is suitable for + // on-disk serialization. + unsigned computeStableHash() const { + return static_cast<unsigned>(xxh3_64bits(ArrayRef( + reinterpret_cast<const uint8_t *>(Data), sizeof(unsigned) * Size))); + } + bool operator==(FoldingSetNodeIDRef) const; bool operator!=(FoldingSetNodeIDRef RHS) const { return !(*this == RHS); } @@ -366,12 +374,19 @@ class FoldingSetNodeID { /// object to be used to compute a new profile. inline void clear() { Bits.clear(); } - /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used - /// to lookup the node in the FoldingSetBase. + // Compute a strong hash value for this FoldingSetNodeID, used to lookup the + // node in the FoldingSetBase. 
The hash value is not guaranteed to be + // deterministic across processes. unsigned ComputeHash() const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } + // Compute a deterministic hash value across processes that is suitable for + // on-disk serialization. + unsigned computeStableHash() const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()).computeStableHash(); + } + /// operator== - Used to compare two nodes to each other. bool operator==(const FoldingSetNodeID &RHS) const; bool operator==(const FoldingSetNodeIDRef RHS) const; From a341e03cb6376d50a4fa219933d3f161e41a567a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 27 Jun 2024 14:44:02 -0700 Subject: [PATCH 2/2] move seed inside #if Created using spr 1.3.5-bogner --- llvm/include/llvm/ADT/Hashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 397109880bb02..177fb0318bf80 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -311,11 +311,11 @@ struct hash_state { /// hash values. On platforms without ASLR, this is still likely /// non-deterministic per build. inline uint64_t get_execution_seed() { - [[maybe_unused]] static const char seed = 0; // Work around x86-64 negative offset folding for old Clang -fno-pic // https://reviews.llvm.org/D93931 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && \ (!defined(__clang__) || __clang_major__ > 11) + static const char seed = 0; return static_cast<size_t>(reinterpret_cast<uintptr_t>(&seed)); #else return 0xff51afd7ed558ccdULL;