From a4c41277e55bba6f676c5d4aa20df7c6b1d43cdb Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:36:24 -0700 Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- llvm/docs/LangRef.rst | 43 ++++++++++++++++----- llvm/include/llvm/IR/DataLayout.h | 62 +++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 18 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 7296bb84b7d95..f971c5a32c61f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3147,14 +3147,21 @@ as follows: ``A
`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[n]::[:][:]`` - This specifies the *size* of a pointer and its ```` and - ````\erred alignments for address space ``n``. - The fourth parameter ```` is the size of the - index that used for address calculation, which must be less than or equal - to the pointer size. If not - specified, the default index size is equal to the pointer size. All sizes - are in bits. The address space, ``n``, is optional, and if not specified, +``p[n]::[:[:[:]]]`` + This specifies the properties of a pointer in address space ``n``. + The ```` parameter specifies the size of the bitwise representation. + For :ref:`non-integral pointers ` the representation size may + be larger than the address width of the underlying address space (e.g. to + accommodate additional metadata). + The alignment requirements are specified via the ```` and + ````\erred alignments parameters. + The fourth parameter ```` is the size of the index that used for + address calculations such as :ref:`getelementptr `. + It must be less than or equal to the pointer size. If not specified, the + default index size is equal to the pointer size. + The index size also specifies the width of addresses in this address space. + All sizes are in bits. + The address space, ``n``, is optional, and if not specified, denotes the default address space 0. The value of ``n`` must be in the range [1,2^24). ``i:[:]`` @@ -4266,6 +4273,16 @@ address spaces defined in the :ref:`datalayout string`. the default globals address space and ``addrspace("P")`` the program address space. +The representation of pointers can be different for each address space and does +not necessarily need to be a plain integer address (e.g. for +:ref:`non-integral pointers `). 
In addition to a representation +size in bits, pointers in each address space also have an index size which defines +the bitwidth of indexing operations as well as the size of `integer addresses` +in this address space. For example, CHERI capabilities are twice the size of the +underlying addresses to accommodate additional metadata such as bounds and +permissions: on a 32-bit system the bitwidth of the pointer representation +is 64, but the underlying address width remains 32 bits. + The default address space is number zero. The semantics of non-zero address spaces are target-specific. Memory @@ -12396,12 +12413,15 @@ Semantics: """""""""" The '``ptrtoint``' instruction converts ``value`` to integer type -``ty2`` by interpreting the pointer value as an integer and either -truncating or zero extending that value to the size of the integer type. +``ty2`` by interpreting all the pointer representation bits as an integer +(equivalent to a ``bitcast``) and either truncating or zero extending that value +to the size of the integer type. If ``value`` is smaller than ``ty2`` then a zero extension is done. If ``value`` is larger than ``ty2`` then a truncation is done. If they are the same size, then nothing is done (*no-op cast*) other than a type change. +The ``ptrtoint`` instruction always :ref:`captures address and provenance <pointercapture>` +of the pointer argument. Example: """""""" @@ -12456,6 +12476,9 @@ of the integer ``value``. If ``value`` is larger than the size of a pointer then a truncation is done. If ``value`` is smaller than the size of a pointer then a zero extension is done. If they are the same size, nothing is done (*no-op cast*). +The behavior is equivalent to a ``bitcast``; however, the resulting value is not +guaranteed to be dereferenceable (e.g. if the result type is a +:ref:`non-integral pointer <nointptrtype>`).
Example: """""""" diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2ad080e6d0cd2..09ba6b54cf721 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -324,16 +324,38 @@ class DataLayout { /// the backends/clients are updated. Align getPointerPrefAlignment(unsigned AS = 0) const; - /// Layout pointer size in bytes, rounded up to a whole - /// number of bytes. + /// The pointer representation size in bytes, rounded up to a whole number of + /// bytes. The difference between this function and getPointerAddressSize() is + /// this one returns the size of the entire pointer type (this includes + /// metadata bits for fat pointers) and the latter only returns the number of + /// address bits. + /// \sa DataLayout::getPointerAddressSizeInBits /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - // Index size in bytes used for address calculation, - /// rounded up to a whole number of bytes. + /// The index size in bytes used for address calculation, rounded up to a + /// whole number of bytes. This not only defines the size used in + /// getelementptr operations, but also the size of addresses in this \p AS. + /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which + /// only 64 are used to represent the address and the remaining ones are used + /// for metadata such as bounds and access permissions. In this case + /// getPointerSize() returns 16, but getIndexSize() returns 8. + /// To help with code understanding, the alias getPointerAddressSize() can be + /// used instead of getIndexSize() to clarify that an address width is needed. unsigned getIndexSize(unsigned AS) const; + /// The integral size of a pointer in a given address space in bytes, which + /// is defined to be the same as getIndexSize(). 
This exists as a separate + /// function to make it clearer when reading code that the size of an address + /// is being requested. While targets exist where index size and the + /// underlying address width are not identical (e.g. AMDGPU fat pointers with + /// 48-bit addresses and 32-bit offsets for indexing), there is currently no need + /// to differentiate these properties in LLVM. + /// \sa DataLayout::getIndexSize + /// \sa DataLayout::getPointerAddressSizeInBits + unsigned getPointerAddressSize(unsigned AS) const { return getIndexSize(AS); } + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. SmallVector<unsigned, 8> getNonIntegralAddressSpaces() const { @@ -358,29 +380,53 @@ class DataLayout { return PTy && isNonIntegralPointerType(PTy); } - /// Layout pointer size, in bits + /// The size in bits of the pointer representation in a given address space. + /// This is not necessarily the same as the width of the integer address of a pointer (e.g. + /// for fat pointers). + /// \sa DataLayout::getPointerAddressSizeInBits() /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSizeInBits(unsigned AS = 0) const { return getPointerSpec(AS).BitWidth; } - /// Size in bits of index used for address calculation in getelementptr. + /// The size in bits of indices used for address calculation in getelementptr + /// and for addresses in the given AS. See getIndexSize() for more + /// information. + /// \sa DataLayout::getPointerAddressSizeInBits() unsigned getIndexSizeInBits(unsigned AS) const { return getPointerSpec(AS).IndexBitWidth; } - /// Layout pointer size, in bits, based on the type. If this function is + /// The size in bits of an address in the given AS. This is defined to + /// return the same value as getIndexSizeInBits() since there is currently no + /// target that requires these two properties to have different values.
See + /// getIndexSize() for more information. + /// \sa DataLayout::getIndexSizeInBits() + unsigned getPointerAddressSizeInBits(unsigned AS) const { + return getIndexSizeInBits(AS); + } + + /// The pointer representation size in bits for this type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size /// of the pointer is returned. This should only be called with a pointer or /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; - /// Layout size of the index used in GEP calculation. + /// The size in bits of the index used in GEP calculation for this type. /// The function should be called with pointer or vector of pointers type. + /// This is defined to return the same value as getPointerAddressSizeInBits(), + /// but separate functions exist for code clarity. unsigned getIndexTypeSizeInBits(Type *Ty) const; + /// The size in bits of an address for this type. + /// This is defined to return the same value as getIndexTypeSizeInBits(), + /// but separate functions exist for code clarity. + unsigned getPointerAddressSizeInBits(Type *Ty) const { + return getIndexTypeSizeInBits(Ty); + } + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:43:37 -0700 Subject: [PATCH 2/4] fix docs build Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d18d0d97aaee..38be6918ff73c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12435,7 +12435,7 @@ Example: .. _i_ptrtoaddr: '``ptrtoaddr .. 
to``' Instruction -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" From 1b4698885669f46a445a99c49ae5e529597399c5 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:46:20 -0700 Subject: [PATCH 3/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f971c5a32c61f..e12651a9b3989 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3147,7 +3147,7 @@ as follows: ``A
`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[n]::[:[:[:]]]`` +``p[n]::[:[:]]`` This specifies the properties of a pointer in address space ``n``. The ```` parameter specifies the size of the bitwise representation. For :ref:`non-integral pointers ` the representation size may From da85813d2a7dceada3a0fa9ad6d0fe88a6fac69e Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 12 May 2025 13:09:39 -0700 Subject: [PATCH 4/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- llvm/docs/LangRef.rst | 43 ++++++++++++++----- llvm/include/llvm/IR/DataLayout.h | 71 +++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5f14726c36672..405369c7c0a16 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3147,14 +3147,21 @@ as follows: ``A
`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[n]:<size>:<abi>[:<pref>][:<idx>]`` - This specifies the *size* of a pointer and its ``<abi>`` and - ``<pref>``\erred alignments for address space ``n``. - The fourth parameter ``<idx>`` is the size of the - index that used for address calculation, which must be less than or equal - to the pointer size. If not - specified, the default index size is equal to the pointer size. All sizes - are in bits. The address space, ``n``, is optional, and if not specified, +``p[n]:<size>:<abi>[:<pref>[:<idx>]]`` + This specifies the properties of a pointer in address space ``n``. + The ``<size>`` parameter specifies the size of the bitwise representation. + For :ref:`non-integral pointers <nointptrtype>` the representation size may + be larger than the address width of the underlying address space (e.g. to + accommodate additional metadata). + The alignment requirements are specified via the ``<abi>`` and + ``<pref>``\erred alignment parameters. + The fourth parameter ``<idx>`` is the size of the index that is used for + address calculations such as :ref:`getelementptr <i_getelementptr>`. + It must be less than or equal to the pointer size. If not specified, the + default index size is equal to the pointer size. + The index size also specifies the width of addresses in this address space. + All sizes are in bits. + The address space, ``n``, is optional, and if not specified, denotes the default address space 0. The value of ``n`` must be in the range [1,2^24). ``i<size>:<abi>[:<pref>]`` @@ -4266,6 +4273,16 @@ address spaces defined in the :ref:`datalayout string<langref_datalayout>`. the default globals address space and ``addrspace("P")`` the program address space. +The representation of pointers can be different for each address space and does +not necessarily need to be a plain integer address (e.g. for +:ref:`non-integral pointers <nointptrtype>`).
In addition to a representation +size in bits, pointers in each address space also have an index size which defines +the bitwidth of indexing operations as well as the size of `integer addresses` +in this address space. For example, CHERI capabilities are twice the size of the +underlying addresses to accommodate additional metadata such as bounds and +permissions: on a 32-bit system the bitwidth of the pointer representation +is 64, but the underlying address width remains 32 bits. + The default address space is number zero. The semantics of non-zero address spaces are target-specific. Memory @@ -12408,12 +12425,15 @@ Semantics: """""""""" The '``ptrtoint``' instruction converts ``value`` to integer type -``ty2`` by interpreting the pointer value as an integer and either -truncating or zero extending that value to the size of the integer type. +``ty2`` by interpreting all the pointer representation bits as an integer +(equivalent to a ``bitcast``) and either truncating or zero extending that value +to the size of the integer type. If ``value`` is smaller than ``ty2`` then a zero extension is done. If ``value`` is larger than ``ty2`` then a truncation is done. If they are the same size, then nothing is done (*no-op cast*) other than a type change. +The ``ptrtoint`` instruction always :ref:`captures address and provenance <pointercapture>` +of the pointer argument. Example: """""""" @@ -12468,6 +12488,9 @@ of the integer ``value``. If ``value`` is larger than the size of a pointer then a truncation is done. If ``value`` is smaller than the size of a pointer then a zero extension is done. If they are the same size, nothing is done (*no-op cast*). +The behavior is equivalent to a ``bitcast``; however, the resulting value is not +guaranteed to be dereferenceable (e.g. if the result type is a +:ref:`non-integral pointer <nointptrtype>`).
Example: """""""" diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2ad080e6d0cd2..ec54bab9ae3f6 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -92,6 +92,7 @@ class DataLayout { /// The function pointer alignment is a multiple of the function alignment. MultipleOfFunctionAlign, }; + private: bool BigEndian = false; @@ -324,16 +325,38 @@ class DataLayout { /// the backends/clients are updated. Align getPointerPrefAlignment(unsigned AS = 0) const; - /// Layout pointer size in bytes, rounded up to a whole - /// number of bytes. + /// The pointer representation size in bytes, rounded up to a whole number of + /// bytes. The difference between this function and getAddressSize() is that + /// this one returns the size of the entire pointer representation (including + /// metadata bits for fat pointers) and the latter only returns the number of + /// address bits. + /// \sa DataLayout::getAddressSizeInBits /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - // Index size in bytes used for address calculation, - /// rounded up to a whole number of bytes. + /// The index size in bytes used for address calculation, rounded up to a + /// whole number of bytes. This not only defines the size used in + /// getelementptr operations, but also the size of addresses in this \p AS. + /// For example, a 64-bit CHERI-enabled target has 128-bit pointers of which + /// only 64 are used to represent the address and the remaining ones are used + /// for metadata such as bounds and access permissions. In this case + /// getPointerSize() returns 16, but getIndexSize() returns 8. + /// To help with code understanding, the alias getAddressSize() can be used + /// instead of getIndexSize() to clarify that an address width is needed. 
unsigned getIndexSize(unsigned AS) const; + /// The size in bytes of the integer address of a pointer in a given address space, which + /// is defined to be the same as getIndexSize(). This exists as a separate + /// function to make it clearer when reading code that the size of an address + /// is being requested. While targets exist where index size and the + /// underlying address width are not identical (e.g. AMDGPU fat pointers with + /// 48-bit addresses and 32-bit offsets for indexing), there is currently no need + /// to differentiate these properties in LLVM. + /// \sa DataLayout::getIndexSize + /// \sa DataLayout::getAddressSizeInBits + unsigned getAddressSize(unsigned AS) const { return getIndexSize(AS); } + /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. SmallVector<unsigned, 8> getNonIntegralAddressSpaces() const { @@ -358,29 +381,53 @@ class DataLayout { return PTy && isNonIntegralPointerType(PTy); } - /// Layout pointer size, in bits + /// The size in bits of the pointer representation in a given address space. + /// This is not necessarily the same as the width of the integer address of a pointer (e.g. + /// for fat pointers). + /// \sa DataLayout::getAddressSizeInBits() /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSizeInBits(unsigned AS = 0) const { return getPointerSpec(AS).BitWidth; } - /// Size in bits of index used for address calculation in getelementptr. + /// The size in bits of indices used for address calculation in getelementptr + /// and for addresses in the given AS. See getIndexSize() for more + /// information. + /// \sa DataLayout::getAddressSizeInBits() unsigned getIndexSizeInBits(unsigned AS) const { return getPointerSpec(AS).IndexBitWidth; } - /// Layout pointer size, in bits, based on the type. If this function is + /// The size in bits of an address in the given AS.
This is defined to + /// return the same value as getIndexSizeInBits() since there is currently no + /// target that requires these two properties to have different values. See + /// getIndexSize() for more information. + /// \sa DataLayout::getIndexSizeInBits() + unsigned getAddressSizeInBits(unsigned AS) const { + return getIndexSizeInBits(AS); + } + + /// The pointer representation size in bits for this type. If this function is /// called with a pointer type, then the type size of the pointer is returned. /// If this function is called with a vector of pointers, then the type size /// of the pointer is returned. This should only be called with a pointer or /// vector of pointers. unsigned getPointerTypeSizeInBits(Type *) const; - /// Layout size of the index used in GEP calculation. + /// The size in bits of the index used in GEP calculation for this type. /// The function should be called with pointer or vector of pointers type. + /// This is defined to return the same value as getAddressSizeInBits(), + /// but separate functions exist for code clarity. unsigned getIndexTypeSizeInBits(Type *Ty) const; + /// The size in bits of an address for this type. + /// This is defined to return the same value as getIndexTypeSizeInBits(), + /// but separate functions exist for code clarity. + unsigned getAddressSizeInBits(Type *Ty) const { + return getIndexTypeSizeInBits(Ty); + } + unsigned getPointerTypeSize(Type *Ty) const { return getPointerTypeSizeInBits(Ty) / 8; } @@ -515,15 +562,21 @@ class DataLayout { /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; - /// Returns the type of a GEP index in AddressSpace. + /// Returns the type of a GEP index in \p AddressSpace. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. 
IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const; + /// Returns the type of an address in \p AddressSpace. + IntegerType *getAddressType(LLVMContext &C, unsigned AddressSpace) const { + return getIndexType(C, AddressSpace); + } /// Returns the type of a GEP index. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. Type *getIndexType(Type *PtrTy) const; + /// Returns the type of an address for the pointer type \p PtrTy. + Type *getAddressType(Type *PtrTy) const { return getIndexType(PtrTy); } /// Returns the offset from the beginning of the type for the specified /// indices.