diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile index b9b3c682ea..67f5f4cdef 100644 --- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile +++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile @@ -1,10 +1,9 @@ -FROM ubuntu:22.04 +FROM ubuntu:23.04 RUN apt-get update && apt-get install -y --no-install-recommends \ gcc libc6-dev qemu-user ca-certificates \ - gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \ - qemu-user + gcc-riscv64-linux-gnu libc6-dev-riscv64-cross ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \ - CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu" \ + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zk=true" \ OBJDUMP=riscv64-linux-gnu-objdump diff --git a/ci/run.sh b/ci/run.sh index 0e33de89db..9923fa8e36 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -47,6 +47,7 @@ case ${TARGET} in # Some of our test dependencies use the deprecated `gcc` crates which # doesn't detect RISC-V compilers automatically, so do it manually here. riscv64*) + export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk" export TARGET_CC="riscv64-linux-gnu-gcc" ;; esac diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 12a5b086c6..ad3ec863d4 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -64,8 +64,9 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "riscv32", doc))] #[doc(cfg(any(target_arch = "riscv32")))] - #[unstable(feature = "stdsimd", issue = "27731")] + #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] pub mod riscv32 { + pub use crate::core_arch::riscv32::*; pub use crate::core_arch::riscv_shared::*; } @@ -74,7 +75,7 @@ pub mod arch { /// See the [module documentation](../index.html) for more details. 
#[cfg(any(target_arch = "riscv64", doc))] #[doc(cfg(any(target_arch = "riscv64")))] - #[unstable(feature = "stdsimd", issue = "27731")] + #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] pub mod riscv64 { pub use crate::core_arch::riscv64::*; // RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05). @@ -279,6 +280,10 @@ mod aarch64; #[doc(cfg(any(target_arch = "arm")))] mod arm; +#[cfg(any(target_arch = "riscv32", doc))] +#[doc(cfg(any(target_arch = "riscv32")))] +mod riscv32; + #[cfg(any(target_arch = "riscv64", doc))] #[doc(cfg(any(target_arch = "riscv64")))] mod riscv64; diff --git a/crates/core_arch/src/riscv32/mod.rs b/crates/core_arch/src/riscv32/mod.rs new file mode 100644 index 0000000000..0a8634c85e --- /dev/null +++ b/crates/core_arch/src/riscv32/mod.rs @@ -0,0 +1,5 @@ +//! RISC-V RV32 specific intrinsics + +mod zk; + +pub use zk::*; diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs new file mode 100644 index 0000000000..3767577724 --- /dev/null +++ b/crates/core_arch/src/riscv32/zk.rs @@ -0,0 +1,367 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +extern "unadjusted" { + #[link_name = "llvm.riscv.aes32esi"] + fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32esmi"] + fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32dsi"] + fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.aes32dsmi"] + fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.zip.i32"] + fn _zip(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.unzip.i32"] + fn _unzip(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig0h"] + fn _sha512sig0h(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig0l"] + fn _sha512sig0l(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sig1h"] + fn _sha512sig1h(rs1: i32, rs2: i32) -> i32; + + #[link_name = 
"llvm.riscv.sha512sig1l"] + fn _sha512sig1l(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sum0r"] + fn _sha512sum0r(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.sha512sum1r"] + fn _sha512sum1r(rs1: i32, rs2: i32) -> i32; +} + +/// AES final round encryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must +/// always be implemented such that its execution latency does not depend on the data being +/// operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.3 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))] +#[inline] +pub unsafe fn aes32esi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// AES middle round encryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with +/// rs1. This instruction must always be implemented such that its execution latency does not +/// depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.4 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used.
+/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))] +#[inline] +pub unsafe fn aes32esmi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// AES final round decryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// inverse AES SBox operation, and XOR’s the result with rs1. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.1 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zknd` target feature is present. +#[target_feature(enable = "zknd")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))] +#[inline] +pub unsafe fn aes32dsi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// AES middle round decryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with +/// rs1. This instruction must always be implemented such that its execution latency does not +/// depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.2 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zknd` target feature is present. +#[target_feature(enable = "zknd")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))] +#[inline] +pub unsafe fn aes32dsmi(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// Place upper/lower halves of the source register into odd/even bits of the destination +/// respectively. +/// +/// This instruction places bits in the low half of the source register into the even bit +/// positions of the destination, and bits in the high half of the source register into the odd +/// bit positions of the destination. It is the inverse of the unzip instruction. This +/// instruction is available only on RV32. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.49 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +// See #1464 +// #[cfg_attr(test, assert_instr(zip))] +#[inline] +pub unsafe fn zip(rs: u32) -> u32 { + _zip(rs as i32) as u32 +} + +/// Place odd and even bits of the source word into upper/lower halves of the destination. +/// +/// This instruction places the even bits of the source register into the low half of the +/// destination, and the odd bits of the source into the high bits of the destination. It is +/// the inverse of the zip instruction. This instruction is available only on RV32.
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.45 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(unzip))] +#[inline] +pub unsafe fn unzip(rs: u32) -> u32 { + _unzip(rs as i32) as u32 +} + +/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.31 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig0h))] +#[inline] +pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 { + _sha512sig0h(rs1 as i32, rs2 as i32) as u32 +} + +/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.32 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig0l))] +#[inline] +pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 { + _sha512sig0l(rs1 as i32, rs2 as i32) as u32 +} + +/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.33 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig1h))] +#[inline] +pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 { + _sha512sig1h(rs1 as i32, rs2 as i32) as u32 +} + +/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. 
This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.34 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1l))] +#[inline] +pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 { + _sha512sig1l(rs1 as i32, rs2 as i32) as u32 +} + +/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.35 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum0r))] +#[inline] +pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 { + _sha512sum0r(rs1 as i32, rs2 as i32) as u32 +} + +/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.36 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum1r))] +#[inline] +pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 { + _sha512sum1r(rs1 as i32, rs2 as i32) as u32 +} diff --git a/crates/core_arch/src/riscv64/mod.rs b/crates/core_arch/src/riscv64/mod.rs index 751b9a860f..ad16d6c231 100644 --- a/crates/core_arch/src/riscv64/mod.rs +++ b/crates/core_arch/src/riscv64/mod.rs @@ -1,6 +1,10 @@ //! RISC-V RV64 specific intrinsics use crate::arch::asm; +mod zk; + +pub use zk::*; + /// Loads virtual machine memory by unsigned word integer /// /// This instruction performs an explicit memory access as though `V=1`; diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs new file mode 100644 index 0000000000..3dbe3705db --- /dev/null +++ b/crates/core_arch/src/riscv64/zk.rs @@ -0,0 +1,281 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +extern "unadjusted" { + #[link_name = "llvm.riscv.aes64es"] + fn _aes64es(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64esm"] + fn _aes64esm(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64ds"] + fn _aes64ds(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64dsm"] + fn _aes64dsm(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.aes64ks1i"] + fn _aes64ks1i(rs1: i64, rnum: i32) -> i64; + + #[link_name = "llvm.riscv.aes64ks2"] + fn _aes64ks2(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sig0"] + fn _sha512sig0(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sig1"] + fn _sha512sig1(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sum0"] + fn _sha512sum0(rs1: i64) -> i64; + + #[link_name = "llvm.riscv.sha512sum1"] + fn _sha512sum1(rs1: i64) -> i64; 
+} + +/// AES final round encryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must +/// always be implemented such that its execution latency does not depend on the data being +/// operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.7 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64es))] +#[inline] +pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 { + _aes64es(rs1 as i64, rs2 as i64) as u64 +} + +/// AES middle round encryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This +/// instruction must always be implemented such that its execution latency does not depend on +/// the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.8 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64esm))] +#[inline] +pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 { + _aes64esm(rs1 as i64, rs2 as i64) as u64 +} + +/// AES final round decryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. 
This +/// instruction must always be implemented such that its execution latency does not depend on +/// the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.5 +/// +/// # Safety +/// +/// This function is safe to use if the `zknd` target feature is present. +#[target_feature(enable = "zknd")] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64ds))] +#[inline] +pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 { + _aes64ds(rs1 as i64, rs2 as i64) as u64 +} + +/// AES middle round decryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps. +/// This instruction must always be implemented such that its execution latency does not depend +/// on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.6 +/// +/// # Safety +/// +/// This function is safe to use if the `zknd` target feature is present. +#[target_feature(enable = "zknd")] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64dsm))] +#[inline] +pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 { + _aes64dsm(rs1 as i64, rs2 as i64) as u64 +} + +/// This instruction implements part of the KeySchedule operation for the AES Block cipher +/// involving the SBox operation. +/// +/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the +/// AES block cipher Key Schedule. This instruction must always be implemented such that its +/// execution latency does not depend on the data being operated on. Note that rnum must be in +/// the range 0x0..0xA. The values 0xB..0xF are reserved. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.10 +/// +/// # Note +/// +/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`. +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` or `zknd` target feature is present. +#[target_feature(enable = "zkne", enable = "zknd")] +#[rustc_legacy_const_generics(1)] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))] +#[inline] +pub unsafe fn aes64ks1i(rs1: u64) -> u64 { + static_assert!(RNUM <= 10); + + _aes64ks1i(rs1 as i64, RNUM as i32) as u64 +} + +/// This instruction implements part of the KeySchedule operation for the AES Block cipher. +/// +/// This instruction implements the additional XOR’ing of key words as part of the AES block +/// cipher Key Schedule. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.11 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` or `zknd` target feature is present. +#[target_feature(enable = "zkne", enable = "zknd")] +// See #1464 +// #[cfg_attr(test, assert_instr(aes64ks2))] +#[inline] +pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 { + _aes64ks2(rs1 as i64, rs2 as i64) as u64 +} + +/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.37 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig0))] +#[inline] +pub unsafe fn sha512sig0(rs1: u64) -> u64 { + _sha512sig0(rs1 as i64) as u64 +} + +/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma1 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.38 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sig1))] +#[inline] +pub unsafe fn sha512sig1(rs1: u64) -> u64 { + _sha512sig1(rs1 as i64) as u64 +} + +/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sum0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.39 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum0))] +#[inline] +pub unsafe fn sha512sum0(rs1: u64) -> u64 { + _sha512sum0(rs1 as i64) as u64 +} + +/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sum1 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.40 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha512sum1))] +#[inline] +pub unsafe fn sha512sum1(rs1: u64) -> u64 { + _sha512sum1(rs1 as i64) as u64 +} diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs index ed021df5a9..d14431ead4 100644 --- a/crates/core_arch/src/riscv_shared/mod.rs +++ b/crates/core_arch/src/riscv_shared/mod.rs @@ -1,7 +1,11 @@ //! Shared RISC-V intrinsics + mod p; +mod zk; +#[unstable(feature = "stdsimd", issue = "27731")] pub use p::*; +pub use zk::*; use crate::arch::asm; @@ -10,6 +14,7 @@ use crate::arch::asm; /// The PAUSE instruction is a HINT that indicates the current hart's rate of instruction retirement /// should be temporarily reduced or paused. The duration of its effect must be bounded and may be zero. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn pause() { unsafe { asm!(".insn i 0x0F, 0, x0, x0, 0x010", options(nomem, nostack)) } } @@ -19,6 +24,7 @@ pub fn pause() { /// The NOP instruction does not change any architecturally visible state, except for /// advancing the `pc` and incrementing any applicable performance counters. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn nop() { unsafe { asm!("nop", options(nomem, nostack)) } } @@ -29,6 +35,7 @@ pub fn nop() { /// until an interrupt might need servicing. This instruction is a hint, /// and a legal implementation is to simply implement WFI as a NOP. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn wfi() { asm!("wfi", options(nomem, nostack)) } @@ -41,6 +48,7 @@ pub unsafe fn wfi() { /// FENCE.I does not ensure that other RISC-V harts' instruction fetches will observe the /// local hart's stores in a multiprocessor system. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn fence_i() { asm!("fence.i", options(nostack)) } @@ -54,6 +62,7 @@ pub unsafe fn fence_i() { /// virtual address in parameter `vaddr` and that match the address space identified by integer /// parameter `asid`, except for entries containing global mappings. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_vma(vaddr: usize, asid: usize) { asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) } @@ -65,6 +74,7 @@ pub unsafe fn sfence_vma(vaddr: usize, asid: usize) { /// The fence also invalidates all address-translation cache entries that contain leaf page /// table entries corresponding to the virtual address in parameter `vaddr`, for all address spaces. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_vma_vaddr(vaddr: usize) { asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack)) } @@ -78,6 +88,7 @@ pub unsafe fn sfence_vma_vaddr(vaddr: usize) { /// address-translation cache entries matching the address space identified by integer /// parameter `asid`, except for entries containing global mappings. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_vma_asid(asid: usize) { asm!("sfence.vma x0, {}", in(reg) asid, options(nostack)) } @@ -88,6 +99,7 @@ pub unsafe fn sfence_vma_asid(asid: usize) { /// tables, for all address spaces. The fence also invalidates all address-translation cache entries, /// for all address spaces. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_vma_all() { asm!("sfence.vma", options(nostack)) } @@ -97,6 +109,7 @@ pub unsafe fn sfence_vma_all() { /// This instruction invalidates any address-translation cache entries that an /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sinval_vma(vaddr: usize, asid: usize) { // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) asm!(".insn r 0x73, 0, 0x0B, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) @@ -107,6 +120,7 @@ pub unsafe fn sinval_vma(vaddr: usize, asid: usize) { /// This instruction invalidates any address-translation cache entries that an /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sinval_vma_vaddr(vaddr: usize) { asm!(".insn r 0x73, 0, 0x0B, x0, {}, x0", in(reg) vaddr, options(nostack)) } @@ -116,6 +130,7 @@ pub unsafe fn sinval_vma_vaddr(vaddr: usize) { /// This instruction invalidates any address-translation cache entries that an /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sinval_vma_asid(asid: usize) { asm!(".insn r 0x73, 0, 0x0B, x0, x0, {}", in(reg) asid, options(nostack)) } @@ -125,6 +140,7 @@ pub unsafe fn sinval_vma_asid(asid: usize) { /// This instruction invalidates any address-translation cache entries that an /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sinval_vma_all() { asm!(".insn r 0x73, 0, 0x0B, x0, x0, x0", options(nostack)) } @@ -134,6 +150,7 @@ pub unsafe fn sinval_vma_all() { /// This instruction guarantees that any previous stores already visible to the current RISC-V hart /// are ordered before subsequent `SINVAL.VMA` instructions executed by the same hart. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_w_inval() { // asm!("sfence.w.inval", options(nostack)) asm!(".insn i 0x73, 0, x0, x0, 0x180", options(nostack)) @@ -144,6 +161,7 @@ pub unsafe fn sfence_w_inval() { /// This instruction guarantees that any previous SINVAL.VMA instructions executed by the current hart /// are ordered before subsequent implicit references by that hart to the memory-management data structures. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn sfence_inval_ir() { // asm!("sfence.inval.ir", options(nostack)) asm!(".insn i 0x73, 0, x0, x0, 0x181", options(nostack)) @@ -158,6 +176,7 @@ pub unsafe fn sfence_inval_ir() { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.B` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlv_b(src: *const i8) -> i8 { let value: i8; asm!(".insn i 0x73, 0x4, {}, {}, 0x600", out(reg) value, in(reg) src, options(readonly, nostack)); @@ -173,6 +192,7 @@ pub unsafe fn hlv_b(src: *const i8) -> i8 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.BU` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlv_bu(src: *const u8) -> u8 { let value: u8; asm!(".insn i 0x73, 0x4, {}, {}, 0x601", out(reg) value, in(reg) src, options(readonly, nostack)); @@ -188,6 +208,7 @@ pub unsafe fn hlv_bu(src: *const u8) -> u8 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.H` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlv_h(src: *const i16) -> i16 { let value: i16; asm!(".insn i 0x73, 0x4, {}, {}, 0x640", out(reg) value, in(reg) src, options(readonly, nostack)); @@ -203,6 +224,7 @@ pub unsafe fn hlv_h(src: *const i16) -> i16 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.HU` /// instruction which is effectively a dereference to any memory address. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlv_hu(src: *const u16) -> u16 { let value: u16; asm!(".insn i 0x73, 0x4, {}, {}, 0x641", out(reg) value, in(reg) src, options(readonly, nostack)); @@ -218,6 +240,7 @@ pub unsafe fn hlv_hu(src: *const u16) -> u16 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.HU` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlvx_hu(src: *const u16) -> u16 { let insn: u16; asm!(".insn i 0x73, 0x4, {}, {}, 0x643", out(reg) insn, in(reg) src, options(readonly, nostack)); @@ -233,6 +256,7 @@ pub unsafe fn hlvx_hu(src: *const u16) -> u16 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.W` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlv_w(src: *const i32) -> i32 { let value: i32; asm!(".insn i 0x73, 0x4, {}, {}, 0x680", out(reg) value, in(reg) src, options(readonly, nostack)); @@ -248,6 +272,7 @@ pub unsafe fn hlv_w(src: *const i32) -> i32 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.WU` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hlvx_wu(src: *const u32) -> u32 { let insn: u32; asm!(".insn i 0x73, 0x4, {}, {}, 0x683", out(reg) insn, in(reg) src, options(readonly, nostack)); @@ -263,6 +288,7 @@ pub unsafe fn hlvx_wu(src: *const u32) -> u32 { /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.B` /// instruction which is effectively a dereference to any memory address. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hsv_b(dst: *mut i8, src: i8) { asm!(".insn r 0x73, 0x4, 0x31, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); } @@ -276,6 +302,7 @@ pub unsafe fn hsv_b(dst: *mut i8, src: i8) { /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.H` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hsv_h(dst: *mut i16, src: i16) { asm!(".insn r 0x73, 0x4, 0x33, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); } @@ -289,6 +316,7 @@ pub unsafe fn hsv_h(dst: *mut i16, src: i16) { /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.W` /// instruction which is effectively a dereference to any memory address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hsv_w(dst: *mut i32, src: i32) { asm!(".insn r 0x73, 0x4, 0x35, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack)); } @@ -302,6 +330,7 @@ pub unsafe fn hsv_w(dst: *mut i32, src: i32) { /// /// This fence specifies a single guest virtual address, and a single guest address-space identifier. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) { // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid) asm!(".insn r 0x73, 0, 0x11, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) @@ -316,6 +345,7 @@ pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) { /// /// This fence specifies a single guest virtual address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_vvma_vaddr(vaddr: usize) { asm!(".insn r 0x73, 0, 0x11, x0, {}, x0", in(reg) vaddr, options(nostack)) } @@ -329,6 +359,7 @@ pub unsafe fn hfence_vvma_vaddr(vaddr: usize) { /// /// This fence specifies a single guest address-space identifier. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_vvma_asid(asid: usize) { asm!(".insn r 0x73, 0, 0x11, x0, x0, {}", in(reg) asid, options(nostack)) } @@ -342,6 +373,7 @@ pub unsafe fn hfence_vvma_asid(asid: usize) { /// /// This fence applies to any guest address spaces and guest virtual addresses. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_vvma_all() { asm!(".insn r 0x73, 0, 0x11, x0, x0, x0", options(nostack)) } @@ -354,6 +386,7 @@ pub unsafe fn hfence_vvma_all() { /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine /// by virtual machine identifier (VMID). #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) { // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) asm!(".insn r 0x73, 0, 0x31, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) @@ -366,6 +399,7 @@ pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) { /// /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_gvma_gaddr(gaddr: usize) { asm!(".insn r 0x73, 0, 0x31, x0, {}, x0", in(reg) gaddr, options(nostack)) } @@ -377,6 +411,7 @@ pub unsafe fn hfence_gvma_gaddr(gaddr: usize) { /// /// This fence specifies a single virtual machine by virtual machine identifier (VMID). #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_gvma_vmid(vmid: usize) { asm!(".insn r 0x73, 0, 0x31, x0, x0, {}", in(reg) vmid, options(nostack)) } @@ -388,6 +423,7 @@ pub unsafe fn hfence_gvma_vmid(vmid: usize) { /// /// This fence specifies all guest physical addresses and all virtual machines. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hfence_gvma_all() { asm!(".insn r 0x73, 0, 0x31, x0, x0, x0", options(nostack)) } @@ -399,6 +435,7 @@ pub unsafe fn hfence_gvma_all() { /// /// This fence specifies a single guest virtual address, and a single guest address-space identifier. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) { // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) asm!(".insn r 0x73, 0, 0x13, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack)) @@ -411,6 +448,7 @@ pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) { /// /// This fence specifies a single guest virtual address. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_vvma_vaddr(vaddr: usize) { asm!(".insn r 0x73, 0, 0x13, x0, {}, x0", in(reg) vaddr, options(nostack)) } @@ -422,6 +460,7 @@ pub unsafe fn hinval_vvma_vaddr(vaddr: usize) { /// /// This fence specifies a single guest address-space identifier. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_vvma_asid(asid: usize) { asm!(".insn r 0x73, 0, 0x13, x0, x0, {}", in(reg) asid, options(nostack)) } @@ -433,6 +472,7 @@ pub unsafe fn hinval_vvma_asid(asid: usize) { /// /// This fence applies to any guest address spaces and guest virtual addresses. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_vvma_all() { asm!(".insn r 0x73, 0, 0x13, x0, x0, x0", options(nostack)) } @@ -445,6 +485,7 @@ pub unsafe fn hinval_vvma_all() { /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine /// by virtual machine identifier (VMID). 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) { // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) asm!(".insn r 0x73, 0, 0x33, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack)) @@ -457,6 +498,7 @@ pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) { /// /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_gvma_gaddr(gaddr: usize) { asm!(".insn r 0x73, 0, 0x33, x0, {}, x0", in(reg) gaddr, options(nostack)) } @@ -468,6 +510,7 @@ pub unsafe fn hinval_gvma_gaddr(gaddr: usize) { /// /// This fence specifies a single virtual machine by virtual machine identifier (VMID). #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_gvma_vmid(vmid: usize) { asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack)) } @@ -479,6 +522,7 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) { /// /// This fence specifies all guest physical addresses and all virtual machines. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub unsafe fn hinval_gvma_all() { asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack)) } @@ -502,6 +546,7 @@ pub unsafe fn hinval_gvma_all() { /// [`frrm`]: fn.frrm.html /// [`frflags`]: fn.frflags.html #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn frcsr() -> u32 { let value: u32; unsafe { asm!("frcsr {}", out(reg) value, options(nomem, nostack)) }; @@ -513,6 +558,7 @@ pub fn frcsr() -> u32 { /// This function swaps the value in `fcsr` by copying the original value to be returned, /// and then writing a new value obtained from input variable `value` into `fcsr`. 
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn fscsr(value: u32) -> u32 { let original: u32; unsafe { asm!("fscsr {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) } @@ -535,6 +581,7 @@ pub fn fscsr(value: u32) -> u32 { /// | 110 | | _Reserved for future use._ | /// | 111 | DYN | In Rounding Mode register, _reserved_. | #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn frrm() -> u32 { let value: u32; unsafe { asm!("frrm {}", out(reg) value, options(nomem, nostack)) }; @@ -547,6 +594,7 @@ pub fn frrm() -> u32 { /// and then writing a new value obtained from the three least-significant bits of /// input variable `value` into `frm`. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn fsrm(value: u32) -> u32 { let original: u32; unsafe { asm!("fsrm {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) } @@ -570,6 +618,7 @@ pub fn fsrm(value: u32) -> u32 { /// | 1 | UF | Underflow | /// | 0 | NX | Inexact | #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn frflags() -> u32 { let value: u32; unsafe { asm!("frflags {}", out(reg) value, options(nomem, nostack)) }; @@ -582,179 +631,9 @@ pub fn frflags() -> u32 { /// and then writing a new value obtained from the five least-significant bits of /// input variable `value` into `fflags`. #[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn fsflags(value: u32) -> u32 { let original: u32; unsafe { asm!("fsflags {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) } original } - -/// `P0` transformation function as is used in the SM3 hash algorithm -/// -/// This function is included in `Zksh` extension. It's defined as: -/// -/// ```text -/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17) -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. 
-/// -/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the -/// compression function `CF` uses the intermediate value `TT2` to calculate -/// the variable `E` in one iteration for subsequent processes. -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. -#[inline] -#[target_feature(enable = "zksh")] -pub fn sm3p0(x: u32) -> u32 { - let ans: u32; - unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; - ans -} - -/// `P1` transformation function as is used in the SM3 hash algorithm -/// -/// This function is included in `Zksh` extension. It's defined as: -/// -/// ```text -/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23) -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// -/// In the SM3 algorithm, the `P1` transformation is used to expand message, -/// where expanded word `Wj` can be generated from the previous words. -/// The whole process can be described as the following pseudocode: -/// -/// ```text -/// FOR j=16 TO 67 -/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6 -/// ENDFOR -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. -#[inline] -#[target_feature(enable = "zksh")] -pub fn sm3p1(x: u32) -> u32 { - let ans: u32; - unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; - ans -} - -/// Accelerates the round function `F` in the SM4 block cipher algorithm -/// -/// This instruction is included in extension `Zksed`. It's defined as: -/// -/// ```text -/// SM4ED(x, a, BS) = x ⊕ T(ai) -/// ... 
where -/// ai = a.bytes[BS] -/// T(ai) = L(τ(ai)) -/// bi = τ(ai) = SM4-S-Box(ai) -/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) -/// SM4ED = (ci ≪ (BS * 8)) ⊕ x -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` -/// and linear layer transform `L`. -/// -/// In the SM4 algorithm, the round function `F` is defined as: -/// -/// ```text -/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) -/// ... where -/// T(A) = L(τ(A)) -/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) -/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) -/// ``` -/// -/// It can be implemented by `sm4ed` instruction like: -/// -/// ```no_run -/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { -/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; -/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; -/// let a = x1 ^ x2 ^ x3 ^ rk; -/// let c0 = sm4ed::<0>(x0, a); -/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc. -/// let c2 = sm4ed::<2>(c1, a); -/// let c3 = sm4ed::<3>(c2, a); -/// return c3; // c3 represents c[0..=3] -/// # } -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. -#[inline] -#[target_feature(enable = "zksed")] -pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 { - static_assert!(BS <= 3); - let ans: u32; - unsafe { - asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack)) - }; - ans -} - -/// Accelerates the key schedule operation in the SM4 block cipher algorithm -/// -/// This instruction is included in extension `Zksed`.
It's defined as: -/// -/// ```text -/// SM4KS(x, k, BS) = x ⊕ T'(ki) -/// ... where -/// ki = k.bytes[BS] -/// T'(ki) = L'(τ(ki)) -/// bi = τ(ki) = SM4-S-Box(ki) -/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23) -/// SM4KS = (ci ≪ (BS * 8)) ⊕ x -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ` -/// and the replaced linear layer transform `L'`. -/// -/// In the SM4 algorithm, the key schedule is defined as: -/// -/// ```text -/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i]) -/// ... where -/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3] -/// T'(K) = L'(τ(K)) -/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3)) -/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23) -/// ``` -/// -/// where `MK` represents the input 128-bit encryption key, -/// constants `FK` and `CK` are fixed system configuration constant values defined by the SM4 algorithm. -/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like: -/// -/// ```no_run -/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 { -/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks; -/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks; -/// let k = k1 ^ k2 ^ k3 ^ ck_i; -/// let c0 = sm4ks::<0>(k0, k); -/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc. -/// let c2 = sm4ks::<2>(c1, k); -/// let c3 = sm4ks::<3>(c2, k); -/// return c3; // c3 represents c[0..=3] -/// # } -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. 
-#[inline] -#[target_feature(enable = "zksed")] -pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 { - static_assert!(BS <= 3); - let ans: u32; - unsafe { - asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack)) - }; - ans -} diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs new file mode 100644 index 0000000000..db97f72bc4 --- /dev/null +++ b/crates/core_arch/src/riscv_shared/zk.rs @@ -0,0 +1,462 @@ +#[cfg(test)] +use stdarch_test::assert_instr; + +extern "unadjusted" { + #[link_name = "llvm.riscv.sm4ed"] + fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.sm4ks"] + fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32; + + #[link_name = "llvm.riscv.sm3p0"] + fn _sm3p0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sm3p1"] + fn _sm3p1(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sig0"] + fn _sha256sig0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sig1"] + fn _sha256sig1(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sum0"] + fn _sha256sum0(rs1: i32) -> i32; + + #[link_name = "llvm.riscv.sha256sum1"] + fn _sha256sum1(rs1: i32) -> i32; +} + +#[cfg(target_arch = "riscv32")] +extern "unadjusted" { + #[link_name = "llvm.riscv.xperm8.i32"] + fn _xperm8_32(rs1: i32, rs2: i32) -> i32; + + #[link_name = "llvm.riscv.xperm4.i32"] + fn _xperm4_32(rs1: i32, rs2: i32) -> i32; +} + +#[cfg(target_arch = "riscv64")] +extern "unadjusted" { + #[link_name = "llvm.riscv.xperm8.i64"] + fn _xperm8_64(rs1: i64, rs2: i64) -> i64; + + #[link_name = "llvm.riscv.xperm4.i64"] + fn _xperm4_64(rs1: i64, rs2: i64) -> i64; +} + +/// Byte-wise lookup of indices into a vector in registers. +/// +/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 +/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes.
The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.47 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkx` target feature is present. +#[target_feature(enable = "zbkx")] +// See #1464 +// #[cfg_attr(test, assert_instr(xperm8))] +#[inline] +pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + { + _xperm8_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + { + _xperm8_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Nibble-wise lookup of indices into a vector. +/// +/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 +/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.48 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkx` target feature is present. +#[target_feature(enable = "zbkx")] +// See #1464 +// #[cfg_attr(test, assert_instr(xperm4))] +#[inline] +pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize { + #[cfg(target_arch = "riscv32")] + { + _xperm4_32(rs1 as i32, rs2 as i32) as usize + } + + #[cfg(target_arch = "riscv64")] + { + _xperm4_64(rs1 as i64, rs2 as i64) as usize + } +} + +/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on.
For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.27 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha256sig0))] +#[inline] +pub unsafe fn sha256sig0(rs1: u32) -> u32 { + _sha256sig0(rs1 as i32) as u32 +} + +/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.28 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha256sig1))] +#[inline] +pub unsafe fn sha256sig1(rs1: u32) -> u32 { + _sha256sig1(rs1 as i32) as u32 +} + +/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.29 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha256sum0))] +#[inline] +pub unsafe fn sha256sum0(rs1: u32) -> u32 { + _sha256sum0(rs1 as i32) as u32 +} + +/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.30 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sha256sum1))] +#[inline] +pub unsafe fn sha256sum1(rs1: u32) -> u32 { + _sha256sum1(rs1 as i32) as u32 +} + +/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\]. +/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.43 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zksed` target feature is present. +/// +/// # Details +/// +/// Accelerates the round function `F` in the SM4 block cipher algorithm +/// +/// This instruction is included in extension `Zksed`. It's defined as: +/// +/// ```text +/// SM4ED(x, a, BS) = x ⊕ T(ai) +/// ... where +/// ai = a.bytes[BS] +/// T(ai) = L(τ(ai)) +/// bi = τ(ai) = SM4-S-Box(ai) +/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) +/// SM4ED = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. 
+/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` +/// and linear layer transform `L`. +/// +/// In the SM4 algorithm, the round function `F` is defined as: +/// +/// ```text +/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) +/// ... where +/// T(A) = L(τ(A)) +/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) +/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) +/// ``` +/// +/// It can be implemented by `sm4ed` instruction like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; +/// let a = x1 ^ x2 ^ x3 ^ rk; +/// let c0 = sm4ed(x0, a, 0); +/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc. +/// let c2 = sm4ed(c1, a, 2); +/// let c3 = sm4ed(c2, a, 3); +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[target_feature(enable = "zksed")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(sm4ed, BS = 0))] +#[inline] +pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\]. +/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on.
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.44 +/// +/// # Note +/// +/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zksed` target feature is present. +/// +/// # Details +/// +/// Accelerates the key schedule operation in the SM4 block cipher algorithm +/// +/// This instruction is included in extension `Zksed`. It's defined as: +/// +/// ```text +/// SM4KS(x, k, BS) = x ⊕ T'(ki) +/// ... where +/// ki = k.bytes[BS] +/// T'(ki) = L'(τ(ki)) +/// bi = τ(ki) = SM4-S-Box(ki) +/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23) +/// SM4KS = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ` +/// and the replaced linear layer transform `L'`. +/// +/// In the SM4 algorithm, the key schedule is defined as: +/// +/// ```text +/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i]) +/// ... where K[0..=3] = MK[0..=3] ⊕ FK[0..=3] +/// T'(K) = L'(τ(K)) +/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3)) +/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23) +/// ``` +/// +/// With key `MK` and SM4 constants `FK`/`CK`, it can be implemented by `sm4ks` instruction like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks; +/// let k = k1 ^ k2 ^ k3 ^ ck_i; +/// let c0 = sm4ks(k0, k, 0); +/// let c1 = sm4ks(c0, k, 1); // c1 represents c[0..=1], etc.
+/// let c2 = sm4ks(c1, k, 2); +/// let c3 = sm4ks(c2, k, 3); +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[target_feature(enable = "zksed")] +#[rustc_legacy_const_generics(2)] +// See #1464 +// #[cfg_attr(test, assert_instr(sm4ks, BS = 0))] +#[inline] +pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 { + static_assert!(BS < 4); + + _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32 +} + +/// Implements the P0 transformation function as used in the SM3 hash function [4, 30]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P0 transform of the SM3 hash function [4, 30]. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.41 +/// +/// # Safety +/// +/// This function is safe to use if the `zksh` target feature is present. +/// +/// # Details +/// +/// `P0` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension. It's defined as: +/// +/// ```text +/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the +/// compression function `CF` uses the intermediate value `TT2` to calculate +/// the variable `E` in one iteration for subsequent processes. +#[target_feature(enable = "zksh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sm3p0))] +#[inline] +pub unsafe fn sm3p0(rs1: u32) -> u32 { + _sm3p0(rs1 as i32) as u32 +} + +/// Implements the P1 transformation function as used in the SM3 hash function [4, 30]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P1 transform of the SM3 hash function [4, 30].
This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.42 +/// +/// # Safety +/// +/// This function is safe to use if the `zksh` target feature is present. +/// +/// # Details +/// +/// `P1` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension. It's defined as: +/// +/// ```text +/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P1` transformation is used to expand message, +/// where expanded word `Wj` can be generated from the previous words. +/// The whole process can be described as the following pseudocode: +/// +/// ```text +/// FOR j=16 TO 67 +/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6 +/// ENDFOR +/// ``` +#[target_feature(enable = "zksh")] +// See #1464 +// #[cfg_attr(test, assert_instr(sm3p1))] +#[inline] +pub unsafe fn sm3p1(rs1: u32) -> u32 { + _sm3p1(rs1 as i32) as u32 +}