diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
index b9b3c682ea..67f5f4cdef 100644
--- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@@ -1,10 +1,9 @@
-FROM ubuntu:22.04
+FROM ubuntu:23.04
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         gcc libc6-dev qemu-user ca-certificates \
-        gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
-        qemu-user
+        gcc-riscv64-linux-gnu libc6-dev-riscv64-cross
 
 ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \
-    CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu" \
+    CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zk=true" \
     OBJDUMP=riscv64-linux-gnu-objdump
diff --git a/ci/run.sh b/ci/run.sh
index 0e33de89db..9923fa8e36 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -47,6 +47,7 @@ case ${TARGET} in
     # Some of our test dependencies use the deprecated `gcc` crates which
     # doesn't detect RISC-V compilers automatically, so do it manually here.
     riscv64*)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk"
         export TARGET_CC="riscv64-linux-gnu-gcc"
         ;;
 esac
diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs
index 12a5b086c6..ad3ec863d4 100644
--- a/crates/core_arch/src/mod.rs
+++ b/crates/core_arch/src/mod.rs
@@ -64,8 +64,9 @@ pub mod arch {
     /// See the [module documentation](../index.html) for more details.
     #[cfg(any(target_arch = "riscv32", doc))]
     #[doc(cfg(any(target_arch = "riscv32")))]
-    #[unstable(feature = "stdsimd", issue = "27731")]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
     pub mod riscv32 {
+        pub use crate::core_arch::riscv32::*;
         pub use crate::core_arch::riscv_shared::*;
     }
 
@@ -74,7 +75,7 @@ pub mod arch {
     /// See the [module documentation](../index.html) for more details.
     #[cfg(any(target_arch = "riscv64", doc))]
     #[doc(cfg(any(target_arch = "riscv64")))]
-    #[unstable(feature = "stdsimd", issue = "27731")]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
     pub mod riscv64 {
         pub use crate::core_arch::riscv64::*;
         // RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05).
@@ -279,6 +280,10 @@ mod aarch64;
 #[doc(cfg(any(target_arch = "arm")))]
 mod arm;
 
+#[cfg(any(target_arch = "riscv32", doc))]
+#[doc(cfg(any(target_arch = "riscv32")))]
+mod riscv32;
+
 #[cfg(any(target_arch = "riscv64", doc))]
 #[doc(cfg(any(target_arch = "riscv64")))]
 mod riscv64;
diff --git a/crates/core_arch/src/riscv32/mod.rs b/crates/core_arch/src/riscv32/mod.rs
new file mode 100644
index 0000000000..0a8634c85e
--- /dev/null
+++ b/crates/core_arch/src/riscv32/mod.rs
@@ -0,0 +1,5 @@
+//! RISC-V RV32 specific intrinsics
+
+mod zk;
+
+pub use zk::*;
diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
new file mode 100644
index 0000000000..3767577724
--- /dev/null
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -0,0 +1,367 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.aes32esi"]
+    fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32esmi"]
+    fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsi"]
+    fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsmi"]
+    fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.zip.i32"]
+    fn _zip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.unzip.i32"]
+    fn _unzip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0h"]
+    fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0l"]
+    fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1h"]
+    fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1l"]
+    fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum0r"]
+    fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum1r"]
+    fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
+}
+
+/// AES final round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.3
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
+#[inline]
+pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// AES middle round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with
+/// rs1. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.4
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
+#[inline]
+pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// AES final round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// inverse AES SBox operation, and XOR’s the result with rs1. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.1
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
+#[inline]
+pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// AES middle round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with
+/// rs1. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.2
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
+#[inline]
+pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// Place upper/lower halves of the source register into odd/even bits of the destination
+/// respectively.
+///
+/// This instruction places bits in the low half of the source register into the even bit
+/// positions of the destination, and bits in the high half of the source register into the odd
+/// bit positions of the destination. It is the inverse of the unzip instruction. This
+/// instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.49
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+// See #1464
+// #[cfg_attr(test, assert_instr(zip))]
+#[inline]
+pub unsafe fn zip(rs: u32) -> u32 {
+    _zip(rs as i32) as u32
+}
+
+/// Place odd and even bits of the source word into upper/lower halves of the destination.
+///
+/// This instruction places the even bits of the source register into the low half of the
+/// destination, and the odd bits of the source into the high bits of the destination. It is
+/// the inverse of the zip instruction. This instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.45
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(unzip))]
+#[inline]
+pub unsafe fn unzip(rs: u32) -> u32 {
+    _unzip(rs as i32) as u32
+}
+
+/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.31
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0h))]
+#[inline]
+pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
+    _sha512sig0h(rs1 as i32, rs2 as i32) as u32
+}
+
+/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.32
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0l))]
+#[inline]
+pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
+    _sha512sig0l(rs1 as i32, rs2 as i32) as u32
+}
+
+/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.33
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig1h))]
+#[inline]
+pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
+    _sha512sig1h(rs1 as i32, rs2 as i32) as u32
+}
+
+/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.34
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1l))]
+#[inline]
+pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
+    _sha512sig1l(rs1 as i32, rs2 as i32) as u32
+}
+
+/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.35
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum0r))]
+#[inline]
+pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
+    _sha512sum0r(rs1 as i32, rs2 as i32) as u32
+}
+
+/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.36
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum1r))]
+#[inline]
+pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
+    _sha512sum1r(rs1 as i32, rs2 as i32) as u32
+}
diff --git a/crates/core_arch/src/riscv64/mod.rs b/crates/core_arch/src/riscv64/mod.rs
index 751b9a860f..ad16d6c231 100644
--- a/crates/core_arch/src/riscv64/mod.rs
+++ b/crates/core_arch/src/riscv64/mod.rs
@@ -1,6 +1,10 @@
 //! RISC-V RV64 specific intrinsics
 use crate::arch::asm;
 
+mod zk;
+
+pub use zk::*;
+
 /// Loads virtual machine memory by unsigned word integer
 ///
 /// This instruction performs an explicit memory access as though `V=1`;
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
new file mode 100644
index 0000000000..3dbe3705db
--- /dev/null
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -0,0 +1,281 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.aes64es"]
+    fn _aes64es(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64esm"]
+    fn _aes64esm(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ds"]
+    fn _aes64ds(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64dsm"]
+    fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ks1i"]
+    fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ks2"]
+    fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig0"]
+    fn _sha512sig0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig1"]
+    fn _sha512sig1(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum0"]
+    fn _sha512sum0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum1"]
+    fn _sha512sum1(rs1: i64) -> i64;
+}
+
+/// AES final round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.7
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64es))]
+#[inline]
+pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
+    _aes64es(rs1 as i64, rs2 as i64) as u64
+}
+
+/// AES middle round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.8
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64esm))]
+#[inline]
+pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
+    _aes64esm(rs1 as i64, rs2 as i64) as u64
+}
+
+/// AES final round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.5
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ds))]
+#[inline]
+pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
+    _aes64ds(rs1 as i64, rs2 as i64) as u64
+}
+
+/// AES middle round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps.
+/// This instruction must always be implemented such that its execution latency does not depend
+/// on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.6
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64dsm))]
+#[inline]
+pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
+    _aes64dsm(rs1 as i64, rs2 as i64) as u64
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES Block cipher
+/// involving the SBox operation.
+///
+/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the
+/// AES block cipher Key Schedule. This instruction must always be implemented such that its
+/// execution latency does not depend on the data being operated on. Note that rnum must be in
+/// the range 0x0..0xA. The values 0xB..0xF are reserved.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.10
+///
+/// # Note
+///
+/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` or `zknd` target feature is present.
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[rustc_legacy_const_generics(1)]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
+#[inline]
+pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
+    static_assert!(RNUM <= 10);
+
+    _aes64ks1i(rs1 as i64, RNUM as i32) as u64
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES Block cipher.
+///
+/// This instruction implements the additional XOR’ing of key words as part of the AES block
+/// cipher Key Schedule. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.11
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` or `zknd` target feature is present.
+#[target_feature(enable = "zkne", enable = "zknd")]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ks2))]
+#[inline]
+pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
+    _aes64ks2(rs1 as i64, rs2 as i64) as u64
+}
+
+/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.37
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0))]
+#[inline]
+pub unsafe fn sha512sig0(rs1: u64) -> u64 {
+    _sha512sig0(rs1 as i64) as u64
+}
+
+/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.38
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig1))]
+#[inline]
+pub unsafe fn sha512sig1(rs1: u64) -> u64 {
+    _sha512sig1(rs1 as i64) as u64
+}
+
+/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.39
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum0))]
+#[inline]
+pub unsafe fn sha512sum0(rs1: u64) -> u64 {
+    _sha512sum0(rs1 as i64) as u64
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.40
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum1))]
+#[inline]
+pub unsafe fn sha512sum1(rs1: u64) -> u64 {
+    _sha512sum1(rs1 as i64) as u64
+}
diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs
index ed021df5a9..d14431ead4 100644
--- a/crates/core_arch/src/riscv_shared/mod.rs
+++ b/crates/core_arch/src/riscv_shared/mod.rs
@@ -1,7 +1,11 @@
 //! Shared RISC-V intrinsics
+
 mod p;
+mod zk;
 
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub use p::*;
+pub use zk::*;
 
 use crate::arch::asm;
 
@@ -10,6 +14,7 @@ use crate::arch::asm;
 /// The PAUSE instruction is a HINT that indicates the current hart's rate of instruction retirement
 /// should be temporarily reduced or paused. The duration of its effect must be bounded and may be zero.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn pause() {
     unsafe { asm!(".insn i 0x0F, 0, x0, x0, 0x010", options(nomem, nostack)) }
 }
@@ -19,6 +24,7 @@ pub fn pause() {
 /// The NOP instruction does not change any architecturally visible state, except for
 /// advancing the `pc` and incrementing any applicable performance counters.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn nop() {
     unsafe { asm!("nop", options(nomem, nostack)) }
 }
@@ -29,6 +35,7 @@ pub fn nop() {
 /// until an interrupt might need servicing. This instruction is a hint,
 /// and a legal implementation is to simply implement WFI as a NOP.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn wfi() {
     asm!("wfi", options(nomem, nostack))
 }
@@ -41,6 +48,7 @@ pub unsafe fn wfi() {
 /// FENCE.I does not ensure that other RISC-V harts' instruction fetches will observe the
 /// local hart's stores in a multiprocessor system.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn fence_i() {
     asm!("fence.i", options(nostack))
 }
@@ -54,6 +62,7 @@ pub unsafe fn fence_i() {
 /// virtual address in parameter `vaddr` and that match the address space identified by integer
 /// parameter `asid`, except for entries containing global mappings.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
     asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
 }
@@ -65,6 +74,7 @@ pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
 /// The fence also invalidates all address-translation cache entries that contain leaf page
 /// table entries corresponding to the virtual address in parameter `vaddr`, for all address spaces.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
     asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -78,6 +88,7 @@ pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
 /// address-translation cache entries matching the address space identified by integer
 /// parameter `asid`, except for entries containing global mappings.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_asid(asid: usize) {
     asm!("sfence.vma x0, {}", in(reg) asid, options(nostack))
 }
@@ -88,6 +99,7 @@ pub unsafe fn sfence_vma_asid(asid: usize) {
 /// tables, for all address spaces. The fence also invalidates all address-translation cache entries,
 /// for all address spaces.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_all() {
     asm!("sfence.vma", options(nostack))
 }
@@ -97,6 +109,7 @@ pub unsafe fn sfence_vma_all() {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
     // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
     asm!(".insn r 0x73, 0, 0x0B, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -107,6 +120,7 @@ pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x0B, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -116,6 +130,7 @@ pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x0B, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -125,6 +140,7 @@ pub unsafe fn sinval_vma_asid(asid: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_all() {
     asm!(".insn r 0x73, 0, 0x0B, x0, x0, x0", options(nostack))
 }
@@ -134,6 +150,7 @@ pub unsafe fn sinval_vma_all() {
 /// This instruction guarantees that any previous stores already visible to the current RISC-V hart
 /// are ordered before subsequent `SINVAL.VMA` instructions executed by the same hart.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_w_inval() {
     // asm!("sfence.w.inval", options(nostack))
     asm!(".insn i 0x73, 0, x0, x0, 0x180", options(nostack))
@@ -144,6 +161,7 @@ pub unsafe fn sfence_w_inval() {
 /// This instruction guarantees that any previous SINVAL.VMA instructions executed by the current hart
 /// are ordered before subsequent implicit references by that hart to the memory-management data structures.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_inval_ir() {
     // asm!("sfence.inval.ir", options(nostack))
     asm!(".insn i 0x73, 0, x0, x0, 0x181", options(nostack))
@@ -158,6 +176,7 @@ pub unsafe fn sfence_inval_ir() {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.B`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_b(src: *const i8) -> i8 {
     let value: i8;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x600", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -173,6 +192,7 @@ pub unsafe fn hlv_b(src: *const i8) -> i8 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.BU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_bu(src: *const u8) -> u8 {
     let value: u8;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x601", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -188,6 +208,7 @@ pub unsafe fn hlv_bu(src: *const u8) -> u8 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.H`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_h(src: *const i16) -> i16 {
     let value: i16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x640", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -203,6 +224,7 @@ pub unsafe fn hlv_h(src: *const i16) -> i16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.HU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_hu(src: *const u16) -> u16 {
     let value: u16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x641", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -218,6 +240,7 @@ pub unsafe fn hlv_hu(src: *const u16) -> u16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.HU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
     let insn: u16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x643", out(reg) insn, in(reg) src, options(readonly, nostack));
@@ -233,6 +256,7 @@ pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.W`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_w(src: *const i32) -> i32 {
     let value: i32;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x680", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -248,6 +272,7 @@ pub unsafe fn hlv_w(src: *const i32) -> i32 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.WU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
     let insn: u32;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x683", out(reg) insn, in(reg) src, options(readonly, nostack));
@@ -263,6 +288,7 @@ pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.B`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
     asm!(".insn r 0x73, 0x4, 0x31, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -276,6 +302,7 @@ pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.H`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
     asm!(".insn r 0x73, 0x4, 0x33, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -289,6 +316,7 @@ pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.W`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
     asm!(".insn r 0x73, 0x4, 0x35, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -302,6 +330,7 @@ pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
 ///
 /// This fence specifies a single guest virtual address, and a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
     // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid)
     asm!(".insn r 0x73, 0, 0x11, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -316,6 +345,7 @@ pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
 ///
 /// This fence specifies a single guest virtual address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x11, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -329,6 +359,7 @@ pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
 ///
 /// This fence specifies a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x11, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -342,6 +373,7 @@ pub unsafe fn hfence_vvma_asid(asid: usize) {
 ///
 /// This fence applies to any guest address spaces and guest virtual addresses.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_all() {
     asm!(".insn r 0x73, 0, 0x11, x0, x0, x0", options(nostack))
 }
@@ -354,6 +386,7 @@ pub unsafe fn hfence_vvma_all() {
 /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine
 /// by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
     // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
     asm!(".insn r 0x73, 0, 0x31, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
@@ -366,6 +399,7 @@ pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
 ///
 /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
     asm!(".insn r 0x73, 0, 0x31, x0, {}, x0", in(reg) gaddr, options(nostack))
 }
@@ -377,6 +411,7 @@ pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
 ///
 /// This fence specifies a single virtual machine by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_vmid(vmid: usize) {
     asm!(".insn r 0x73, 0, 0x31, x0, x0, {}", in(reg) vmid, options(nostack))
 }
@@ -388,6 +423,7 @@ pub unsafe fn hfence_gvma_vmid(vmid: usize) {
 ///
 /// This fence specifies all guest physical addresses and all virtual machines.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_all() {
     asm!(".insn r 0x73, 0, 0x31, x0, x0, x0", options(nostack))
 }
@@ -399,6 +435,7 @@ pub unsafe fn hfence_gvma_all() {
 ///
 /// This fence specifies a single guest virtual address, and a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
     // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
     asm!(".insn r 0x73, 0, 0x13, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -411,6 +448,7 @@ pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
 ///
 /// This fence specifies a single guest virtual address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x13, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -422,6 +460,7 @@ pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
 ///
 /// This fence specifies a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x13, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -433,6 +472,7 @@ pub unsafe fn hinval_vvma_asid(asid: usize) {
 ///
 /// This fence applies to any guest address spaces and guest virtual addresses.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_all() {
     asm!(".insn r 0x73, 0, 0x13, x0, x0, x0", options(nostack))
 }
@@ -445,6 +485,7 @@ pub unsafe fn hinval_vvma_all() {
 /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine
 /// by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
     // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
     asm!(".insn r 0x73, 0, 0x33, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
@@ -457,6 +498,7 @@ pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
 ///
 /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
     asm!(".insn r 0x73, 0, 0x33, x0, {}, x0", in(reg) gaddr, options(nostack))
 }
@@ -468,6 +510,7 @@ pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
 ///
 /// This fence specifies a single virtual machine by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_vmid(vmid: usize) {
     asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack))
 }
@@ -479,6 +522,7 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) {
 ///
 /// This fence specifies all guest physical addresses and all virtual machines.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_all() {
     asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack))
 }
@@ -502,6 +546,7 @@ pub unsafe fn hinval_gvma_all() {
 /// [`frrm`]: fn.frrm.html
 /// [`frflags`]: fn.frflags.html
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frcsr() -> u32 {
     let value: u32;
     unsafe { asm!("frcsr {}", out(reg) value, options(nomem, nostack)) };
@@ -513,6 +558,7 @@ pub fn frcsr() -> u32 {
 /// This function swaps the value in `fcsr` by copying the original value to be returned,
 /// and then writing a new value obtained from input variable `value` into `fcsr`.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fscsr(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fscsr {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
@@ -535,6 +581,7 @@ pub fn fscsr(value: u32) -> u32 {
 /// | 110 |     | _Reserved for future use._ |
 /// | 111 | DYN | In Rounding Mode register, _reserved_. |
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frrm() -> u32 {
     let value: u32;
     unsafe { asm!("frrm {}", out(reg) value, options(nomem, nostack)) };
@@ -547,6 +594,7 @@ pub fn frrm() -> u32 {
 /// and then writing a new value obtained from the three least-significant bits of
 /// input variable `value` into `frm`.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fsrm(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fsrm {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
@@ -570,6 +618,7 @@ pub fn fsrm(value: u32) -> u32 {
 /// | 1 | UF | Underflow |
 /// | 0 | NX | Inexact |
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frflags() -> u32 {
     let value: u32;
     unsafe { asm!("frflags {}", out(reg) value, options(nomem, nostack)) };
@@ -582,179 +631,9 @@ pub fn frflags() -> u32 {
 /// and then writing a new value obtained from the five least-significant bits of
 /// input variable `value` into `fflags`.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fsflags(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fsflags {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
     original
 }
-
-/// `P0` transformation function as is used in the SM3 hash algorithm
-///
-/// This function is included in `Zksh` extension. It's defined as:
-///
-/// ```text
-/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17)
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-///
-/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the
-/// compression function `CF` uses the intermediate value `TT2` to calculate
-/// the variable `E` in one iteration for subsequent processes.
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksh")]
-pub fn sm3p0(x: u32) -> u32 {
-    let ans: u32;
-    unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
-    ans
-}
-
-/// `P1` transformation function as is used in the SM3 hash algorithm
-///
-/// This function is included in `Zksh` extension. It's defined as:
-///
-/// ```text
-/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23)
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-///
-/// In the SM3 algorithm, the `P1` transformation is used to expand message,
-/// where expanded word `Wj` can be generated from the previous words.
-/// The whole process can be described as the following pseudocode:
-///
-/// ```text
-/// FOR j=16 TO 67
-///     Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6
-/// ENDFOR
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksh")]
-pub fn sm3p1(x: u32) -> u32 {
-    let ans: u32;
-    unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
-    ans
-}
-
-/// Accelerates the round function `F` in the SM4 block cipher algorithm
-///
-/// This instruction is included in extension `Zksed`. It's defined as:
-///
-/// ```text
-/// SM4ED(x, a, BS) = x ⊕ T(ai)
-/// ... where
-/// ai = a.bytes[BS]
-/// T(ai) = L(τ(ai))
-/// bi = τ(ai) = SM4-S-Box(ai)
-/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
-/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
-/// and linear layer transform `L`.
-///
-/// In the SM4 algorithm, the round function `F` is defined as:
-///
-/// ```text
-/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
-/// ... where
-/// T(A) = L(τ(A))
-/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
-/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
-/// ```
-///
-/// It can be implemented by `sm4ed` instruction like:
-///
-/// ```no_run
-/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
-/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
-/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
-/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
-/// let a = x1 ^ x2 ^ x3 ^ rk;
-/// let c0 = sm4ed::<0>(x0, a);
-/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc.
-/// let c2 = sm4ed::<2>(c1, a);
-/// let c3 = sm4ed::<3>(c2, a);
-/// return c3; // c3 represents c[0..=3]
-/// # }
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksed")]
-pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
-    static_assert!(BS <= 3);
-    let ans: u32;
-    unsafe {
-        asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack))
-    };
-    ans
-}
-
-/// Accelerates the key schedule operation in the SM4 block cipher algorithm
-///
-/// This instruction is included in extension `Zksed`. It's defined as:
-///
-/// ```text
-/// SM4KS(x, k, BS) = x ⊕ T'(ki)
-/// ... where
-/// ki = k.bytes[BS]
-/// T'(ki) = L'(τ(ki))
-/// bi = τ(ki) = SM4-S-Box(ki)
-/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23)
-/// SM4KS = (ci ≪ (BS * 8)) ⊕ x
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ`
-/// and the replaced linear layer transform `L'`.
-///
-/// In the SM4 algorithm, the key schedule is defined as:
-///
-/// ```text
-/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i])
-/// ... where
-/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3]
-/// T'(K) = L'(τ(K))
-/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3))
-/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23)
-/// ```
-///
-/// where `MK` represents the input 128-bit encryption key,
-/// constants `FK` and `CK` are fixed system configuration constant values defined by the SM4 algorithm.
-/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like:
-///
-/// ```no_run
-/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
-/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
-/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
-/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
-/// let k = k1 ^ k2 ^ k3 ^ ck_i;
-/// let c0 = sm4ks::<0>(k0, k);
-/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc.
-/// let c2 = sm4ks::<2>(c1, k);
-/// let c3 = sm4ks::<3>(c2, k);
-/// return c3; // c3 represents c[0..=3]
-/// # }
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksed")]
-pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 {
-    static_assert!(BS <= 3);
-    let ans: u32;
-    unsafe {
-        asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack))
-    };
-    ans
-}
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
new file mode 100644
index 0000000000..db97f72bc4
--- /dev/null
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -0,0 +1,462 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.sm4ed"]
+    fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm4ks"]
+    fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm3p0"]
+    fn _sm3p0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm3p1"]
+    fn _sm3p1(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sig0"]
+    fn _sha256sig0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sig1"]
+    fn _sha256sig1(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sum0"]
+    fn _sha256sum0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sum1"]
+    fn _sha256sum1(rs1: i32) -> i32;
+}
+
+#[cfg(target_arch = "riscv32")]
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.xperm8.i32"]
+    fn _xperm8_32(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.xperm4.i32"]
+    fn _xperm4_32(rs1: i32, rs2: i32) -> i32;
+}
+
+#[cfg(target_arch = "riscv64")]
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.xperm8.i64"]
+    fn _xperm8_64(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.xperm4.i64"]
+    fn _xperm4_64(rs1: i64, rs2: i64) -> i64;
+}
+
+/// Byte-wise lookup of indices into a vector in registers.
+///
+/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
+/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is
+/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2
+/// is out of bounds.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.47
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkx` target feature is present.
+#[target_feature(enable = "zbkx")]
+// See #1464
+// #[cfg_attr(test, assert_instr(xperm8))]
+#[inline]
+pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
+    #[cfg(target_arch = "riscv32")]
+    {
+        _xperm8_32(rs1 as i32, rs2 as i32) as usize
+    }
+
+    #[cfg(target_arch = "riscv64")]
+    {
+        _xperm8_64(rs1 as i64, rs2 as i64) as usize
+    }
+}
+
+/// Nibble-wise lookup of indices into a vector.
+///
+/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4
+/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is
+/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2
+/// is out of bounds.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.48
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkx` target feature is present.
+#[target_feature(enable = "zbkx")]
+// See #1464
+// #[cfg_attr(test, assert_instr(xperm4))]
+#[inline]
+pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
+    #[cfg(target_arch = "riscv32")]
+    {
+        _xperm4_32(rs1 as i32, rs2 as i32) as usize
+    }
+
+    #[cfg(target_arch = "riscv64")]
+    {
+        _xperm4_64(rs1 as i64, rs2 as i64) as usize
+    }
+}
+
+/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.27
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sig0))]
+#[inline]
+pub unsafe fn sha256sig0(rs1: u32) -> u32 {
+    _sha256sig0(rs1 as i32) as u32
+}
+
+/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.28
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sig1))]
+#[inline]
+pub unsafe fn sha256sig1(rs1: u32) -> u32 {
+    _sha256sig1(rs1 as i32) as u32
+}
+
+/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.29
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sum0))]
+#[inline]
+pub unsafe fn sha256sum0(rs1: u32) -> u32 {
+    _sha256sum0(rs1 as i32) as u32
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.30
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sum1))]
+#[inline]
+pub unsafe fn sha256sum1(rs1: u32) -> u32 {
+    _sha256sum1(rs1 as i32) as u32
+}
+
+/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\].
+///
+/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A
+/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
+/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
+/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
+/// XLEN bits. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.43
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksed` target feature is present.
+///
+/// # Details
+///
+/// Accelerates the round function `F` in the SM4 block cipher algorithm
+///
+/// This instruction is included in extension `Zksed`. It's defined as:
+///
+/// ```text
+/// SM4ED(x, a, BS) = x ⊕ T(ai)
+/// ... where
+/// ai = a.bytes[BS]
+/// T(ai) = L(τ(ai))
+/// bi = τ(ai) = SM4-S-Box(ai)
+/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
+/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
+/// and linear layer transform `L`.
+///
+/// In the SM4 algorithm, the round function `F` is defined as:
+///
+/// ```text
+/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
+/// ... where
+/// T(A) = L(τ(A))
+/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
+/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
+/// ```
+///
+/// It can be implemented by `sm4ed` instruction like:
+///
+/// ```no_run
+/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
+/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
+/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
+/// let a = x1 ^ x2 ^ x3 ^ rk;
+/// let c0 = sm4ed(x0, a, 0);
+/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc.
+/// let c2 = sm4ed(c1, a, 2);
+/// let c3 = sm4ed(c2, a, 3);
+/// return c3; // c3 represents c[0..=3]
+/// # }
+/// ```
+#[target_feature(enable = "zksed")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm4ed, BS = 0))]
+#[inline]
+pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\].
+///
+/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A
+/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
+/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
+/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
+/// XLEN bits. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.44
+///
+/// # Note
+///
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksed` target feature is present.
+///
+/// # Details
+///
+/// Accelerates the key schedule operation in the SM4 block cipher algorithm
+///
+/// This instruction is included in extension `Zksed`. It's defined as:
+///
+/// ```text
+/// SM4KS(x, k, BS) = x ⊕ T'(ki)
+/// ... where
+/// ki = k.bytes[BS]
+/// T'(ki) = L'(τ(ki))
+/// bi = τ(ki) = SM4-S-Box(ki)
+/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23)
+/// SM4KS = (ci ≪ (BS * 8)) ⊕ x
+/// ```
+///
+/// where `⊕` represents 32-bit xor, `≪ k` represents rotate left by `k` bits, and `T'` combines
+/// the non linear S-Box transform `τ` with the replaced linear layer transform `L'`.
+///
+/// In the SM4 algorithm, the key schedule is defined as:
+///
+/// ```text
+/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i])
+/// ... where
+/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3]
+/// T'(K) = L'(τ(K))
+/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3))
+/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23)
+/// ```
+///
+/// `MK` is the input 128-bit key; `FK` and `CK` are fixed SM4 constants. Using `sm4ks`:
+///
+/// ```no_run
+/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
+/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
+/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
+/// let k = k1 ^ k2 ^ k3 ^ ck_i;
+/// let c0 = sm4ks(k0, k, 0);
+/// let c1 = sm4ks(c0, k, 1); // c1 represents c[0..=1], etc.
+/// let c2 = sm4ks(c1, k, 2);
+/// let c3 = sm4ks(c2, k, 3);
+/// return c3; // c3 represents c[0..=3]
+/// # }
+/// ```
+#[target_feature(enable = "zksed")]
+#[rustc_legacy_const_generics(2)]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm4ks, BS = 0))]
+#[inline]
+pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert!(BS < 4);
+
+    _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32
+}
+
+/// Implements the P0 transformation function as used in the SM3 hash function \[4, 30\].
+///
+/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
+/// P0 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.41
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksh` target feature is present.
+///
+/// # Details
+///
+/// The `P0` transformation function as used in the SM3 hash algorithm.
+///
+/// This function is included in the `Zksh` extension. It's defined as:
+///
+/// ```text
+/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17)
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+///
+/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the
+/// compression function `CF` uses the intermediate value `TT2` to calculate
+/// the variable `E` in one iteration for subsequent processes.
+#[target_feature(enable = "zksh")]
+// The `assert_instr` check below is disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sm3p0))]
+#[inline]
+pub unsafe fn sm3p0(rs1: u32) -> u32 {
+    _sm3p0(rs1 as i32) as u32
+}
+
+/// Implements the P1 transformation function as used in the SM3 hash function \[4, 30\].
+///
+/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
+/// P1 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.42
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksh` target feature is present.
+///
+/// # Details
+///
+/// The `P1` transformation function as used in the SM3 hash algorithm.
+///
+/// This function is included in the `Zksh` extension. It's defined as:
+///
+/// ```text
+/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23)
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+///
+/// In the SM3 algorithm, the `P1` transformation is used to expand the message,
+/// where each expanded word `Wj` is generated from previous words.
+/// The whole process can be described as the following pseudocode:
+///
+/// ```text
+/// FOR j=16 TO 67
+///     Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6
+/// ENDFOR
+/// ```
+#[target_feature(enable = "zksh")]
+// The `assert_instr` check below is disabled for now; see #1464
+// #[cfg_attr(test, assert_instr(sm3p1))]
+#[inline]
+pub unsafe fn sm3p1(rs1: u32) -> u32 {
+    _sm3p1(rs1 as i32) as u32
+}