From e5df944ced2fd112e7232232505f85ee4cb48b76 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Sun, 6 Aug 2023 15:40:41 +0200
Subject: [PATCH 01/12] Depend on `riscv_ext_intrinsics` feature.

---
 crates/core_arch/src/mod.rs              |  4 +--
 crates/core_arch/src/riscv_shared/mod.rs | 46 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs
index 12a5b086c6..f2cf11e479 100644
--- a/crates/core_arch/src/mod.rs
+++ b/crates/core_arch/src/mod.rs
@@ -64,7 +64,7 @@ pub mod arch {
     /// See the [module documentation](../index.html) for more details.
     #[cfg(any(target_arch = "riscv32", doc))]
     #[doc(cfg(any(target_arch = "riscv32")))]
-    #[unstable(feature = "stdsimd", issue = "27731")]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
     pub mod riscv32 {
         pub use crate::core_arch::riscv_shared::*;
     }
@@ -74,7 +74,7 @@ pub mod arch {
     /// See the [module documentation](../index.html) for more details.
     #[cfg(any(target_arch = "riscv64", doc))]
     #[doc(cfg(any(target_arch = "riscv64")))]
-    #[unstable(feature = "stdsimd", issue = "27731")]
+    #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
     pub mod riscv64 {
         pub use crate::core_arch::riscv64::*;
         // RISC-V RV64 supports all RV32 instructions as well in current specifications (2022-01-05).
diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs
index ed021df5a9..b796ee0e77 100644
--- a/crates/core_arch/src/riscv_shared/mod.rs
+++ b/crates/core_arch/src/riscv_shared/mod.rs
@@ -1,6 +1,7 @@
 //! Shared RISC-V intrinsics
 mod p;
 
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub use p::*;
 
 use crate::arch::asm;
@@ -10,6 +11,7 @@ use crate::arch::asm;
 /// The PAUSE instruction is a HINT that indicates the current hart's rate of instruction retirement
 /// should be temporarily reduced or paused. The duration of its effect must be bounded and may be zero.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn pause() {
     unsafe { asm!(".insn i 0x0F, 0, x0, x0, 0x010", options(nomem, nostack)) }
 }
@@ -19,6 +21,7 @@ pub fn pause() {
 /// The NOP instruction does not change any architecturally visible state, except for
 /// advancing the `pc` and incrementing any applicable performance counters.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn nop() {
     unsafe { asm!("nop", options(nomem, nostack)) }
 }
@@ -29,6 +32,7 @@ pub fn nop() {
 /// until an interrupt might need servicing. This instruction is a hint,
 /// and a legal implementation is to simply implement WFI as a NOP.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn wfi() {
     asm!("wfi", options(nomem, nostack))
 }
@@ -41,6 +45,7 @@ pub unsafe fn wfi() {
 /// FENCE.I does not ensure that other RISC-V harts' instruction fetches will observe the
 /// local hart's stores in a multiprocessor system.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn fence_i() {
     asm!("fence.i", options(nostack))
 }
@@ -54,6 +59,7 @@ pub unsafe fn fence_i() {
 /// virtual address in parameter `vaddr` and that match the address space identified by integer
 /// parameter `asid`, except for entries containing global mappings.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
     asm!("sfence.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
 }
@@ -65,6 +71,7 @@ pub unsafe fn sfence_vma(vaddr: usize, asid: usize) {
 /// The fence also invalidates all address-translation cache entries that contain leaf page
 /// table entries corresponding to the virtual address in parameter `vaddr`, for all address spaces.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
     asm!("sfence.vma {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -78,6 +85,7 @@ pub unsafe fn sfence_vma_vaddr(vaddr: usize) {
 /// address-translation cache entries matching the address space identified by integer
 /// parameter `asid`, except for entries containing global mappings.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_asid(asid: usize) {
     asm!("sfence.vma x0, {}", in(reg) asid, options(nostack))
 }
@@ -88,6 +96,7 @@ pub unsafe fn sfence_vma_asid(asid: usize) {
 /// tables, for all address spaces. The fence also invalidates all address-translation cache entries,
 /// for all address spaces.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_vma_all() {
     asm!("sfence.vma", options(nostack))
 }
@@ -97,6 +106,7 @@ pub unsafe fn sfence_vma_all() {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
     // asm!("sinval.vma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
     asm!(".insn r 0x73, 0, 0x0B, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -107,6 +117,7 @@ pub unsafe fn sinval_vma(vaddr: usize, asid: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x0B, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -116,6 +127,7 @@ pub unsafe fn sinval_vma_vaddr(vaddr: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x0B, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -125,6 +137,7 @@ pub unsafe fn sinval_vma_asid(asid: usize) {
 /// This instruction invalidates any address-translation cache entries that an
 /// `SFENCE.VMA` instruction with the same values of `vaddr` and `asid` would invalidate.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sinval_vma_all() {
     asm!(".insn r 0x73, 0, 0x0B, x0, x0, x0", options(nostack))
 }
@@ -134,6 +147,7 @@ pub unsafe fn sinval_vma_all() {
 /// This instruction guarantees that any previous stores already visible to the current RISC-V hart
 /// are ordered before subsequent `SINVAL.VMA` instructions executed by the same hart.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_w_inval() {
     // asm!("sfence.w.inval", options(nostack))
     asm!(".insn i 0x73, 0, x0, x0, 0x180", options(nostack))
@@ -144,6 +158,7 @@ pub unsafe fn sfence_w_inval() {
 /// This instruction guarantees that any previous SINVAL.VMA instructions executed by the current hart
 /// are ordered before subsequent implicit references by that hart to the memory-management data structures.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn sfence_inval_ir() {
     // asm!("sfence.inval.ir", options(nostack))
     asm!(".insn i 0x73, 0, x0, x0, 0x181", options(nostack))
@@ -158,6 +173,7 @@ pub unsafe fn sfence_inval_ir() {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.B`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_b(src: *const i8) -> i8 {
     let value: i8;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x600", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -173,6 +189,7 @@ pub unsafe fn hlv_b(src: *const i8) -> i8 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.BU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_bu(src: *const u8) -> u8 {
     let value: u8;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x601", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -188,6 +205,7 @@ pub unsafe fn hlv_bu(src: *const u8) -> u8 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.H`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_h(src: *const i16) -> i16 {
     let value: i16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x640", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -203,6 +221,7 @@ pub unsafe fn hlv_h(src: *const i16) -> i16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.HU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_hu(src: *const u16) -> u16 {
     let value: u16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x641", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -218,6 +237,7 @@ pub unsafe fn hlv_hu(src: *const u16) -> u16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.HU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
     let insn: u16;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x643", out(reg) insn, in(reg) src, options(readonly, nostack));
@@ -233,6 +253,7 @@ pub unsafe fn hlvx_hu(src: *const u16) -> u16 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLV.W`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlv_w(src: *const i32) -> i32 {
     let value: i32;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x680", out(reg) value, in(reg) src, options(readonly, nostack));
@@ -248,6 +269,7 @@ pub unsafe fn hlv_w(src: *const i32) -> i32 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HLVX.WU`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
     let insn: u32;
     asm!(".insn i 0x73, 0x4, {}, {}, 0x683", out(reg) insn, in(reg) src, options(readonly, nostack));
@@ -263,6 +285,7 @@ pub unsafe fn hlvx_wu(src: *const u32) -> u32 {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.B`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
     asm!(".insn r 0x73, 0x4, 0x31, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -276,6 +299,7 @@ pub unsafe fn hsv_b(dst: *mut i8, src: i8) {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.H`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
     asm!(".insn r 0x73, 0x4, 0x33, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -289,6 +313,7 @@ pub unsafe fn hsv_h(dst: *mut i16, src: i16) {
 /// This function is unsafe for it accesses the virtual supervisor or user via a `HSV.W`
 /// instruction which is effectively a dereference to any memory address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
     asm!(".insn r 0x73, 0x4, 0x35, x0, {}, {}", in(reg) dst, in(reg) src, options(nostack));
 }
@@ -302,6 +327,7 @@ pub unsafe fn hsv_w(dst: *mut i32, src: i32) {
 ///
 /// This fence specifies a single guest virtual address, and a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
     // asm!("hfence.vvma {}, {}", in(reg) vaddr, in(reg) asid)
     asm!(".insn r 0x73, 0, 0x11, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -316,6 +342,7 @@ pub unsafe fn hfence_vvma(vaddr: usize, asid: usize) {
 ///
 /// This fence specifies a single guest virtual address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x11, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -329,6 +356,7 @@ pub unsafe fn hfence_vvma_vaddr(vaddr: usize) {
 ///
 /// This fence specifies a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x11, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -342,6 +370,7 @@ pub unsafe fn hfence_vvma_asid(asid: usize) {
 ///
 /// This fence applies to any guest address spaces and guest virtual addresses.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_vvma_all() {
     asm!(".insn r 0x73, 0, 0x11, x0, x0, x0", options(nostack))
 }
@@ -354,6 +383,7 @@ pub unsafe fn hfence_vvma_all() {
 /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine
 /// by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
     // asm!("hfence.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
     asm!(".insn r 0x73, 0, 0x31, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
@@ -366,6 +396,7 @@ pub unsafe fn hfence_gvma(gaddr: usize, vmid: usize) {
 ///
 /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
     asm!(".insn r 0x73, 0, 0x31, x0, {}, x0", in(reg) gaddr, options(nostack))
 }
@@ -377,6 +408,7 @@ pub unsafe fn hfence_gvma_gaddr(gaddr: usize) {
 ///
 /// This fence specifies a single virtual machine by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_vmid(vmid: usize) {
     asm!(".insn r 0x73, 0, 0x31, x0, x0, {}", in(reg) vmid, options(nostack))
 }
@@ -388,6 +420,7 @@ pub unsafe fn hfence_gvma_vmid(vmid: usize) {
 ///
 /// This fence specifies all guest physical addresses and all virtual machines.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hfence_gvma_all() {
     asm!(".insn r 0x73, 0, 0x31, x0, x0, x0", options(nostack))
 }
@@ -399,6 +432,7 @@ pub unsafe fn hfence_gvma_all() {
 ///
 /// This fence specifies a single guest virtual address, and a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
     // asm!("hinval.vvma {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
     asm!(".insn r 0x73, 0, 0x13, x0, {}, {}", in(reg) vaddr, in(reg) asid, options(nostack))
@@ -411,6 +445,7 @@ pub unsafe fn hinval_vvma(vaddr: usize, asid: usize) {
 ///
 /// This fence specifies a single guest virtual address.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
     asm!(".insn r 0x73, 0, 0x13, x0, {}, x0", in(reg) vaddr, options(nostack))
 }
@@ -422,6 +457,7 @@ pub unsafe fn hinval_vvma_vaddr(vaddr: usize) {
 ///
 /// This fence specifies a single guest address-space identifier.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_asid(asid: usize) {
     asm!(".insn r 0x73, 0, 0x13, x0, x0, {}", in(reg) asid, options(nostack))
 }
@@ -433,6 +469,7 @@ pub unsafe fn hinval_vvma_asid(asid: usize) {
 ///
 /// This fence applies to any guest address spaces and guest virtual addresses.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_vvma_all() {
     asm!(".insn r 0x73, 0, 0x13, x0, x0, x0", options(nostack))
 }
@@ -445,6 +482,7 @@ pub unsafe fn hinval_vvma_all() {
 /// This fence specifies a single guest physical address, **shifted right by 2 bits**, and a single virtual machine
 /// by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
     // asm!("hinval.gvma {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
     asm!(".insn r 0x73, 0, 0x33, x0, {}, {}", in(reg) gaddr, in(reg) vmid, options(nostack))
@@ -457,6 +495,7 @@ pub unsafe fn hinval_gvma(gaddr: usize, vmid: usize) {
 ///
 /// This fence specifies a single guest physical address; **the physical address should be shifted right by 2 bits**.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
     asm!(".insn r 0x73, 0, 0x33, x0, {}, x0", in(reg) gaddr, options(nostack))
 }
@@ -468,6 +507,7 @@ pub unsafe fn hinval_gvma_gaddr(gaddr: usize) {
 ///
 /// This fence specifies a single virtual machine by virtual machine identifier (VMID).
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_vmid(vmid: usize) {
     asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack))
 }
@@ -479,6 +519,7 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) {
 ///
 /// This fence specifies all guest physical addresses and all virtual machines.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub unsafe fn hinval_gvma_all() {
     asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack))
 }
@@ -502,6 +543,7 @@ pub unsafe fn hinval_gvma_all() {
 /// [`frrm`]: fn.frrm.html
 /// [`frflags`]: fn.frflags.html
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frcsr() -> u32 {
     let value: u32;
     unsafe { asm!("frcsr {}", out(reg) value, options(nomem, nostack)) };
@@ -513,6 +555,7 @@ pub fn frcsr() -> u32 {
 /// This function swaps the value in `fcsr` by copying the original value to be returned,
 /// and then writing a new value obtained from input variable `value` into `fcsr`.
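+///
+/// # Example
+///
+/// A minimal sketch of a save/modify/restore pattern around `fcsr` (the
+/// `with_cleared_fcsr` helper is illustrative, not part of this crate):
+///
+/// ```no_run
+/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+/// # fn with_cleared_fcsr(f: impl FnOnce()) {
+/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::fscsr;
+/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::fscsr;
+/// let saved = fscsr(0); // clear `fcsr`, keeping the previous value
+/// f();
+/// let _ = fscsr(saved); // restore the original `fcsr`
+/// # }
+/// ```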
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fscsr(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fscsr {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
@@ -535,6 +578,7 @@ pub fn fscsr(value: u32) -> u32 {
 /// | 110 |     | _Reserved for future use._ |
 /// | 111 | DYN | In Rounding Mode register, _reserved_. |
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frrm() -> u32 {
     let value: u32;
     unsafe { asm!("frrm {}", out(reg) value, options(nomem, nostack)) };
@@ -547,6 +591,7 @@ pub fn frrm() -> u32 {
 /// and then writing a new value obtained from the three least-significant bits of
 /// input variable `value` into `frm`.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fsrm(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fsrm {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
@@ -570,6 +615,7 @@ pub fn fsrm(value: u32) -> u32 {
 /// | 1 | UF | Underflow |
 /// | 0 | NX | Inexact |
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn frflags() -> u32 {
     let value: u32;
     unsafe { asm!("frflags {}", out(reg) value, options(nomem, nostack)) };

From 6d97e92b50b0dd76cf52be4543c181f4ebb40716 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Sun, 6 Aug 2023 15:41:34 +0200
Subject: [PATCH 02/12] Implement RISC-V Zk extension intrinsics

---
 crates/core_arch/src/mod.rs              |   5 +
 crates/core_arch/src/riscv32/mod.rs      |   5 +
 crates/core_arch/src/riscv32/zk.rs       | 458 +++++++++++++++++
 crates/core_arch/src/riscv64/mod.rs      |   4 +
 crates/core_arch/src/riscv64/zk.rs       | 388 +++++++++++++++
 crates/core_arch/src/riscv_shared/mod.rs | 175 +------
 crates/core_arch/src/riscv_shared/zk.rs  | 594 +++++++++++++++++++++++
 7 files changed, 1458 insertions(+), 171 deletions(-)
 create mode 100644 crates/core_arch/src/riscv32/mod.rs
 create mode 100644 crates/core_arch/src/riscv32/zk.rs
 create mode 100644 crates/core_arch/src/riscv64/zk.rs
 create mode 100644 crates/core_arch/src/riscv_shared/zk.rs

diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs
index f2cf11e479..ad3ec863d4 100644
--- a/crates/core_arch/src/mod.rs
+++ b/crates/core_arch/src/mod.rs
@@ -66,6 +66,7 @@ pub mod arch {
     #[doc(cfg(any(target_arch = "riscv32")))]
     #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
     pub mod riscv32 {
+        pub use crate::core_arch::riscv32::*;
         pub use crate::core_arch::riscv_shared::*;
     }
 
@@ -279,6 +280,10 @@ mod aarch64;
 #[doc(cfg(any(target_arch = "arm")))]
 mod arm;
 
+#[cfg(any(target_arch = "riscv32", doc))]
+#[doc(cfg(any(target_arch = "riscv32")))]
+mod riscv32;
+
 #[cfg(any(target_arch = "riscv64", doc))]
 #[doc(cfg(any(target_arch = "riscv64")))]
 mod riscv64;
diff --git a/crates/core_arch/src/riscv32/mod.rs b/crates/core_arch/src/riscv32/mod.rs
new file mode 100644
index 0000000000..394d695ae0
--- /dev/null
+++ b/crates/core_arch/src/riscv32/mod.rs
@@ -0,0 +1,5 @@
+//! RISC-V RV32 specific intrinsics
+
+mod zk;
+
+pub use zk::*;
\ No newline at end of file
diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
new file mode 100644
index 0000000000..56115a986f
--- /dev/null
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -0,0 +1,458 @@
+#[allow(unused)]
+use core::arch::asm;
+
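+// Turns a runtime `$imm2` into a constant immediate: each possible value of the
+// two low bits expands `$expand` with a literal that the `asm!` templates below
+// splice in via `concat!`.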
+#[allow(unused)]
+macro_rules! constify_imm2 {
+    ($imm2:expr, $expand:ident) => {
+        #[allow(overflowing_literals)]
+        match $imm2 & 0b11 {
+            0b00 => $expand!(0),
+            0b01 => $expand!(1),
+            0b10 => $expand!(2),
+            _ => $expand!(3),
+        }
+    };
+}
+
+/// AES final round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.3
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
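+///
+/// # Example
+///
+/// A minimal sketch, assuming the `zkne` target feature is enabled (the
+/// wrapper function is illustrative only):
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv32")]
+/// # unsafe fn final_round_step(state: u32, col: u32) -> u32 {
+/// # use core::arch::riscv32::aes32esi;
+/// // Apply the forward SBox to byte 0 of `col` and XOR the result into `state`.
+/// aes32esi(state, col, 0)
+/// # }
+/// ```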
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes32esi))]
+#[inline]
+pub unsafe fn aes32esi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32esi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32esi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }};
+    }
+    constify_imm2!(bs, aes32esi)
+}
+
+/// AES middle round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with
+/// rs1. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.4
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes32esmi))]
+#[inline]
+pub unsafe fn aes32esmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32esmi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32esmi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }};
+    }
+    constify_imm2!(bs, aes32esmi)
+}
+
+/// AES final round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// inverse AES SBox operation, and XOR’s the result with rs1. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.1
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes32dsi))]
+#[inline]
+pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32dsi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32dsi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }};
+    }
+    constify_imm2!(bs, aes32dsi)
+}
+
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes32dsmi))]
+#[inline]
+/// AES middle round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with
+/// rs1. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.2
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+pub unsafe fn aes32dsmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32dsmi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32dsmi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }};
+    }
+    constify_imm2!(bs, aes32dsmi)
+}
+
+/// Place upper/lower halves of the source register into odd/even bits of the destination
+/// respectively.
+///
+/// This instruction places bits in the low half of the source register into the even bit
+/// positions of the destination, and bits in the high half of the source register into the odd
+/// bit positions of the destination. It is the inverse of the unzip instruction. This
+/// instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.49
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(zip))]
+#[inline]
+pub unsafe fn zip(rs: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "zip {rd},{rs}",
+            rd = lateout(reg) value,
+            rs = in(reg) rs,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Place odd and even bits of the source word into upper/lower halves of the destination.
+///
+/// This instruction places the even bits of the source register into the low half of the
+/// destination, and the odd bits of the source into the high half of the destination. It is
+/// the inverse of the zip instruction. This instruction is available only on RV32.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.45
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
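+///
+/// # Example
+///
+/// A minimal sketch showing that `unzip` is the inverse of `zip`, assuming the
+/// `zbkb` target feature is enabled (the wrapper function is illustrative
+/// only):
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv32")]
+/// # unsafe fn round_trip(x: usize) {
+/// # use core::arch::riscv32::{unzip, zip};
+/// // Interleave the two halves of `x`, then separate them again.
+/// assert_eq!(unzip(zip(x)), x);
+/// # }
+/// ```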
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(unzip))]
+#[inline]
+pub unsafe fn unzip(rs: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "unzip {rd},{rs}",
+            rd = lateout(reg) value,
+            rs = in(reg) rs,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.31
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig0h))]
+#[inline]
+pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sig0h {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.32
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
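+///
+/// # Example
+///
+/// A minimal sketch of the full 64-bit Sigma0 transform on a (high, low)
+/// register pair, pairing this instruction with `sha512sig0h` following the
+/// pattern given in the specification. Assumes the `zknh` target feature is
+/// enabled; the wrapper function is illustrative only:
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv32")]
+/// # unsafe fn sigma0(hi: u32, lo: u32) -> (u32, u32) {
+/// # use core::arch::riscv32::{sha512sig0h, sha512sig0l};
+/// // Both halves of the 64-bit input feed each instruction, swapped.
+/// (sha512sig0h(hi, lo), sha512sig0l(lo, hi))
+/// # }
+/// ```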
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig0l))]
+#[inline]
+pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sig0l {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
+/// function \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.33
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1h))]
+#[inline]
+pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sig1h {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
+/// \[49\] (Section 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the
+/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a
+/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit
+/// registers. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.34
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1l))]
+#[inline]
+pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sig1l {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.35
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum0r))]
+#[inline]
+pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sum0r {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
+/// 4.1.3).
+///
+/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the
+/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and
+/// output are each represented by two 32-bit registers. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.36
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum1r))]
+#[inline]
+pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
+    let value: u32;
+    unsafe {
+        asm!(
+            "sha512sum1r {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
\ No newline at end of file
diff --git a/crates/core_arch/src/riscv64/mod.rs b/crates/core_arch/src/riscv64/mod.rs
index 751b9a860f..ad16d6c231 100644
--- a/crates/core_arch/src/riscv64/mod.rs
+++ b/crates/core_arch/src/riscv64/mod.rs
@@ -1,6 +1,10 @@
 //! RISC-V RV64 specific intrinsics
 use crate::arch::asm;
 
+mod zk;
+
+pub use zk::*;
+
 /// Loads virtual machine memory by unsigned word integer
 ///
 /// This instruction performs an explicit memory access as though `V=1`;
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
new file mode 100644
index 0000000000..de45ad62c7
--- /dev/null
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -0,0 +1,388 @@
+#[allow(unused)]
+use core::arch::asm;
+
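+// Turns a runtime `rnum` into a constant immediate: every valid value in
+// `0..=10` expands `$expand` with that literal, and anything out of range falls
+// back to 0, matching the behaviour documented on `aes64ks1i` below.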
+#[allow(unused)]
+macro_rules! constify_imm_0_until_10 {
+    ($imm2:expr, $expand:ident) => {
+        match $imm2 {
+            1 => $expand!(1),
+            2 => $expand!(2),
+            3 => $expand!(3),
+            4 => $expand!(4),
+            5 => $expand!(5),
+            6 => $expand!(6),
+            7 => $expand!(7),
+            8 => $expand!(8),
+            9 => $expand!(9),
+            10 => $expand!(10),
+            _ => $expand!(0),
+        }
+    };
+}
+
+/// AES final round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.7
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes64es))]
+#[inline]
+pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64es {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// AES middle round encryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.8
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
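+///
+/// # Example
+///
+/// A minimal sketch of one middle encryption round: the two calls, with their
+/// sources swapped, produce the two halves of the next-round state, and the
+/// round key is XOR'ed in afterwards. Assumes the `zkne` target feature is
+/// enabled; the wrapper function is illustrative only:
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv64")]
+/// # unsafe fn enc_round(lo: u64, hi: u64, rk_lo: u64, rk_hi: u64) -> (u64, u64) {
+/// # use core::arch::riscv64::aes64esm;
+/// // ShiftRows + SubBytes + MixColumns over the 128-bit state, then AddRoundKey.
+/// (aes64esm(lo, hi) ^ rk_lo, aes64esm(hi, lo) ^ rk_hi)
+/// # }
+/// ```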
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes64esm))]
+#[inline]
+pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64esm {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// AES final round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.5
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ds))]
+#[inline]
+pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64ds {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// AES middle round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps.
+/// This instruction must always be implemented such that its execution latency does not depend
+/// on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.6
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64dsm))]
+#[inline]
+pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64esm {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES Block cipher
+/// involving the SBox operation.
+///
+/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the
+/// AES block cipher Key Schedule. This instruction must always be implemented such that its
+/// execution latency does not depend on the data being operated on. Note that rnum must be in
+/// the range 0x0..0xA. The values 0xB..0xF are reserved.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.10
+///
+/// # Note
+///
+/// The `rnum` parameter is expected to be a constant value inside the range `0..=10`; if a
+/// value outside the valid range is given, `rnum=0` is used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` or `zknd` target feature is present.
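+///
+/// # Example
+///
+/// A minimal sketch of deriving the next AES-128 round key, pairing this
+/// instruction with `aes64ks2`. Assumes the `zkne` or `zknd` target feature is
+/// enabled; the wrapper function is illustrative only:
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv64")]
+/// # unsafe fn next_round_key(k0: u64, k1: u64) -> (u64, u64) {
+/// # use core::arch::riscv64::{aes64ks1i, aes64ks2};
+/// // Rotation, SubBytes and round-constant step on the high key word...
+/// let t = aes64ks1i(k1, 0);
+/// // ...then fold it through both halves of the key.
+/// let n0 = aes64ks2(t, k0);
+/// let n1 = aes64ks2(n0, k1);
+/// (n0, n1)
+/// # }
+/// ```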
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ks1i))]
+#[inline]
+pub unsafe fn aes64ks1i(rs1: u64, rnum: u8) -> u64 {
+    macro_rules! aes64ks1i {
+        ($imm_0_until_10:expr) => {{
+            let value: u64;
+            unsafe {
+                asm!(
+                    concat!("aes64ks1i {rd},{rs1},", $imm_0_until_10),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    options(pure, nomem, nostack),
+                )
+            }
+            value
+        }};
+    }
+    constify_imm_0_until_10!(rnum, aes64ks1i)
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES Block cipher.
+///
+/// This instruction implements the additional XOR’ing of key words as part of the AES block
+/// cipher Key Schedule. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.11
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` or `zknd` target feature is present.
+#[target_feature(enable = "zkne", enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ks2))]
+#[inline]
+pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64ks2 {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Pack the low 16-bits of rs1 and rs2 into rd on RV64
+///
+/// This instruction packs the low 16 bits of rs1 and rs2 into the 32 least-significant bits of
+/// rd, sign extending the 32-bit result to the rest of rd. This instruction only exists on
+/// RV64 based systems.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.26
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
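+///
+/// # Example
+///
+/// A minimal sketch with concrete values, assuming the `zbkb` target feature
+/// is enabled (the wrapper function is illustrative only):
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv64")]
+/// # unsafe fn demo() {
+/// # use core::arch::riscv64::packw;
+/// // rs2's low half lands in bits 16..32; 0x8000 there sets bit 31,
+/// // so the 32-bit result is sign-extended with ones.
+/// assert_eq!(packw(0x1234, 0x8000), 0xffff_ffff_8000_1234);
+/// # }
+/// ```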
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(packw))]
+#[inline]
+pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "packw {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.37
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
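+///
+/// # Example
+///
+/// A minimal sketch of one SHA2-512 message-schedule step over a 16-word
+/// circular buffer, combining this transform with `sha512sig1`. Assumes the
+/// `zknh` target feature is enabled; the wrapper function is illustrative
+/// only:
+///
+/// ```no_run
+/// # #[cfg(target_arch = "riscv64")]
+/// # unsafe fn schedule(w: &[u64; 16], i: usize) -> u64 {
+/// # use core::arch::riscv64::{sha512sig0, sha512sig1};
+/// // W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]  (mod 2^64)
+/// sha512sig1(w[(i + 14) % 16])
+///     .wrapping_add(w[(i + 9) % 16])
+///     .wrapping_add(sha512sig0(w[(i + 1) % 16]))
+///     .wrapping_add(w[i % 16])
+/// # }
+/// ```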
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig0))]
+#[inline]
+pub unsafe fn sha512sig0(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sig0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sigma1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.38
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sig1))]
+#[inline]
+pub unsafe fn sha512sig1(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sig1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum0
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.39
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum0))]
+#[inline]
+pub unsafe fn sha512sum0(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sum0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum1
+/// transform of the SHA2-512 hash function \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.40
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum1))]
+#[inline]
+pub unsafe fn sha512sum1(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sum0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs
index b796ee0e77..d14a440044 100644
--- a/crates/core_arch/src/riscv_shared/mod.rs
+++ b/crates/core_arch/src/riscv_shared/mod.rs
@@ -1,8 +1,11 @@
 //! Shared RISC-V intrinsics
+
+mod zk;
 mod p;
 
 #[unstable(feature = "stdsimd", issue = "27731")]
 pub use p::*;
+pub use zk::*;
 
 use crate::arch::asm;
 
@@ -628,179 +631,9 @@ pub fn frflags() -> u32 {
 /// and then writing a new value obtained from the five least-significant bits of
 /// input variable `value` into `fflags`.
 #[inline]
+#[unstable(feature = "stdsimd", issue = "27731")]
 pub fn fsflags(value: u32) -> u32 {
     let original: u32;
     unsafe { asm!("fsflags {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) }
     original
 }
-
-/// `P0` transformation function as is used in the SM3 hash algorithm
-///
-/// This function is included in `Zksh` extension. It's defined as:
-///
-/// ```text
-/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17)
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-///
-/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the
-/// compression function `CF` uses the intermediate value `TT2` to calculate
-/// the variable `E` in one iteration for subsequent processes.
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksh")]
-pub fn sm3p0(x: u32) -> u32 {
-    let ans: u32;
-    unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
-    ans
-}
-
-/// `P1` transformation function as is used in the SM3 hash algorithm
-///
-/// This function is included in `Zksh` extension. It's defined as:
-///
-/// ```text
-/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23)
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-///
-/// In the SM3 algorithm, the `P1` transformation is used to expand message,
-/// where expanded word `Wj` can be generated from the previous words.
-/// The whole process can be described as the following pseudocode:
-///
-/// ```text
-/// FOR j=16 TO 67
-///     Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6
-/// ENDFOR
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksh")]
-pub fn sm3p1(x: u32) -> u32 {
-    let ans: u32;
-    unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
-    ans
-}
-
-/// Accelerates the round function `F` in the SM4 block cipher algorithm
-///
-/// This instruction is included in extension `Zksed`. It's defined as:
-///
-/// ```text
-/// SM4ED(x, a, BS) = x ⊕ T(ai)
-/// ... where
-/// ai = a.bytes[BS]
-/// T(ai) = L(τ(ai))
-/// bi = τ(ai) = SM4-S-Box(ai)
-/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
-/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
-/// and linear layer transform `L`.
-///
-/// In the SM4 algorithm, the round function `F` is defined as:
-///
-/// ```text
-/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
-/// ... where
-/// T(A) = L(τ(A))
-/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
-/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
-/// ```
-///
-/// It can be implemented by `sm4ed` instruction like:
-///
-/// ```no_run
-/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
-/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
-/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
-/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
-/// let a = x1 ^ x2 ^ x3 ^ rk;
-/// let c0 = sm4ed::<0>(x0, a);
-/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc.
-/// let c2 = sm4ed::<2>(c1, a);
-/// let c3 = sm4ed::<3>(c2, a);
-/// return c3; // c3 represents c[0..=3]
-/// # }
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksed")]
-pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
-    static_assert!(BS <= 3);
-    let ans: u32;
-    unsafe {
-        asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack))
-    };
-    ans
-}
-
-/// Accelerates the key schedule operation in the SM4 block cipher algorithm
-///
-/// This instruction is included in extension `Zksed`. It's defined as:
-///
-/// ```text
-/// SM4KS(x, k, BS) = x ⊕ T'(ki)
-/// ... where
-/// ki = k.bytes[BS]
-/// T'(ki) = L'(τ(ki))
-/// bi = τ(ki) = SM4-S-Box(ki)
-/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23)
-/// SM4KS = (ci ≪ (BS * 8)) ⊕ x
-/// ```
-///
-/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
-/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ`
-/// and the replaced linear layer transform `L'`.
-///
-/// In the SM4 algorithm, the key schedule is defined as:
-///
-/// ```text
-/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i])
-/// ... where
-/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3]
-/// T'(K) = L'(τ(K))
-/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3))
-/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23)
-/// ```
-///
-/// where `MK` represents the input 128-bit encryption key,
-/// constants `FK` and `CK` are fixed system configuration constant values defined by the SM4 algorithm.
-/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like:
-///
-/// ```no_run
-/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
-/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
-/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
-/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
-/// let k = k1 ^ k2 ^ k3 ^ ck_i;
-/// let c0 = sm4ks::<0>(k0, k);
-/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc.
-/// let c2 = sm4ks::<2>(c1, k);
-/// let c3 = sm4ks::<3>(c2, k);
-/// return c3; // c3 represents c[0..=3]
-/// # }
-/// ```
-///
-/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
-/// this instruction must always be independent from the data it operates on.
-#[inline]
-#[target_feature(enable = "zksed")]
-pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 {
-    static_assert!(BS <= 3);
-    let ans: u32;
-    unsafe {
-        asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack))
-    };
-    ans
-}
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
new file mode 100644
index 0000000000..0877e052a7
--- /dev/null
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -0,0 +1,594 @@
+#[allow(unused)]
+use core::arch::asm;
+
+#[allow(unused)]
+macro_rules! constify_imm2 {
+    ($imm2:expr, $expand:ident) => {
+        #[allow(overflowing_literals)]
+        match $imm2 & 0b11 {
+            0b00 => $expand!(0),
+            0b01 => $expand!(1),
+            0b10 => $expand!(2),
+            _ => $expand!(3),
+        }
+    };
+}
+
+/// Pack the low halves of rs1 and rs2 into rd.
+///
+/// The pack instruction packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in
+/// the lower half and rs2 in the upper half.
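+///
+/// As an illustrative sketch on RV32 (so XLEN/2 = 16 bits):
+///
+/// ```text
+/// pack(0x1234_5678, 0x9ABC_DEF0) == 0xDEF0_5678
+/// ```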
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.17
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(pack))]
+#[inline]
+pub unsafe fn pack(rs1: usize, rs2: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "pack {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Pack the low bytes of rs1 and rs2 into rd.
+///
+/// The packh instruction packs the least-significant bytes of rs1 and rs2 into the 16
+/// least-significant bits of rd, zero extending the rest of rd.
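+///
+/// As an illustrative sketch:
+///
+/// ```text
+/// packh(0x1234_5678, 0x9ABC_DEF0) == 0x0000_F078
+/// ```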
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.18
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(packh))]
+#[inline]
+pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "packh {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Reverse the bits in each byte of a source register.
+///
+/// This instruction reverses the order of the bits in every byte of a register.
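+///
+/// As an illustrative sketch (bit 0 of each byte swaps with bit 7, bit 1 with bit 6, and so on):
+///
+/// ```text
+/// brev8(0x0102_0304) == 0x8040_C020
+/// ```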
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.13
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkb` target feature is present.
+#[target_feature(enable = "zbkb")]
+#[cfg_attr(test, assert_instr(brev8))]
+#[inline]
+pub unsafe fn brev8(rs: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "brev8 {rd},{rs}",
+            rd = lateout(reg) value,
+            rs = in(reg) rs,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Byte-wise lookup of indices into a vector in registers.
+///
+/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
+/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is
+/// each element in rs2 replaced by the indexed element in rs1, or zero if the index
+/// is out of bounds.
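+///
+/// As an illustrative sketch on RV32, the indexes in rs2 below select the bytes of rs1 in
+/// reverse order:
+///
+/// ```text
+/// xperm8(0xDDCC_BBAA, 0x0001_0203) == 0xAABB_CCDD
+/// ```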
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.47
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkx` target feature is present.
+#[target_feature(enable = "zbkx")]
+#[cfg_attr(test, assert_instr(xperm8))]
+#[inline]
+pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "xperm8 {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Nibble-wise lookup of indices into a vector.
+///
+/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4
+/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is
+/// each element in rs2 replaced by the indexed element in rs1, or zero if the index
+/// is out of bounds.
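+///
+/// As an illustrative sketch on RV32: an identity index vector returns rs1 unchanged, while
+/// out-of-range indexes (8 and above) select zero:
+///
+/// ```text
+/// xperm4(0x7654_3210, 0x7654_3210) == 0x7654_3210
+/// xperm4(0x7654_3210, 0xFFFF_FFFF) == 0x0000_0000
+/// ```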
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.48
+///
+/// # Safety
+///
+/// This function is safe to use if the `zbkx` target feature is present.
+#[target_feature(enable = "zbkx")]
+#[cfg_attr(test, assert_instr(xperm4))]
+#[inline]
+pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "xperm4 {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
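+///
+/// The transform computed corresponds to the SHA2-256 `σ0` function, where `>>>` denotes a
+/// 32-bit rotate right and `>>` a logical shift right:
+///
+/// ```text
+/// sha256sig0(x) = (x >>> 7) ^ (x >>> 18) ^ (x >> 3)
+/// ```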
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.27
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha256sig0))]
+#[inline]
+pub unsafe fn sha256sig0(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sha256sig0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
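+///
+/// The transform computed corresponds to the SHA2-256 `σ1` function, where `>>>` denotes a
+/// 32-bit rotate right and `>>` a logical shift right:
+///
+/// ```text
+/// sha256sig1(x) = (x >>> 17) ^ (x >>> 19) ^ (x >> 10)
+/// ```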
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.28
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha256sig1))]
+#[inline]
+pub unsafe fn sha256sig1(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sha256sig1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
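+///
+/// The transform computed corresponds to the SHA2-256 `Σ0` function, where `>>>` denotes a
+/// 32-bit rotate right:
+///
+/// ```text
+/// sha256sum0(x) = (x >>> 2) ^ (x >>> 13) ^ (x >>> 22)
+/// ```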
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.29
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha256sum0))]
+#[inline]
+pub unsafe fn sha256sum0(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sha256sig1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\]
+/// (Section 4.1.2).
+///
+/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
+/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
+/// register are operated on, and the result sign extended to XLEN bits. Though named for
+/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
+/// described in \[49\]. This instruction must always be implemented such that its execution
+/// latency does not depend on the data being operated on.
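+///
+/// The transform computed corresponds to the SHA2-256 `Σ1` function, where `>>>` denotes a
+/// 32-bit rotate right:
+///
+/// ```text
+/// sha256sum1(x) = (x >>> 6) ^ (x >>> 11) ^ (x >>> 25)
+/// ```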
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.30
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha256sum1))]
+#[inline]
+pub unsafe fn sha256sum1(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sha256sig1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\].
+///
+/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A
+/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
+/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
+/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
+/// XLEN bits. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.43
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksed` target feature is present.
+///
+/// # Details
+///
+/// Accelerates the round function `F` in the SM4 block cipher algorithm
+///
+/// This instruction is included in extension `Zksed`. It's defined as:
+///
+/// ```text
+/// SM4ED(x, a, BS) = x ⊕ T(ai)
+/// ... where
+/// ai = a.bytes[BS]
+/// T(ai) = L(τ(ai))
+/// bi = τ(ai) = SM4-S-Box(ai)
+/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
+/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
+/// and linear layer transform `L`.
+///
+/// In the SM4 algorithm, the round function `F` is defined as:
+///
+/// ```text
+/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
+/// ... where
+/// T(A) = L(τ(A))
+/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
+/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
+/// ```
+///
+/// It can be implemented by `sm4ed` instruction like:
+///
+/// ```no_run
+/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
+/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
+/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
+/// let a = x1 ^ x2 ^ x3 ^ rk;
+/// let c0 = sm4ed(x0, a, 0);
+/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc.
+/// let c2 = sm4ed(c1, a, 2);
+/// let c3 = sm4ed(c2, a, 3);
+/// return c3; // c3 represents c[0..=3]
+/// # }
+/// ```
+#[target_feature(enable = "zksed")]
+#[cfg_attr(test, assert_instr(sm4ed))]
+#[inline]
+pub unsafe fn sm4ed(rs1: usize, rs2: usize, bs: u8) -> usize {
+    macro_rules! sm4ed {
+        ($imm2:expr) => {{
+            let value: usize;
+            unsafe {
+                asm!(
+                    concat!("sm4ed {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                )
+            }
+            value
+        }}
+    }
+    constify_imm2!(bs, sm4ed)
+}
+
+/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\].
+///
+/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A
+/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
+/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
+/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
+/// XLEN bits. This instruction must always be implemented such that its execution latency does
+/// not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.44
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksed` target feature is present.
+///
+/// # Details
+///
+/// Accelerates the key schedule operation in the SM4 block cipher algorithm
+///
+/// This instruction is included in extension `Zksed`. It's defined as:
+///
+/// ```text
+/// SM4KS(x, k, BS) = x ⊕ T'(ki)
+/// ... where
+/// ki = k.bytes[BS]
+/// T'(ki) = L'(τ(ki))
+/// bi = τ(ki) = SM4-S-Box(ki)
+/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23)
+/// SM4KS = (ci ≪ (BS * 8)) ⊕ x
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ`
+/// and the replaced linear layer transform `L'`.
+///
+/// In the SM4 algorithm, the key schedule is defined as:
+///
+/// ```text
+/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i])
+/// ... where
+/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3]
+/// T'(K) = L'(τ(K))
+/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3))
+/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23)
+/// ```
+///
+/// where `MK` represents the input 128-bit encryption key, and constants `FK` and `CK` are
+/// fixed system configuration constant values defined by the SM4 algorithm.
+/// Hence, the key schedule operation can be implemented by the `sm4ks` instruction like:
+///
+/// ```no_run
+/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
+/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
+/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
+/// let k = k1 ^ k2 ^ k3 ^ ck_i;
+/// let c0 = sm4ks(k0, k, 0);
+/// let c1 = sm4ks(c0, k, 1); // c1 represents c[0..=1], etc.
+/// let c2 = sm4ks(c1, k, 2);
+/// let c3 = sm4ks(c2, k, 3);
+/// return c3; // c3 represents c[0..=3]
+/// # }
+/// ```
+#[target_feature(enable = "zksed")]
+#[cfg_attr(test, assert_instr(sm4ks))]
+#[inline]
+pub unsafe fn sm4ks(rs1: usize, rs2: usize, bs: u8) -> usize {
+    macro_rules! sm4ks {
+        ($imm2:expr) => {{
+            let value: usize;
+            unsafe {
+                asm!(
+                    concat!("sm4ks {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                )
+            }
+            value
+        }}
+    }
+    constify_imm2!(bs, sm4ks)
+}
+
+/// Implements the P0 transformation function as used in the SM3 hash function \[4, 30\].
+///
+/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
+/// P0 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.41
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksh` target feature is present.
+///
+/// # Details
+///
+/// `P0` transformation function as is used in the SM3 hash algorithm
+///
+/// This function is included in `Zksh` extension. It's defined as:
+///
+/// ```text
+/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17)
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+///
+/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)`, where the
+/// compression function `CF` uses the intermediate value `TT2` to calculate the
+/// variable `E` for one iteration, which then feeds into subsequent iterations.
+#[target_feature(enable = "zksh")]
+#[cfg_attr(test, assert_instr(sm3p0))]
+#[inline]
+pub unsafe fn sm3p0(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sm3p0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the P1 transformation function as used in the SM3 hash function \[4, 30\].
+///
+/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
+/// P1 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented
+/// such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.42
+///
+/// # Safety
+///
+/// This function is safe to use if the `zksh` target feature is present.
+///
+/// # Details
+///
+/// `P1` transformation function as is used in the SM3 hash algorithm
+///
+/// This function is included in `Zksh` extension. It's defined as:
+///
+/// ```text
+/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23)
+/// ```
+///
+/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
+///
+/// In the SM3 algorithm, the `P1` transformation is used for message expansion,
+/// where the expanded word `Wj` is generated from the previous words.
+/// The whole process can be described as the following pseudocode:
+///
+/// ```text
+/// FOR j=16 TO 67
+///     Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6
+/// ENDFOR
+/// ```
+#[target_feature(enable = "zksh")]
+#[cfg_attr(test, assert_instr(sm3p1))]
+#[inline]
+pub unsafe fn sm3p1(rs1: usize) -> usize {
+    let value: usize;
+    unsafe {
+        asm!(
+            "sm3p1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
\ No newline at end of file

From e2f6a3ed3a98c6fd79bcf9232879d465ce421597 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Sun, 6 Aug 2023 15:54:48 +0200
Subject: [PATCH 03/12] Chore cargo fmt

---
 crates/core_arch/src/riscv32/mod.rs      | 2 +-
 crates/core_arch/src/riscv32/zk.rs       | 2 +-
 crates/core_arch/src/riscv_shared/mod.rs | 2 +-
 crates/core_arch/src/riscv_shared/zk.rs  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/riscv32/mod.rs b/crates/core_arch/src/riscv32/mod.rs
index 394d695ae0..0a8634c85e 100644
--- a/crates/core_arch/src/riscv32/mod.rs
+++ b/crates/core_arch/src/riscv32/mod.rs
@@ -2,4 +2,4 @@
 
 mod zk;
 
-pub use zk::*;
\ No newline at end of file
+pub use zk::*;
diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index 56115a986f..d95890f420 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -455,4 +455,4 @@ pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
         )
     }
     value
-}
\ No newline at end of file
+}
diff --git a/crates/core_arch/src/riscv_shared/mod.rs b/crates/core_arch/src/riscv_shared/mod.rs
index d14a440044..d14431ead4 100644
--- a/crates/core_arch/src/riscv_shared/mod.rs
+++ b/crates/core_arch/src/riscv_shared/mod.rs
@@ -1,7 +1,7 @@
 //! Shared RISC-V intrinsics
 
-mod zk;
 mod p;
+mod zk;
 
 #[unstable(feature = "stdsimd", issue = "27731")]
 pub use p::*;
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 0877e052a7..37ae597b8a 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -591,4 +591,4 @@ pub unsafe fn sm3p1(rs1: usize) -> usize {
         )
     }
     value
-}
\ No newline at end of file
+}

From a3e83d212f59d02243eb20153c2ef37e09462e86 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 15 Aug 2023 13:56:49 +0200
Subject: [PATCH 04/12] Fix: Assembly mistakes in RISC-V Zk extensions

---
 crates/core_arch/src/riscv32/zk.rs      | 6 +++---
 crates/core_arch/src/riscv64/zk.rs      | 4 ++--
 crates/core_arch/src/riscv_shared/zk.rs | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index d95890f420..81bcd0d2df 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -142,9 +142,6 @@ pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 {
     constify_imm2!(bs, aes32dsi)
 }
 
-#[target_feature(enable = "zknd")]
-#[cfg_attr(test, assert_instr(aes32dsmi))]
-#[inline]
 /// AES middle round decryption instruction for RV32.
 ///
 /// This instruction sources a single byte from rs2 according to bs. To this it applies the
@@ -166,6 +163,9 @@ pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 {
 /// # Safety
 ///
 /// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes32dsmi))]
+#[inline]
 pub unsafe fn aes32dsmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
     macro_rules! aes32dsmi {
             ($imm2:expr) => {{
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index de45ad62c7..8f2884668a 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -142,7 +142,7 @@ pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
     let value: u64;
     unsafe {
         asm!(
-            "aes64esm {rd},{rs1},{rs2}",
+            "aes64dsm {rd},{rs1},{rs2}",
             rd = lateout(reg) value,
             rs1 = in(reg) rs1,
             rs2 = in(reg) rs2,
@@ -378,7 +378,7 @@ pub unsafe fn sha512sum1(rs1: u64) -> u64 {
     let value: u64;
     unsafe {
         asm!(
-            "sha512sum0 {rd},{rs1}",
+            "sha512sum1 {rd},{rs1}",
             rd = lateout(reg) value,
             rs1 = in(reg) rs1,
             options(pure, nomem, nostack),
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 37ae597b8a..8402c26756 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -267,7 +267,7 @@ pub unsafe fn sha256sum0(rs1: usize) -> usize {
     let value: usize;
     unsafe {
         asm!(
-            "sha256sig1 {rd},{rs1}",
+            "sha256sum0 {rd},{rs1}",
             rd = lateout(reg) value,
             rs1 = in(reg) rs1,
             options(pure, nomem, nostack),
@@ -302,7 +302,7 @@ pub unsafe fn sha256sum1(rs1: usize) -> usize {
     let value: usize;
     unsafe {
         asm!(
-            "sha256sig1 {rd},{rs1}",
+            "sha256sum1 {rd},{rs1}",
             rd = lateout(reg) value,
             rs1 = in(reg) rs1,
             options(pure, nomem, nostack),

From db78d79b047f7d061b4b19b27b7112d314d483ac Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 22 Aug 2023 12:23:51 +0200
Subject: [PATCH 05/12] Fix: Change to 'rustc_legacy_const_generics'

---
 crates/core_arch/src/riscv32/zk.rs      | 125 ++++++++---------------
 crates/core_arch/src/riscv64/zk.rs      |  52 ++++------
 crates/core_arch/src/riscv_shared/zk.rs | 127 +++++++++++-------------
 3 files changed, 120 insertions(+), 184 deletions(-)

diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index 81bcd0d2df..62a749230b 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -1,19 +1,28 @@
-#[allow(unused)]
 use core::arch::asm;
 
-#[allow(unused)]
-macro_rules! constify_imm2 {
-    ($imm2:expr, $expand:ident) => {
-        #[allow(overflowing_literals)]
-        match $imm2 & 0b11 {
-            0b00 => $expand!(0),
-            0b01 => $expand!(1),
-            0b10 => $expand!(2),
-            _ => $expand!(3),
-        }
+macro_rules! static_assert_imm2 {
+    ($imm:ident) => {
+        static_assert!(
+            $imm < 4,
+            "Immediate value allowed to be a constant from 0 up to including 3"
+        )
     };
 }
 
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.aes32esi"]
+    fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32esmi"]
+    fn _aes32esmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsi"]
+    fn _aes32dsi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.aes32dsmi"]
+    fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+}
+
 /// AES final round encryption instruction for RV32.
 ///
 /// This instruction sources a single byte from rs2 according to bs. To this it applies the
@@ -29,32 +38,20 @@ macro_rules! constify_imm2 {
 ///
 /// # Note
 ///
-/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `BS` are
 /// used.
 ///
 /// # Safety
 ///
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(aes32esi))]
 #[inline]
-pub unsafe fn aes32esi(rs1: u32, rs2: u32, bs: u8) -> u32 {
-    macro_rules! aes32esi {
-            ($imm2:expr) => {{
-                let value: u32;
-                unsafe {
-                    asm!(
-                        concat!("aes32esi {rd},{rs1},{rs2},", $imm2),
-                        rd = lateout(reg) value,
-                        rs1 = in(reg) rs1,
-                        rs2 = in(reg) rs2,
-                        options(pure, nomem, nostack),
-                    );
-                }
-                value
-            }}
-        }
-    constify_imm2!(bs, aes32esi)
+pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// AES middle round encryption instruction for RV32.
@@ -79,25 +76,13 @@ pub unsafe fn aes32esi(rs1: u32, rs2: u32, bs: u8) -> u32 {
 ///
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(aes32esmi))]
 #[inline]
-pub unsafe fn aes32esmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
-    macro_rules! aes32esmi {
-            ($imm2:expr) => {{
-                let value: u32;
-                unsafe {
-                    asm!(
-                        concat!("aes32esmi {rd},{rs1},{rs2},", $imm2),
-                        rd = lateout(reg) value,
-                        rs1 = in(reg) rs1,
-                        rs2 = in(reg) rs2,
-                        options(pure, nomem, nostack),
-                    );
-                }
-                value
-            }}
-        }
-    constify_imm2!(bs, aes32esmi)
+pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// AES final round decryption instruction for RV32.
@@ -114,32 +99,20 @@ pub unsafe fn aes32esmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
 ///
 /// # Note
 ///
-/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `BS` are
 /// used.
 ///
 /// # Safety
 ///
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(aes32dsi))]
 #[inline]
-pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 {
-    macro_rules! aes32dsi {
-            ($imm2:expr) => {{
-                let value: u32;
-                unsafe {
-                    asm!(
-                        concat!("aes32dsi {rd},{rs1},{rs2},", $imm2),
-                        rd = lateout(reg) value,
-                        rs1 = in(reg) rs1,
-                        rs2 = in(reg) rs2,
-                        options(pure, nomem, nostack),
-                    );
-                }
-                value
-            }}
-        }
-    constify_imm2!(bs, aes32dsi)
+pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// AES middle round decryption instruction for RV32.
@@ -157,32 +130,20 @@ pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 {
 ///
 /// # Note
 ///
-/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `BS` are
 /// used.
 ///
 /// # Safety
 ///
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(aes32dsmi))]
 #[inline]
-pub unsafe fn aes32dsmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
-    macro_rules! aes32dsmi {
-            ($imm2:expr) => {{
-                let value: u32;
-                unsafe {
-                    asm!(
-                        concat!("aes32dsmi {rd},{rs1},{rs2},", $imm2),
-                        rd = lateout(reg) value,
-                        rs1 = in(reg) rs1,
-                        rs2 = in(reg) rs2,
-                        options(pure, nomem, nostack),
-                    );
-                }
-                value
-            }}
-        }
-    constify_imm2!(bs, aes32dsmi)
+pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// Place upper/lower halves of the source register into odd/even bits of the destination
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index 8f2884668a..017fa1c041 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -1,25 +1,19 @@
-#[allow(unused)]
 use core::arch::asm;
 
-#[allow(unused)]
-macro_rules! constify_imm_0_until_10 {
-    ($imm2:expr, $expand:ident) => {
-        match $imm2 {
-            1 => $expand!(1),
-            2 => $expand!(2),
-            3 => $expand!(3),
-            4 => $expand!(4),
-            5 => $expand!(5),
-            6 => $expand!(6),
-            7 => $expand!(7),
-            8 => $expand!(8),
-            9 => $expand!(9),
-            10 => $expand!(10),
-            _ => $expand!(0),
-        }
+macro_rules! static_assert_imm_0_until_10 {
+    ($imm:ident) => {
+        static_assert!(
+            $imm <= 10,
+            "Immediate value allowed to be a constant from 0 up to including 10"
+        )
     };
 }
 
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.aes64ks1i"]
+    fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
+}
+
 /// AES final round encryption instruction for RV64.
 ///
 /// Uses the two 64-bit source registers to represent the entire AES state, and produces half
@@ -168,31 +162,19 @@ pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
 ///
 /// # Note
 ///
-/// The `rnum` parameter is expected to be a constant value inside the range of `0..=10`, if a
-/// value outside the valid range is given it uses `rnum=0`.
+/// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`.
 ///
 /// # Safety
 ///
 /// This function is safe to use if the `zkne` or `zknd` target feature is present.
 #[target_feature(enable = "zkne", enable = "zknd")]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(test, assert_instr(aes64ks1i))]
 #[inline]
-pub unsafe fn aes64ks1i(rs1: u64, rnum: u8) -> u64 {
-    macro_rules! aes64ks1i {
-            ($imm_0_until_10:expr) => {{
-                let value: u64;
-                unsafe {
-                    asm!(
-                        concat!("aes64ks1i {rd},{rs1},", $imm_0_until_10),
-                        rd = lateout(reg) value,
-                        rs1 = in(reg) rs1,
-                        options(pure, nomem, nostack),
-                    )
-                }
-                value
-            }}
-        }
-    constify_imm_0_until_10!(rnum, aes64ks1i)
+pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
+    static_assert_imm_0_until_10!(RNUM);
+
+    _aes64ks1i(rs1 as i64, RNUM as i32) as u64
 }
 
 /// This instruction implements part of the KeySchedule operation for the AES Block cipher.
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 8402c26756..2b1644fc0a 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -1,19 +1,40 @@
-#[allow(unused)]
 use core::arch::asm;
 
-#[allow(unused)]
-macro_rules! constify_imm2 {
-    ($imm2:expr, $expand:ident) => {
-        #[allow(overflowing_literals)]
-        match $imm2 & 0b11 {
-            0b00 => $expand!(0),
-            0b01 => $expand!(1),
-            0b10 => $expand!(2),
-            _ => $expand!(3),
-        }
+macro_rules! static_assert_imm2 {
+    ($imm:ident) => {
+        static_assert!(
+            $imm < 4,
+            "Immediate value allowed to be a constant from 0 up to including 3"
+        )
     };
 }
 
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.sm4ed"]
+    fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm4ks"]
+    fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32;
+}
+
+#[cfg(target_arch = "riscv32")]
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.xperm8.i32"]
+    fn _xperm8_32(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.xperm4.i32"]
+    fn _xperm4_32(rs1: i32, rs2: i32) -> i32;
+}
+
+#[cfg(target_arch = "riscv64")]
+extern "unadjusted" {
+    #[link_name = "llvm.riscv.xperm8.i64"]
+    fn _xperm8_64(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.xperm4.i64"]
+    fn _xperm4_64(rs1: i64, rs2: i64) -> i64;
+}
+
 /// Pack the low halves of rs1 and rs2 into rd.
 ///
 /// The pack instruction packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in
@@ -125,17 +146,15 @@ pub unsafe fn brev8(rs: usize) -> usize {
 #[cfg_attr(test, assert_instr(xperm8))]
 #[inline]
 pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "xperm8 {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
+    #[cfg(target_arch = "riscv32")]
+    {
+        _xperm8_32(rs1 as i32, rs2 as i32) as usize
+    }
+
+    #[cfg(target_arch = "riscv64")]
+    {
+        _xperm8_64(rs1 as i64, rs2 as i64) as usize
     }
-    value
 }
 
 /// Nibble-wise lookup of indices into a vector.
@@ -158,17 +177,15 @@ pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
 #[cfg_attr(test, assert_instr(xperm4))]
 #[inline]
 pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "xperm4 {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
+    #[cfg(target_arch = "riscv32")]
+    {
+        _xperm4_32(rs1 as i32, rs2 as i32) as usize
+    }
+
+    #[cfg(target_arch = "riscv64")]
+    {
+        _xperm4_64(rs1 as i64, rs2 as i64) as usize
     }
-    value
 }
 
 /// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\]
@@ -328,7 +345,7 @@ pub unsafe fn sha256sum1(rs1: usize) -> usize {
 ///
 /// # Note
 ///
-/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `BS` are
 /// used.
 ///
 /// # Safety
@@ -381,25 +398,13 @@ pub unsafe fn sha256sum1(rs1: usize) -> usize {
 /// # }
 /// ```
 #[target_feature(enable = "zksed")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(sm4ed))]
 #[inline]
-pub unsafe fn sm4ed(rs1: usize, rs2: usize, bs: u8) -> usize {
-    macro_rules! sm4ed {
-        ($imm2:expr) => {{
-            let value: usize;
-            unsafe {
-                asm!(
-                    concat!("sm4ed {rd},{rs1},{rs2},", $imm2),
-                    rd = lateout(reg) value,
-                    rs1 = in(reg) rs1,
-                    rs2 = in(reg) rs2,
-                    options(pure, nomem, nostack),
-                )
-            }
-            value
-        }}
-    }
-    constify_imm2!(bs, sm4ed)
+pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\].
@@ -419,7 +424,7 @@ pub unsafe fn sm4ed(rs1: usize, rs2: usize, bs: u8) -> usize {
 ///
 /// # Note
 ///
-/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `BS` are
 /// used.
 ///
 /// # Safety
@@ -472,25 +477,13 @@ pub unsafe fn sm4ed(rs1: usize, rs2: usize, bs: u8) -> usize {
 /// # }
 /// ```
 #[target_feature(enable = "zksed")]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(sm4ks))]
 #[inline]
-pub unsafe fn sm4ks(rs1: usize, rs2: usize, bs: u8) -> usize {
-    macro_rules! sm4ks {
-        ($imm2:expr) => {{
-            let value: usize;
-            unsafe {
-                asm!(
-                    concat!("sm4ks {rd},{rs1},{rs2},", $imm2),
-                    rd = lateout(reg) value,
-                    rs1 = in(reg) rs1,
-                    rs2 = in(reg) rs2,
-                    options(pure, nomem, nostack),
-                )
-            }
-            value
-        }}
-    }
-    constify_imm2!(bs, sm4ks)
+pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
+    static_assert_imm2!(BS);
+
+    _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
 
 /// Implements the P0 transformation function as used in the SM3 hash function \[4, 30\].

From b2e97b396d70ddb4c558217bc2f2d3aca53e988a Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 22 Aug 2023 13:45:04 +0200
Subject: [PATCH 06/12] Fix: Utilize LLVM intrinsics where possible

---
 crates/core_arch/src/riscv32/zk.rs      | 125 ++++++------------
 crates/core_arch/src/riscv64/zk.rs      | 136 ++++++--------------
 crates/core_arch/src/riscv_shared/zk.rs | 161 ++++++------------------
 3 files changed, 112 insertions(+), 310 deletions(-)

diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index 62a749230b..b32054c11a 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -1,4 +1,5 @@
-use core::arch::asm;
+#[cfg(test)]
+use stdarch_test::assert_instr;
 
 macro_rules! static_assert_imm2 {
     ($imm:ident) => {
@@ -21,6 +22,30 @@ extern "unadjusted" {
 
     #[link_name = "llvm.riscv.aes32dsmi"]
     fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.zip.i32"]
+    fn _zip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.unzip.i32"]
+    fn _unzip(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0h"]
+    fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig0l"]
+    fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1h"]
+    fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sig1l"]
+    fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum0r"]
+    fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha512sum1r"]
+    fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
 }
 
 /// AES final round encryption instruction for RV32.
@@ -166,17 +191,8 @@ pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 #[target_feature(enable = "zbkb")]
 #[cfg_attr(test, assert_instr(zip))]
 #[inline]
-pub unsafe fn zip(rs: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "zip {rd},{rs}",
-            rd = lateout(reg) value,
-            rs = in(reg) rs,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn zip(rs: u32) -> u32 {
+    _zip(rs as i32) as u32
 }
 
 /// Place odd and even bits of the source word into upper/lower halves of the destination.
@@ -197,17 +213,8 @@ pub unsafe fn zip(rs: usize) -> usize {
 #[target_feature(enable = "zbkb")]
 #[cfg_attr(test, assert_instr(unzip))]
 #[inline]
-pub unsafe fn unzip(rs: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "unzip {rd},{rs}",
-            rd = lateout(reg) value,
-            rs = in(reg) rs,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn unzip(rs: u32) -> u32 {
+    _unzip(rs as i32) as u32
 }
 
 /// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
@@ -232,17 +239,7 @@ pub unsafe fn unzip(rs: usize) -> usize {
 #[cfg_attr(test, assert_instr(sha512sig0h))]
 #[inline]
 pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sig0h {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig0h(rs1 as i32, rs2 as i32) as u32
 }
 
 /// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
@@ -267,17 +264,7 @@ pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
 #[cfg_attr(test, assert_instr(sha512sig0l))]
 #[inline]
 pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sig0l {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig0l(rs1 as i32, rs2 as i32) as u32
 }
 
 /// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
@@ -302,17 +289,7 @@ pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
 #[cfg_attr(test, assert_instr(sha512sig1h))]
 #[inline]
 pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sig1h {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig1h(rs1 as i32, rs2 as i32) as u32
 }
 
 /// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
@@ -337,17 +314,7 @@ pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
 #[cfg_attr(test, assert_instr(sha512sig1l))]
 #[inline]
 pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sig1l {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig1l(rs1 as i32, rs2 as i32) as u32
 }
 
 /// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
@@ -371,17 +338,7 @@ pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
 #[cfg_attr(test, assert_instr(sha512sum0r))]
 #[inline]
 pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sum0r {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sum0r(rs1 as i32, rs2 as i32) as u32
 }
 
 /// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
@@ -405,15 +362,5 @@ pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
 #[cfg_attr(test, assert_instr(sha512sum1r))]
 #[inline]
 pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
-    let value: u32;
-    unsafe {
-        asm!(
-            "sha512sum1r {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sum1r(rs1 as i32, rs2 as i32) as u32
 }
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index 017fa1c041..a6aae49f26 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -1,5 +1,8 @@
 use core::arch::asm;
 
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
 macro_rules! static_assert_imm_0_until_10 {
     ($imm:ident) => {
         static_assert!(
@@ -10,8 +13,35 @@ macro_rules! static_assert_imm_0_until_10 {
 }
 
 extern "unadjusted" {
+    #[link_name = "llvm.riscv.aes64es"]
+    fn _aes64es(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64esm"]
+    fn _aes64esm(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ds"]
+    fn _aes64ds(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.aes64dsm"]
+    fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
+
     #[link_name = "llvm.riscv.aes64ks1i"]
     fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
+
+    #[link_name = "llvm.riscv.aes64ks2"]
+    fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig0"]
+    fn _sha512sig0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sig1"]
+    fn _sha512sig1(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum0"]
+    fn _sha512sum0(rs1: i64) -> i64;
+
+    #[link_name = "llvm.riscv.sha512sum1"]
+    fn _sha512sum1(rs1: i64) -> i64;
 }
 
 /// AES final round encryption instruction for RV64.
@@ -34,17 +64,7 @@ extern "unadjusted" {
 #[cfg_attr(test, assert_instr(aes64es))]
 #[inline]
 pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "aes64es {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _aes64es(rs1 as i64, rs2 as i64) as u64
 }
 
 /// AES middle round encryption instruction for RV64.
@@ -67,17 +87,7 @@ pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
 #[cfg_attr(test, assert_instr(aes64esm))]
 #[inline]
 pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "aes64esm {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _aes64esm(rs1 as i64, rs2 as i64) as u64
 }
 
 /// AES final round decryption instruction for RV64.
@@ -100,17 +110,7 @@ pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
 #[cfg_attr(test, assert_instr(aes64ds))]
 #[inline]
 pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "aes64ds {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _aes64ds(rs1 as i64, rs2 as i64) as u64
 }
 
 /// AES middle round decryption instruction for RV64.
@@ -133,17 +133,7 @@ pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
 #[cfg_attr(test, assert_instr(aes64dsm))]
 #[inline]
 pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "aes64dsm {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _aes64dsm(rs1 as i64, rs2 as i64) as u64
 }
 
 /// This instruction implements part of the KeySchedule operation for the AES Block cipher
@@ -196,17 +186,7 @@ pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
 #[cfg_attr(test, assert_instr(aes64ks2))]
 #[inline]
 pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "aes64ks2 {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _aes64ks2(rs1 as i64, rs2 as i64) as u64
 }
 
 /// Pack the low 16-bits of rs1 and rs2 into rd on RV64
@@ -228,6 +208,8 @@ pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
 #[cfg_attr(test, assert_instr(packw))]
 #[inline]
 pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
+    // Note: There is no LLVM intrinsic for this instruction currently.
+
     let value: u64;
     unsafe {
         asm!(
@@ -261,16 +243,7 @@ pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
 #[cfg_attr(test, assert_instr(sha512sig0))]
 #[inline]
 pub unsafe fn sha512sig0(rs1: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "sha512sig0 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig0(rs1 as i64) as u64
 }
 
 /// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
@@ -293,16 +266,7 @@ pub unsafe fn sha512sig0(rs1: u64) -> u64 {
 #[cfg_attr(test, assert_instr(sha512sig1))]
 #[inline]
 pub unsafe fn sha512sig1(rs1: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "sha512sig1 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sig1(rs1 as i64) as u64
 }
 
 /// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
@@ -325,16 +289,7 @@ pub unsafe fn sha512sig1(rs1: u64) -> u64 {
 #[cfg_attr(test, assert_instr(sha512sum0))]
 #[inline]
 pub unsafe fn sha512sum0(rs1: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "sha512sum0 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sum0(rs1 as i64) as u64
 }
 
 /// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
@@ -357,14 +312,5 @@ pub unsafe fn sha512sum0(rs1: u64) -> u64 {
 #[cfg_attr(test, assert_instr(sha512sum1))]
 #[inline]
 pub unsafe fn sha512sum1(rs1: u64) -> u64 {
-    let value: u64;
-    unsafe {
-        asm!(
-            "sha512sum1 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+    _sha512sum1(rs1 as i64) as u64
 }
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 2b1644fc0a..502b5c57fd 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -1,5 +1,8 @@
 use core::arch::asm;
 
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
 macro_rules! static_assert_imm2 {
     ($imm:ident) => {
         static_assert!(
@@ -15,6 +18,24 @@ extern "unadjusted" {
 
     #[link_name = "llvm.riscv.sm4ks"]
     fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm3p0"]
+    fn _sm3p0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sm3p1"]
+    fn _sm3p1(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sig0"]
+    fn _sha256sig0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sig1"]
+    fn _sha256sig1(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sum0"]
+    fn _sha256sum0(rs1: i32) -> i32;
+
+    #[link_name = "llvm.riscv.sha256sum1"]
+    fn _sha256sum1(rs1: i32) -> i32;
 }
 
 #[cfg(target_arch = "riscv32")]
@@ -35,37 +56,6 @@ extern "unadjusted" {
     fn _xperm4_64(rs1: i64, rs2: i64) -> i64;
 }
 
-/// Pack the low halves of rs1 and rs2 into rd.
-///
-/// The pack instruction packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in
-/// the lower half and rs2 in the upper half.
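-///
-/// As an illustrative sketch on RV32 (so XLEN/2 = 16 bits):
-///
-/// ```text
-/// pack(0x1234_5678, 0x9ABC_DEF0) == 0xDEF0_5678
-/// ```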
-///
-/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
-///
-/// Version: v1.0.1
-///
-/// Section: 3.17
-///
-/// # Safety
-///
-/// This function is safe to use if the `zbkb` target feature is present.
-#[target_feature(enable = "zbkb")]
-#[cfg_attr(test, assert_instr(pack))]
-#[inline]
-pub unsafe fn pack(rs1: usize, rs2: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "pack {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
-}
-
 /// Pack the low bytes of rs1 and rs2 into rd.
 ///
 /// The packh instruction packs the least-significant bytes of rs1 and rs2 into the 16
@@ -84,6 +74,8 @@ pub unsafe fn pack(rs1: usize, rs2: usize) -> usize {
 #[cfg_attr(test, assert_instr(packh))]
 #[inline]
 pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
+    // Note: There is no LLVM intrinsic for this instruction currently.
+
     let value: usize;
     unsafe {
         asm!(
@@ -97,35 +89,6 @@ pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
     value
 }
 
-/// Reverse the bits in each byte of a source register.
-///
-/// This instruction reverses the order of the bits in every byte of a register.
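-///
-/// As an illustrative sketch (bit 0 of each byte swaps with bit 7, bit 1 with bit 6, and so on):
-///
-/// ```text
-/// brev8(0x0102_0304) == 0x8040_C020
-/// ```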
-///
-/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
-///
-/// Version: v1.0.1
-///
-/// Section: 3.13
-///
-/// # Safety
-///
-/// This function is safe to use if the `zbkb` target feature is present.
-#[target_feature(enable = "zbkb")]
-#[cfg_attr(test, assert_instr(brev8))]
-#[inline]
-pub unsafe fn brev8(rs: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "brev8 {rd},{rs}",
-            rd = lateout(reg) value,
-            rs = in(reg) rs,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
-}
-
 /// Byte-wise lookup of indices into a vector in registers.
 ///
 /// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
@@ -210,17 +173,8 @@ pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
 #[target_feature(enable = "zknh")]
 #[cfg_attr(test, assert_instr(sha256sig0))]
 #[inline]
-pub unsafe fn sha256sig0(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sha256sig0 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sha256sig0(rs1: u32) -> u32 {
+    _sha256sig0(rs1 as i32) as u32
 }
 
 /// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\]
@@ -245,17 +199,8 @@ pub unsafe fn sha256sig0(rs1: usize) -> usize {
 #[target_feature(enable = "zknh")]
 #[cfg_attr(test, assert_instr(sha256sig1))]
 #[inline]
-pub unsafe fn sha256sig1(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sha256sig1 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sha256sig1(rs1: u32) -> u32 {
+    _sha256sig1(rs1 as i32) as u32
 }
 
 /// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\]
@@ -280,17 +225,8 @@ pub unsafe fn sha256sig1(rs1: usize) -> usize {
 #[target_feature(enable = "zknh")]
 #[cfg_attr(test, assert_instr(sha256sum0))]
 #[inline]
-pub unsafe fn sha256sum0(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sha256sum0 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sha256sum0(rs1: u32) -> u32 {
+    _sha256sum0(rs1 as i32) as u32
 }
 
 /// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\]
@@ -315,17 +251,8 @@ pub unsafe fn sha256sum0(rs1: usize) -> usize {
 #[target_feature(enable = "zknh")]
 #[cfg_attr(test, assert_instr(sha256sum1))]
 #[inline]
-pub unsafe fn sha256sum1(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sha256sum1 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sha256sum1(rs1: u32) -> u32 {
+    _sha256sum1(rs1 as i32) as u32
 }
 
 /// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\].
@@ -520,17 +447,8 @@ pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 #[target_feature(enable = "zksh")]
 #[cfg_attr(test, assert_instr(sm3p0))]
 #[inline]
-pub unsafe fn sm3p0(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sm3p0 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sm3p0(rs1: u32) -> u32 {
+    _sm3p0(rs1 as i32) as u32
 }
 
 /// Implements the P1 transformation function as used in the SM3 hash function [4, 30].
@@ -573,15 +491,6 @@ pub unsafe fn sm3p0(rs1: usize) -> usize {
 #[target_feature(enable = "zksh")]
 #[cfg_attr(test, assert_instr(sm3p1))]
 #[inline]
-pub unsafe fn sm3p1(rs1: usize) -> usize {
-    let value: usize;
-    unsafe {
-        asm!(
-            "sm3p1 {rd},{rs1}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
+pub unsafe fn sm3p1(rs1: u32) -> u32 {
+    _sm3p1(rs1 as i32) as u32
 }
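
The hunks above replace hand-rolled `asm!` blocks with calls into the LLVM intrinsics declared at the top of the file, narrowing the signatures from `usize` to `u32` in the process. A minimal usage sketch, not part of the patch, assuming a riscv64 nightly toolchain with the unstable intrinsics enabled and the shared intrinsics re-exported from `core::arch::riscv64`; the helper name and rolling window are hypothetical:

```rust
// Hypothetical SHA-256 message-schedule step over a 16-word rolling window:
// W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]  (indices mod 16)
#[cfg(target_arch = "riscv64")]
#[target_feature(enable = "zknh")]
unsafe fn sha256_schedule_step(w: &mut [u32; 16], t: usize) {
    use core::arch::riscv64::{sha256sig0, sha256sig1};
    w[t % 16] = sha256sig1(w[(t + 14) % 16])
        .wrapping_add(w[(t + 9) % 16])
        .wrapping_add(sha256sig0(w[(t + 1) % 16]))
        .wrapping_add(w[t % 16]);
}
```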

From f52a50c80e2693a447429895c77f3f033d4c1dde Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 22 Aug 2023 13:50:30 +0200
Subject: [PATCH 07/12] Chore: Cargo format

---
 crates/core_arch/src/riscv_shared/zk.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 502b5c57fd..035f22d072 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -75,7 +75,7 @@ extern "unadjusted" {
 #[inline]
 pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
     // Note: There is no LLVM intrinsic for this instruction currently.
-     
+
     let value: usize;
     unsafe {
         asm!(

From 7cf295a85b6bd1fd22fc473055e1d683b8d1aa6d Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Fri, 25 Aug 2023 19:04:00 +0200
Subject: [PATCH 08/12] Impr: Remove pack instructions as intrinsics

---
 crates/core_arch/src/riscv64/zk.rs      | 34 -------------------------
 crates/core_arch/src/riscv_shared/zk.rs | 33 ------------------------
 2 files changed, 67 deletions(-)

diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index a6aae49f26..38725caf5d 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -189,40 +189,6 @@ pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
     _aes64ks2(rs1 as i64, rs2 as i64) as u64
 }
 
-/// Pack the low 16-bits of rs1 and rs2 into rd on RV64
-///
-/// This instruction packs the low 16 bits of rs1 and rs2 into the 32 least-significant bits of
-/// rd, sign extending the 32-bit result to the rest of rd. This instruction only exists on
-/// RV64 based systems.
-///
-/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
-///
-/// Version: v1.0.1
-///
-/// Section: 3.26
-///
-/// # Safety
-///
-/// This function is safe to use if the `zbkb` target feature is present.
-#[target_feature(enable = "zbkb")]
-#[cfg_attr(test, assert_instr(packw))]
-#[inline]
-pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
-    // Note: There is no LLVM intrinsic for this instruction currently.
-
-    let value: u64;
-    unsafe {
-        asm!(
-            "packw {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
-}
-
 /// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\]
 /// (Section 4.1.3).
 ///
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 035f22d072..acf24991d8 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -56,39 +56,6 @@ extern "unadjusted" {
     fn _xperm4_64(rs1: i64, rs2: i64) -> i64;
 }
 
-/// Pack the low bytes of rs1 and rs2 into rd.
-///
-/// The packh instruction packs the least-significant bytes of rs1 and rs2 into the 16
-/// least-significant bits of rd, zero extending the rest of rd.
-///
-/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
-///
-/// Version: v1.0.1
-///
-/// Section: 3.18
-///
-/// # Safety
-///
-/// This function is safe to use if the `zbkb` target feature is present.
-#[target_feature(enable = "zbkb")]
-#[cfg_attr(test, assert_instr(packh))]
-#[inline]
-pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
-    // Note: There is no LLVM intrinsic for this instruction currently.
-
-    let value: usize;
-    unsafe {
-        asm!(
-            "packh {rd},{rs1},{rs2}",
-            rd = lateout(reg) value,
-            rs1 = in(reg) rs1,
-            rs2 = in(reg) rs2,
-            options(pure, nomem, nostack),
-        )
-    }
-    value
-}
-
 /// Byte-wise lookup of indices into a vector in registers.
 ///
 /// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
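
With `pack`, `packh`, and `packw` no longer exposed as intrinsics, the same operations can be written in plain Rust; a hedged sketch of a `packh` equivalent (hypothetical helper, not from the patch), which the compiler may, but is not guaranteed to, lower to the `packh` instruction when `zbkb` is enabled:

```rust
// Hypothetical stand-in for the removed `packh` intrinsic: pack the
// least-significant bytes of rs1 and rs2 into the 16 least-significant
// bits of the result, zero-extending the rest.
#[inline]
fn packh_equivalent(rs1: usize, rs2: usize) -> usize {
    (rs1 & 0xFF) | ((rs2 & 0xFF) << 8)
}
```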

From ef892cf4ed6b0e14df011585dd69057567d530e2 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 29 Aug 2023 19:07:58 +0200
Subject: [PATCH 09/12] Fix: Remove unused arch::asm imports

---
 crates/core_arch/src/riscv64/zk.rs      | 2 --
 crates/core_arch/src/riscv_shared/zk.rs | 2 --
 2 files changed, 4 deletions(-)

diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index 38725caf5d..5df9cb80d1 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -1,5 +1,3 @@
-use core::arch::asm;
-
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index acf24991d8..beda7cd763 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -1,5 +1,3 @@
-use core::arch::asm;
-
 #[cfg(test)]
 use stdarch_test::assert_instr;
 

From 4945aa528fb099b17558d21f0da9f3708fac6714 Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <g.burghoorn@gmail.com>
Date: Tue, 29 Aug 2023 20:10:22 +0200
Subject: [PATCH 10/12] Fix: Add constant for assert_instr

---
 crates/core_arch/src/riscv32/zk.rs      | 25 ++++++++-----------------
 crates/core_arch/src/riscv64/zk.rs      | 13 ++-----------
 crates/core_arch/src/riscv_shared/zk.rs | 17 ++++-------------
 3 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index b32054c11a..4c090c5cd0 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -1,15 +1,6 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-macro_rules! static_assert_imm2 {
-    ($imm:ident) => {
-        static_assert!(
-            $imm < 4,
-            "Immediate value allowed to be a constant from 0 up to including 3"
-        )
-    };
-}
-
 extern "unadjusted" {
     #[link_name = "llvm.riscv.aes32esi"]
     fn _aes32esi(rs1: i32, rs2: i32, bs: i32) -> i32;
@@ -71,10 +62,10 @@ extern "unadjusted" {
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32esi))]
+#[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
 #[inline]
 pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _aes32esi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
@@ -102,10 +93,10 @@ pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32esmi))]
+#[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
 #[inline]
 pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _aes32esmi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
@@ -132,10 +123,10 @@ pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32dsi))]
+#[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
 #[inline]
 pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _aes32dsi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
@@ -163,10 +154,10 @@ pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32dsmi))]
+#[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
 #[inline]
 pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _aes32dsmi(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index 5df9cb80d1..bdceb9a268 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -1,15 +1,6 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-macro_rules! static_assert_imm_0_until_10 {
-    ($imm:ident) => {
-        static_assert!(
-            $imm <= 10,
-            "Immediate value allowed to be a constant from 0 up to including 10"
-        )
-    };
-}
-
 extern "unadjusted" {
     #[link_name = "llvm.riscv.aes64es"]
     fn _aes64es(rs1: i64, rs2: i64) -> i64;
@@ -157,10 +148,10 @@ pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
 /// This function is safe to use if the `zkne` or `zknd` target feature is present.
 #[target_feature(enable = "zkne", enable = "zknd")]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(aes64ks1i))]
+#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
 #[inline]
 pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
-    static_assert_imm_0_until_10!(RNUM);
+    static_assert!(RNUM <= 10);
 
     _aes64ks1i(rs1 as i64, RNUM as i32) as u64
 }
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index beda7cd763..5fc5b4cdaf 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -1,15 +1,6 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-macro_rules! static_assert_imm2 {
-    ($imm:ident) => {
-        static_assert!(
-            $imm < 4,
-            "Immediate value allowed to be a constant from 0 up to including 3"
-        )
-    };
-}
-
 extern "unadjusted" {
     #[link_name = "llvm.riscv.sm4ed"]
     fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32;
@@ -291,10 +282,10 @@ pub unsafe fn sha256sum1(rs1: u32) -> u32 {
 /// ```
 #[target_feature(enable = "zksed")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(sm4ed))]
+#[cfg_attr(test, assert_instr(sm4ed, BS = 0))]
 #[inline]
 pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
@@ -370,10 +361,10 @@ pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// ```
 #[target_feature(enable = "zksed")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(sm4ks))]
+#[cfg_attr(test, assert_instr(sm4ks, BS = 0))]
 #[inline]
 pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
-    static_assert_imm2!(BS);
+    static_assert!(BS < 4);
 
     _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32
 }
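
After this patch the immediate bounds are enforced inline with `static_assert!`, so an out-of-range byte select is rejected at compile time. A hedged sketch of a call site, following the four-call round pattern from the scalar crypto spec; the helper and its arguments are illustrative, not from the patch:

```rust
// Hypothetical SM4 round: B = X1 ^ X2 ^ X3 ^ RK, then accumulate the
// transformed bytes of B into X0. A byte select >= 4 (e.g. `sm4ed(t, b, 4)`)
// fails to compile via `static_assert!(BS < 4)`.
#[cfg(target_arch = "riscv64")]
#[target_feature(enable = "zksed")]
unsafe fn sm4_round(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
    use core::arch::riscv64::sm4ed;
    let b = x1 ^ x2 ^ x3 ^ rk;
    let mut t = x0;
    t = sm4ed(t, b, 0);
    t = sm4ed(t, b, 1);
    t = sm4ed(t, b, 2);
    sm4ed(t, b, 3)
}
```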

From e672eca36ed4285c25a0021ee3ab74f9f7abffef Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <me@gburghoorn.com>
Date: Thu, 31 Aug 2023 17:41:04 +0200
Subject: [PATCH 11/12] Fix: Remove assert_instr for RISCV, see #1464

---
 crates/core_arch/src/riscv32/zk.rs      | 30 ++++++++++++++++---------
 crates/core_arch/src/riscv64/zk.rs      | 30 ++++++++++++++++---------
 crates/core_arch/src/riscv_shared/zk.rs | 30 ++++++++++++++++---------
 3 files changed, 60 insertions(+), 30 deletions(-)

diff --git a/crates/core_arch/src/riscv32/zk.rs b/crates/core_arch/src/riscv32/zk.rs
index 4c090c5cd0..3767577724 100644
--- a/crates/core_arch/src/riscv32/zk.rs
+++ b/crates/core_arch/src/riscv32/zk.rs
@@ -62,7 +62,8 @@ extern "unadjusted" {
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32esi, BS = 0))]
 #[inline]
 pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -93,7 +94,8 @@ pub unsafe fn aes32esi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32esmi, BS = 0))]
 #[inline]
 pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -123,7 +125,8 @@ pub unsafe fn aes32esmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32dsi, BS = 0))]
 #[inline]
 pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -154,7 +157,8 @@ pub unsafe fn aes32dsi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes32dsmi, BS = 0))]
 #[inline]
 pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -180,7 +184,8 @@ pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 ///
 /// This function is safe to use if the `zbkb` target feature is present.
 #[target_feature(enable = "zbkb")]
-#[cfg_attr(test, assert_instr(zip))]
+// See #1464
+// #[cfg_attr(test, assert_instr(zip))]
 #[inline]
 pub unsafe fn zip(rs: u32) -> u32 {
     _zip(rs as i32) as u32
@@ -227,7 +232,8 @@ pub unsafe fn unzip(rs: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sig0h))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0h))]
 #[inline]
 pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
     _sha512sig0h(rs1 as i32, rs2 as i32) as u32
@@ -252,7 +258,8 @@ pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sig0l))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0l))]
 #[inline]
 pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
     _sha512sig0l(rs1 as i32, rs2 as i32) as u32
@@ -277,7 +284,8 @@ pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sig1h))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig1h))]
 #[inline]
 pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
     _sha512sig1h(rs1 as i32, rs2 as i32) as u32
@@ -326,7 +334,8 @@ pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sum0r))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum0r))]
 #[inline]
 pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
     _sha512sum0r(rs1 as i32, rs2 as i32) as u32
@@ -350,7 +359,8 @@ pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sum1r))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum1r))]
 #[inline]
 pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
     _sha512sum1r(rs1 as i32, rs2 as i32) as u32
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index bdceb9a268..3dbe3705db 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -50,7 +50,8 @@ extern "unadjusted" {
 ///
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
-#[cfg_attr(test, assert_instr(aes64es))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64es))]
 #[inline]
 pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
     _aes64es(rs1 as i64, rs2 as i64) as u64
@@ -73,7 +74,8 @@ pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
 ///
 /// This function is safe to use if the `zkne` target feature is present.
 #[target_feature(enable = "zkne")]
-#[cfg_attr(test, assert_instr(aes64esm))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64esm))]
 #[inline]
 pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
     _aes64esm(rs1 as i64, rs2 as i64) as u64
@@ -96,7 +98,8 @@ pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
-#[cfg_attr(test, assert_instr(aes64ds))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ds))]
 #[inline]
 pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
     _aes64ds(rs1 as i64, rs2 as i64) as u64
@@ -119,7 +122,8 @@ pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknd` target feature is present.
 #[target_feature(enable = "zknd")]
-#[cfg_attr(test, assert_instr(aes64dsm))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64dsm))]
 #[inline]
 pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
     _aes64dsm(rs1 as i64, rs2 as i64) as u64
@@ -148,7 +152,8 @@ pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
 /// This function is safe to use if the `zkne` or `zknd` target feature is present.
 #[target_feature(enable = "zkne", enable = "zknd")]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
 #[inline]
 pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
     static_assert!(RNUM <= 10);
@@ -172,7 +177,8 @@ pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
 ///
 /// This function is safe to use if the `zkne` or `zknd` target feature is present.
 #[target_feature(enable = "zkne", enable = "zknd")]
-#[cfg_attr(test, assert_instr(aes64ks2))]
+// See #1464
+// #[cfg_attr(test, assert_instr(aes64ks2))]
 #[inline]
 pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
     _aes64ks2(rs1 as i64, rs2 as i64) as u64
@@ -195,7 +201,8 @@ pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sig0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig0))]
 #[inline]
 pub unsafe fn sha512sig0(rs1: u64) -> u64 {
     _sha512sig0(rs1 as i64) as u64
@@ -218,7 +225,8 @@ pub unsafe fn sha512sig0(rs1: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sig1))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sig1))]
 #[inline]
 pub unsafe fn sha512sig1(rs1: u64) -> u64 {
     _sha512sig1(rs1 as i64) as u64
@@ -241,7 +249,8 @@ pub unsafe fn sha512sig1(rs1: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sum0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum0))]
 #[inline]
 pub unsafe fn sha512sum0(rs1: u64) -> u64 {
     _sha512sum0(rs1 as i64) as u64
@@ -264,7 +273,8 @@ pub unsafe fn sha512sum0(rs1: u64) -> u64 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha512sum1))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha512sum1))]
 #[inline]
 pub unsafe fn sha512sum1(rs1: u64) -> u64 {
     _sha512sum1(rs1 as i64) as u64
diff --git a/crates/core_arch/src/riscv_shared/zk.rs b/crates/core_arch/src/riscv_shared/zk.rs
index 5fc5b4cdaf..db97f72bc4 100644
--- a/crates/core_arch/src/riscv_shared/zk.rs
+++ b/crates/core_arch/src/riscv_shared/zk.rs
@@ -62,7 +62,8 @@ extern "unadjusted" {
 ///
 /// This function is safe to use if the `zbkx` target feature is present.
 #[target_feature(enable = "zbkx")]
-#[cfg_attr(test, assert_instr(xperm8))]
+// See #1464
+// #[cfg_attr(test, assert_instr(xperm8))]
 #[inline]
 pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
     #[cfg(target_arch = "riscv32")]
@@ -93,7 +94,8 @@ pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize {
 ///
 /// This function is safe to use if the `zbkx` target feature is present.
 #[target_feature(enable = "zbkx")]
-#[cfg_attr(test, assert_instr(xperm4))]
+// See #1464
+// #[cfg_attr(test, assert_instr(xperm4))]
 #[inline]
 pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
     #[cfg(target_arch = "riscv32")]
@@ -127,7 +129,8 @@ pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha256sig0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sig0))]
 #[inline]
 pub unsafe fn sha256sig0(rs1: u32) -> u32 {
     _sha256sig0(rs1 as i32) as u32
@@ -153,7 +156,8 @@ pub unsafe fn sha256sig0(rs1: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha256sig1))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sig1))]
 #[inline]
 pub unsafe fn sha256sig1(rs1: u32) -> u32 {
     _sha256sig1(rs1 as i32) as u32
@@ -179,7 +183,8 @@ pub unsafe fn sha256sig1(rs1: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha256sum0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sum0))]
 #[inline]
 pub unsafe fn sha256sum0(rs1: u32) -> u32 {
     _sha256sum0(rs1 as i32) as u32
@@ -205,7 +210,8 @@ pub unsafe fn sha256sum0(rs1: u32) -> u32 {
 ///
 /// This function is safe to use if the `zknh` target feature is present.
 #[target_feature(enable = "zknh")]
-#[cfg_attr(test, assert_instr(sha256sum1))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sha256sum1))]
 #[inline]
 pub unsafe fn sha256sum1(rs1: u32) -> u32 {
     _sha256sum1(rs1 as i32) as u32
@@ -282,7 +288,8 @@ pub unsafe fn sha256sum1(rs1: u32) -> u32 {
 /// ```
 #[target_feature(enable = "zksed")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(sm4ed, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm4ed, BS = 0))]
 #[inline]
 pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -361,7 +368,8 @@ pub unsafe fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// ```
 #[target_feature(enable = "zksed")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(sm4ks, BS = 0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm4ks, BS = 0))]
 #[inline]
 pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
     static_assert!(BS < 4);
@@ -401,7 +409,8 @@ pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
 /// compression function `CF` uses the intermediate value `TT2` to calculate
 /// the variable `E` in one iteration for subsequent processes.
 #[target_feature(enable = "zksh")]
-#[cfg_attr(test, assert_instr(sm3p0))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm3p0))]
 #[inline]
 pub unsafe fn sm3p0(rs1: u32) -> u32 {
     _sm3p0(rs1 as i32) as u32
@@ -445,7 +454,8 @@ pub unsafe fn sm3p0(rs1: u32) -> u32 {
 /// ENDFOR
 /// ```
 #[target_feature(enable = "zksh")]
-#[cfg_attr(test, assert_instr(sm3p1))]
+// See #1464
+// #[cfg_attr(test, assert_instr(sm3p1))]
 #[inline]
 pub unsafe fn sm3p1(rs1: u32) -> u32 {
     _sm3p1(rs1 as i32) as u32
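
A hedged sketch of the SM3 message-expansion step that `sm3p1` accelerates; the helper and indexing are illustrative, not from the patch:

```rust
// Hypothetical SM3 message expansion for j in 16..68:
// W[j] = P1(W[j-16] ^ W[j-9] ^ rotl(W[j-3], 15)) ^ rotl(W[j-13], 7) ^ W[j-6]
#[cfg(target_arch = "riscv64")]
#[target_feature(enable = "zksh")]
unsafe fn sm3_expand_word(w: &[u32; 68], j: usize) -> u32 {
    use core::arch::riscv64::sm3p1;
    sm3p1(w[j - 16] ^ w[j - 9] ^ w[j - 3].rotate_left(15))
        ^ w[j - 13].rotate_left(7)
        ^ w[j - 6]
}
```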

From 5d3bb9e80cea0b147883be746797ad4d2ee6111a Mon Sep 17 00:00:00 2001
From: Gijs Burghoorn <me@gburghoorn.com>
Date: Thu, 31 Aug 2023 17:49:02 +0200
Subject: [PATCH 12/12] Fix: Add proper flags for RISCV64 CI

---
 ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile | 7 +++----
 ci/run.sh                                        | 1 +
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
index b9b3c682ea..67f5f4cdef 100644
--- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@@ -1,10 +1,9 @@
-FROM ubuntu:22.04
+FROM ubuntu:23.04
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         gcc libc6-dev qemu-user ca-certificates \
-        gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
-        qemu-user
+        gcc-riscv64-linux-gnu libc6-dev-riscv64-cross
 
 ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc \
-    CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu" \
+    CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER="qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zk=true" \
     OBJDUMP=riscv64-linux-gnu-objdump
diff --git a/ci/run.sh b/ci/run.sh
index 0e33de89db..9923fa8e36 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -47,6 +47,7 @@ case ${TARGET} in
     # Some of our test dependencies use the deprecated `gcc` crates which
     # doesn't detect RISC-V compilers automatically, so do it manually here.
     riscv64*)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk"
         export TARGET_CC="riscv64-linux-gnu-gcc"
         ;;
 esac
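
With CI now building under `-Ctarget-feature=+zk` and QEMU exposing the extension, crates can select the intrinsic path at compile time. A hedged fallback sketch (helper name and dispatch are illustrative; `zk` implies `zkn`, which includes `zknh`):

```rust
// Hypothetical compile-time dispatch: use the intrinsic when the crate is
// built with `zknh` available, otherwise the portable definition
// sigma0(x) = rotr(x, 1) ^ rotr(x, 8) ^ (x >> 7).
#[cfg(all(target_arch = "riscv64", target_feature = "zknh"))]
fn sha512_sigma0(x: u64) -> u64 {
    unsafe { core::arch::riscv64::sha512sig0(x) }
}

#[cfg(not(all(target_arch = "riscv64", target_feature = "zknh")))]
fn sha512_sigma0(x: u64) -> u64 {
    x.rotate_right(1) ^ x.rotate_right(8) ^ (x >> 7)
}
```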