Skip to content

Implement @depositBits and @extractBits #23474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions doc/langref.html.in
Original file line number Diff line number Diff line change
Expand Up @@ -4742,6 +4742,25 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
{#see_also|@cVaArg|@cVaCopy|@cVaEnd#}
{#header_close#}

{#header_open|@depositBits#}
<pre>{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers contiguous bits from the bottom of the {#syntax#}source{#endsyntax#} operand to positions in the destination corresponding to bits that are set in the {#syntax#}mask{#endsyntax#}. The remaining bits in the destination are zeroed.
</p>
<p>
Where available, this builtin compiles down to a {#syntax#}pdep{#endsyntax#} instruction on x86 targets with BMI2 enabled. This applies to types up to {#syntax#}u64{#endsyntax#} on x86-64, and to types up to {#syntax#}u32{#endsyntax#} on x86.
</p>
<p>
Example:
</p>

{#code|test_depositbits_builtin.zig#}
{#see_also|@extractBits#}
{#header_close#}

{#header_open|@divExact#}
<pre>{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}</pre>
<p>
Expand Down Expand Up @@ -4896,6 +4915,26 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
{#see_also|@export#}
{#header_close#}

{#header_open|@extractBits#}
<pre>{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers bits in the {#syntax#}source{#endsyntax#} corresponding to bits set in the {#syntax#}mask{#endsyntax#} operand to the destination, writing them as contiguous lower bits. The remaining bits in the destination are zeroed.
</p>
<p>
Where available, this builtin compiles down to a {#syntax#}pext{#endsyntax#} instruction on x86 targets with BMI2 enabled. This applies to types up to {#syntax#}u64{#endsyntax#} on x86-64, and to types up to {#syntax#}u32{#endsyntax#} on x86.
</p>
<p>
Example:
</p>

{#code|test_extractbits_builtin.zig#}

{#see_also|@depositBits#}
{#header_close#}

{#header_open|@field#}
<pre>{#syntax#}@field(lhs: anytype, comptime field_name: []const u8) (field){#endsyntax#}</pre>
<p>Performs field access by a compile-time string. Works on both fields and declarations.
Expand Down
7 changes: 7 additions & 0 deletions doc/langref/test_depositbits_builtin.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
const std = @import("std");

test "deposit bits" {
    // expectEqual takes (expected, actual) — expected value first.
    try std.testing.expectEqual(0x10203040, @depositBits(0x00001234, 0xf0f0f0f0));
}

// test
7 changes: 7 additions & 0 deletions doc/langref/test_extractbits_builtin.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
const std = @import("std");

test "extract bits" {
    // expectEqual takes (expected, actual) — expected value first.
    try std.testing.expectEqual(0x00001357, @extractBits(0x12345678, 0xf0f0f0f0));
}

// test
1 change: 1 addition & 0 deletions lib/compiler_rt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ comptime {
_ = @import("compiler_rt/bitreverse.zig");
_ = @import("compiler_rt/bswap.zig");
_ = @import("compiler_rt/cmp.zig");
_ = @import("compiler_rt/pdeppext.zig");

_ = @import("compiler_rt/shift.zig");
_ = @import("compiler_rt/negXi2.zig");
Expand Down
312 changes: 312 additions & 0 deletions lib/compiler_rt/pdeppext.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");

// Big integers are handled as arrays of 32-bit limbs; `limb`/`limb_ptr`
// below map logical limb indices to array positions per target endianness.
const Limb = u32;
// Wide enough to index any bit within a single Limb (log2(32) = 5).
const Log2Limb = u5;

comptime {
    // Software fallbacks for @depositBits (pdep) and @extractBits (pext),
    // referenced when the target lacks a native instruction or the operand
    // type is too wide for one.
    @export(&__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u128, .{ .name = "__pdep_u128", .linkage = common.linkage, .visibility = common.visibility });

    @export(&__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility });
}

const endian = builtin.cpu.arch.endian();

/// Returns logical limb `i` (0 = least significant) of `x`, accounting for
/// the target's limb storage order.
inline fn limb(x: []const Limb, i: usize) Limb {
    return switch (endian) {
        .little => x[i],
        .big => x[x.len - 1 - i],
    };
}

/// Returns a pointer to logical limb `i` (0 = least significant) of `x`.
inline fn limb_ptr(x: []Limb, i: usize) *Limb {
    return switch (endian) {
        .little => &x[i],
        .big => &x[x.len - 1 - i],
    };
}

/// Stores `v` into logical limb `i` (0 = least significant) of `x`.
inline fn limb_set(x: []Limb, i: usize, v: Limb) void {
    limb_ptr(x, i).* = v;
}

// Software PDEP (parallel bit deposit) over limb arrays: bit `i` of `source`
// is written to the position of the i-th set bit of `mask`; `result` bits not
// covered by the mask are left untouched, which is why the caller must zero it.
// Assumes that `result` is zeroed.
inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    // Current mask limb being scanned; consumed bits are cleared as we go.
    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // Number of mask bits consumed so far == index of the source bit to deposit.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest set bit in mask
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                // Clear the bit so the next scan finds the next set bit.
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            // Current mask limb is exhausted; move to the next (or finish).
            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        // Locate source bit `i`. NOTE(review): 32 is @bitSizeOf(Limb) —
        // keep in sync if Limb ever changes.
        const i_limb_index = i / 32;
        const i_limb_bit: Log2Limb = @truncate(i);

        // No source bits remain to deposit.
        if (i_limb_index >= source.len) break;

        const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0;

        // Deposit source bit `i` at the current mask bit's position.
        limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
    }
}

/// Exported big-integer PDEP entry point. `r`, `s` and `m` point to limb
/// arrays holding at least `bits` bits each; the result is written to `r`.
/// The full ABI-sized result storage is zeroed first because `pdep_bigint`
/// only ORs bits into it (ABI size may exceed ceil(bits/limb) limbs due to
/// alignment padding).
pub fn __pdep_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void {
    // Use the Limb type's own size/width rather than hard-coded 4 and 32 so
    // this stays correct if Limb changes.
    const abi_size: usize = @intCast(intAbiSize(@intCast(bits), builtin.target));
    const result_full = r[0 .. std.math.divCeil(usize, abi_size, @sizeOf(Limb)) catch unreachable];
    @memset(result_full, 0);

    const limbs_len = std.math.divCeil(usize, bits, @bitSizeOf(Limb)) catch unreachable;
    pdep_bigint(r[0..limbs_len], s[0..limbs_len], m[0..limbs_len]);
}

// Software PEXT (parallel bit extract) over limb arrays: the bits of `source`
// at positions where `mask` is set are packed, in ascending order, into the
// low bits of `result`. Untouched result bits must already be zero.
// Assumes that `result` is zeroed.
inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    // Current mask limb being scanned; consumed bits are cleared as we go.
    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // Number of mask bits consumed so far == destination bit index in `result`.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest remaining set bit of the mask (clearing it), or stop.
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        // Destination position for this extracted bit. NOTE(review): 32 is
        // @bitSizeOf(Limb) — keep in sync if Limb ever changes.
        const i_limb_index = i / 32;
        const i_limb_bit: Log2Limb = @truncate(i);

        // Destination exhausted. NOTE(review): bounds-checks against
        // source.len; callers in this file pass equally-sized slices, so this
        // also bounds `result` — confirm if call sites change.
        if (i_limb_index >= source.len) break;

        // Read the source bit at the mask bit's position...
        const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0;

        // ...and pack it at the next low destination position.
        limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;
    }
}

/// Exported big-integer PEXT entry point. `r`, `s` and `m` point to limb
/// arrays holding at least `bits` bits each; the result is written to `r`.
/// The full ABI-sized result storage is zeroed first because `pext_bigint`
/// only ORs bits into it (ABI size may exceed ceil(bits/limb) limbs due to
/// alignment padding).
pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void {
    // Use the Limb type's own size/width rather than hard-coded 4 and 32 so
    // this stays correct if Limb changes.
    const abi_size: usize = @intCast(intAbiSize(@intCast(bits), builtin.target));
    const result_full = r[0 .. std.math.divCeil(usize, abi_size, @sizeOf(Limb)) catch unreachable];
    @memset(result_full, 0);

    const limbs_len = std.math.divCeil(usize, bits, @bitSizeOf(Limb)) catch unreachable;
    pext_bigint(r[0..limbs_len], s[0..limbs_len], m[0..limbs_len]);
}

/// Generic software fallback for PDEP (parallel bit deposit): the low bits of
/// `source` are scattered, in ascending order, to the positions of the set
/// bits of `mask_`; all other result bits are zero.
inline fn pdep_uX(comptime T: type, source: T, mask_: T) T {
    var bb: T = 1; // walking source bit: 1 << (number of mask bits consumed)
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit of the mask, then clear it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bb;
        if (source_bit != 0) result |= bit;
        // Wrapping add: when `mask` has every bit of T set, the final doubling
        // of `bb` would overflow (trap in safe builds, UB in ReleaseFast) with
        // a plain `+=`. Wrapping to 0 is harmless — the loop exits right after.
        bb +%= bb;
    }

    return result;
}

/// Exported PDEP software fallback for 32-bit operands.
pub fn __pdep_u32(source: u32, mask: u32) callconv(.c) u32 {
    return pdep_uX(u32, source, mask);
}

/// Exported PDEP software fallback for 64-bit operands.
pub fn __pdep_u64(source: u64, mask: u64) callconv(.c) u64 {
    return pdep_uX(u64, source, mask);
}

/// Exported PDEP software fallback for 128-bit operands.
pub fn __pdep_u128(source: u128, mask: u128) callconv(.c) u128 {
    return pdep_uX(u128, source, mask);
}

/// Generic software fallback for PEXT (parallel bit extract): the bits of
/// `source` at positions where `mask_` is set are packed, in ascending order,
/// into the low bits of the result; all other result bits are zero.
inline fn pext_uX(comptime T: type, source: T, mask_: T) T {
    var bb: T = 1; // walking destination bit: 1 << (number of mask bits consumed)
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit of the mask, then clear it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bit;
        if (source_bit != 0) result |= bb;
        // Wrapping add: when `mask` has every bit of T set, the final doubling
        // of `bb` would overflow (trap in safe builds, UB in ReleaseFast) with
        // a plain `+=`. Wrapping to 0 is harmless — the loop exits right after.
        bb +%= bb;
    }

    return result;
}

/// Exported PEXT software fallback for 32-bit operands.
pub fn __pext_u32(source: u32, mask: u32) callconv(.c) u32 {
    return pext_uX(u32, source, mask);
}

/// Exported PEXT software fallback for 64-bit operands.
pub fn __pext_u64(source: u64, mask: u64) callconv(.c) u64 {
    return pext_uX(u64, source, mask);
}

/// Exported PEXT software fallback for 128-bit operands.
pub fn __pext_u128(source: u128, mask: u128) callconv(.c) u128 {
    return pext_uX(u128, source, mask);
}

// BEGIN HACKY CODE COPY WAIT FOR ALEXRP PR
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// Shorthand aliases into std for the inlined compiler-layout copies below.
const Target = std.Target;
const assert = std.debug.assert;

/// Byte alignment stored as log2 of the byte count (so `.@"8"` is stored as 3).
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub const Alignment = enum(u6) {
    @"1" = 0,
    @"2" = 1,
    @"4" = 2,
    @"8" = 3,
    @"16" = 4,
    @"32" = 5,
    @"64" = 6,
    none = std.math.maxInt(u6),
    _,

    /// Converts a byte count to an Alignment. `n` must be 0 (yielding `.none`)
    /// or a power of two (asserted).
    pub fn fromByteUnits(n: u64) Alignment {
        if (n == 0) return .none;
        assert(std.math.isPowerOfTwo(n));
        return @enumFromInt(@ctz(n));
    }

    /// Align an address forwards to this alignment.
    pub fn forward(a: Alignment, addr: u64) u64 {
        assert(a != .none);
        // Round up by adding (alignment - 1) and masking off the low bits.
        const x = (@as(u64, 1) << @intFromEnum(a)) - 1;
        return (addr + x) & ~x;
    }
};

/// ABI size in bytes of an integer with the given bit width on `target`:
/// the byte count rounded up to the type's ABI alignment.
pub fn intAbiSize(bits: u16, target: Target) u64 {
    // u17 avoids overflow of bits + 7 when bits is near maxInt(u16).
    const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8);
    return intAbiAlignment(bits, target).forward(byte_count);
}

/// ABI alignment of an integer with the given bit width on `target`.
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub fn intAbiAlignment(bits: u16, target: Target) Alignment {
    return switch (target.cpu.arch) {
        .x86 => switch (bits) {
            0 => .none,
            1...8 => .@"1",
            9...16 => .@"2",
            17...32 => .@"4",
            // 64-bit integer alignment on 32-bit x86 differs by OS ABI.
            33...64 => switch (target.os.tag) {
                .uefi, .windows => .@"8",
                else => .@"4",
            },
            else => .@"16",
        },
        .x86_64 => switch (bits) {
            0 => .none,
            1...8 => .@"1",
            9...16 => .@"2",
            17...32 => .@"4",
            33...64 => .@"8",
            else => .@"16",
        },
        // Other targets: next power of two of the byte count, capped at the
        // target's maximum integer alignment.
        else => return Alignment.fromByteUnits(@min(
            std.math.ceilPowerOfTwoPromote(u16, @as(u16, @intCast((@as(u17, bits) + 7) / 8))),
            maxIntAlignment(target),
        )),
    };
}

/// Maximum ABI alignment (in bytes) of any integer type on `target`.
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub fn maxIntAlignment(target: std.Target) u16 {
    return switch (target.cpu.arch) {
        .avr => 1,
        .msp430 => 2,
        .xcore => 4,
        .propeller => 4,

        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        .hexagon,
        .mips,
        .mipsel,
        .powerpc,
        .powerpcle,
        .amdgcn,
        .riscv32,
        .sparc,
        .s390x,
        .lanai,
        .wasm32,
        .wasm64,
        => 8,

        // For these, LLVMABIAlignmentOfType(i128) reports 8. Note that 16
        // is a relevant number in three cases:
        // 1. Different machine code instruction when loading into SIMD register.
        // 2. The C ABI wants 16 for extern structs.
        // 3. 16-byte cmpxchg needs 16-byte alignment.
        // Same logic for powerpc64, mips64, sparc64.
        .powerpc64,
        .powerpc64le,
        .mips64,
        .mips64el,
        .sparc64,
        => switch (target.ofmt) {
            .c => 16,
            else => 8,
        },

        .x86_64 => 16,

        // Even LLVMABIAlignmentOfType(i128) agrees on these targets.
        .x86,
        .aarch64,
        .aarch64_be,
        .riscv64,
        .bpfel,
        .bpfeb,
        .nvptx,
        .nvptx64,
        => 16,

        // Below this comment are unverified but based on the fact that C requires
        // int128_t to be 16 bytes aligned, it's a safe default.
        .csky,
        .arc,
        .m68k,
        .kalimba,
        .spirv,
        .spirv32,
        .ve,
        .spirv64,
        .loongarch32,
        .loongarch64,
        .xtensa,
        => 16,
    };
}
Loading