Skip to content

Commit 2f49779

Browse files
committed
fix 128bit cttz intrinsic UB
1 parent 97aaccd commit 2f49779

File tree

1 file changed

+65
-70
lines changed

1 file changed

+65
-70
lines changed

src/intrinsic/mod.rs

Lines changed: 65 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -412,41 +412,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
412412
Some((width, signed)) => match name {
413413
sym::ctlz => self.count_leading_zeroes(width, args[0].immediate()),
414414

415-
sym::cttz => {
416-
let func = self.current_func();
417-
let then_block = func.new_block("then");
418-
let else_block = func.new_block("else");
419-
let after_block = func.new_block("after");
420-
421-
let arg = args[0].immediate();
422-
let result = func.new_local(None, self.u32_type, "zeros");
423-
let zero = self.cx.gcc_zero(arg.get_type());
424-
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
425-
self.llbb().end_with_conditional(None, cond, then_block, else_block);
426-
427-
let zero_result = self.cx.gcc_uint(self.u32_type, width);
428-
then_block.add_assignment(None, result, zero_result);
429-
then_block.end_with_jump(None, after_block);
430-
431-
// NOTE: since jumps were added in a place
432-
// count_leading_zeroes() does not expect, the current block
433-
// in the state need to be updated.
434-
self.switch_to_block(else_block);
435-
436-
let zeros = self.count_trailing_zeroes(width, arg);
437-
self.llbb().add_assignment(None, result, zeros);
438-
self.llbb().end_with_jump(None, after_block);
439-
440-
// NOTE: since jumps were added in a place rustc does not
441-
// expect, the current block in the state need to be updated.
442-
self.switch_to_block(after_block);
443-
444-
result.to_rvalue()
445-
}
446415
sym::ctlz_nonzero => {
447416
self.count_leading_zeroes_nonzero(width, args[0].immediate())
448417
}
449-
sym::cttz_nonzero => self.count_trailing_zeroes(width, args[0].immediate()),
418+
sym::cttz => self.count_trailing_zeroes(width, args[0].immediate()),
419+
sym::cttz_nonzero => {
420+
self.count_trailing_zeroes_nonzero(width, args[0].immediate())
421+
}
450422
sym::ctpop => self.pop_count(args[0].immediate()),
451423
sym::bswap => {
452424
if width == 8 {
@@ -984,7 +956,37 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
984956
self.context.new_cast(self.location, res, result_type)
985957
}
986958

987-
fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
959+
fn count_trailing_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
960+
let func = self.current_func();
961+
let then_block = func.new_block("then");
962+
let else_block = func.new_block("else");
963+
let after_block = func.new_block("after");
964+
965+
let result = func.new_local(None, self.u32_type, "zeros");
966+
let zero = self.cx.gcc_zero(arg.get_type());
967+
let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
968+
self.llbb().end_with_conditional(None, cond, then_block, else_block);
969+
970+
let zero_result = self.cx.gcc_uint(self.u32_type, width);
971+
then_block.add_assignment(None, result, zero_result);
972+
then_block.end_with_jump(None, after_block);
973+
974+
// NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect,
975+
// the current block in the state needs to be updated.
976+
self.switch_to_block(else_block);
977+
978+
let zeros = self.count_trailing_zeroes_nonzero(width, arg);
979+
self.llbb().add_assignment(None, result, zeros);
980+
self.llbb().end_with_jump(None, after_block);
981+
982+
// NOTE: since jumps were added in a place rustc does not
983+
// expect, the current block in the state needs to be updated.
984+
self.switch_to_block(after_block);
985+
986+
result.to_rvalue()
987+
}
988+
989+
fn count_trailing_zeroes_nonzero(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
988990
let arg_type = arg.get_type();
989991
let result_type = self.u32_type;
990992
let arg = if arg_type.is_signed(self.cx) {
@@ -1008,50 +1010,43 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
10081010
("__builtin_ctzll", self.cx.ulonglong_type)
10091011
}
10101012
else if arg_type.is_u128(self.cx) {
1011-
// Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
1012-
let array_type = self.context.new_array_type(None, arg_type, 3);
1013-
let result = self.current_func()
1014-
.new_local(None, array_type, "count_loading_zeroes_results");
1015-
1016-
let sixty_four = self.gcc_int(arg_type, 64);
1017-
let shift = self.gcc_lshr(arg, sixty_four);
1018-
let high = self.gcc_int_cast(shift, self.u64_type);
1019-
let low = self.gcc_int_cast(arg, self.u64_type);
1020-
1021-
let zero = self.context.new_rvalue_zero(self.usize_type);
1022-
let one = self.context.new_rvalue_one(self.usize_type);
1023-
let two = self.context.new_rvalue_from_long(self.usize_type, 2);
1013+
// __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
1014+
// else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0
1015+
// because arg is not 0.
10241016

1025-
let ctzll = self.context.get_builtin_function("__builtin_ctzll");
1017+
let result = self.current_func()
1018+
.new_local(None, result_type, "count_trailing_zeroes_results");
10261019

1027-
let first_elem = self.context.new_array_access(self.location, result, zero);
1028-
let first_value = self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[low]), arg_type);
1029-
self.llbb()
1030-
.add_assignment(self.location, first_elem, first_value);
1020+
let ctlz_then_block = self.current_func().new_block("ctlz_then");
1021+
let ctlz_else_block = self.current_func().new_block("ctlz_else");
1022+
let ctlz_after_block = self.current_func().new_block("ctlz_after");
1023+
let clzll = self.context.get_builtin_function("__builtin_ctzll");
10311024

1032-
let second_elem = self.context.new_array_access(self.location, result, one);
1033-
let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(self.location, ctzll, &[high]), arg_type), sixty_four);
1034-
self.llbb()
1035-
.add_assignment(self.location, second_elem, second_value);
1025+
let low = self.gcc_int_cast(arg, self.u64_type);
1026+
let zero_low = self.const_uint(low.get_type(), 0);
1027+
let cond = self.gcc_icmp(IntPredicate::IntNE, low, zero_low);
1028+
self.llbb().end_with_conditional(self.location, cond, ctlz_then_block, ctlz_else_block);
1029+
self.switch_to_block(ctlz_then_block);
10361030

1037-
let third_elem = self.context.new_array_access(self.location, result, two);
1038-
let third_value = self.gcc_int(arg_type, 128);
1039-
self.llbb()
1040-
.add_assignment(self.location, third_elem, third_value);
1031+
let result_128 =
1032+
self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), result_type);
10411033

1042-
let not_low = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, low);
1043-
let not_high = self.context.new_unary_op(self.location, UnaryOp::LogicalNegate, self.u64_type, high);
1044-
let not_low_and_not_high = not_low & not_high;
1045-
let index = not_low + not_low_and_not_high;
1046-
// NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
1047-
// gcc.
1048-
// TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
1049-
// compilation stage.
1050-
let index = self.context.new_cast(self.location, index, self.i32_type);
1034+
ctlz_then_block.add_assignment(self.location, result, result_128);
1035+
ctlz_then_block.end_with_jump(self.location, ctlz_after_block);
10511036

1052-
let res = self.context.new_array_access(self.location, result, index);
1037+
self.switch_to_block(ctlz_else_block);
1038+
let sixty_four = self.const_uint(arg_type, 64);
1039+
let shift = self.lshr(arg, sixty_four);
1040+
let high = self.gcc_int_cast(shift, self.u64_type);
1041+
let high_leading_zeroes =
1042+
self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), result_type);
10531043

1054-
return self.gcc_int_cast(res.to_rvalue(), result_type);
1044+
let sixty_four_result_type = self.const_uint(result_type, 64);
1045+
let result_128 = self.add(high_leading_zeroes, sixty_four_result_type);
1046+
ctlz_else_block.add_assignment(self.location, result, result_128);
1047+
ctlz_else_block.end_with_jump(self.location, ctlz_after_block);
1048+
self.switch_to_block(ctlz_after_block);
1049+
return result.to_rvalue();
10551050
}
10561051
else {
10571052
let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll");

0 commit comments

Comments
 (0)