@@ -412,41 +412,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
412
412
Some ( ( width, signed) ) => match name {
413
413
sym:: ctlz => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
414
414
415
- sym:: cttz => {
416
- let func = self . current_func ( ) ;
417
- let then_block = func. new_block ( "then" ) ;
418
- let else_block = func. new_block ( "else" ) ;
419
- let after_block = func. new_block ( "after" ) ;
420
-
421
- let arg = args[ 0 ] . immediate ( ) ;
422
- let result = func. new_local ( None , self . u32_type , "zeros" ) ;
423
- let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
424
- let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
425
- self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
426
-
427
- let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
428
- then_block. add_assignment ( None , result, zero_result) ;
429
- then_block. end_with_jump ( None , after_block) ;
430
-
431
- // NOTE: since jumps were added in a place
432
- // count_leading_zeroes() does not expect, the current block
433
- // in the state need to be updated.
434
- self . switch_to_block ( else_block) ;
435
-
436
- let zeros = self . count_trailing_zeroes ( width, arg) ;
437
- self . llbb ( ) . add_assignment ( None , result, zeros) ;
438
- self . llbb ( ) . end_with_jump ( None , after_block) ;
439
-
440
- // NOTE: since jumps were added in a place rustc does not
441
- // expect, the current block in the state need to be updated.
442
- self . switch_to_block ( after_block) ;
443
-
444
- result. to_rvalue ( )
445
- }
446
415
sym:: ctlz_nonzero => {
447
416
self . count_leading_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
448
417
}
449
- sym:: cttz_nonzero => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
418
+ sym:: cttz => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
419
+ sym:: cttz_nonzero => {
420
+ self . count_trailing_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
421
+ }
450
422
sym:: ctpop => self . pop_count ( args[ 0 ] . immediate ( ) ) ,
451
423
sym:: bswap => {
452
424
if width == 8 {
@@ -984,7 +956,37 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
984
956
self . context . new_cast ( self . location , res, result_type)
985
957
}
986
958
987
- fn count_trailing_zeroes ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
959
+ fn count_trailing_zeroes ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
960
+ let func = self . current_func ( ) ;
961
+ let then_block = func. new_block ( "then" ) ;
962
+ let else_block = func. new_block ( "else" ) ;
963
+ let after_block = func. new_block ( "after" ) ;
964
+
965
+ let result = func. new_local ( None , self . u32_type , "zeros" ) ;
966
+ let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
967
+ let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
968
+ self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
969
+
970
+ let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
971
+ then_block. add_assignment ( None , result, zero_result) ;
972
+ then_block. end_with_jump ( None , after_block) ;
973
+
974
+ // NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect,
975
+ // the current block in the state needs to be updated.
976
+ self . switch_to_block ( else_block) ;
977
+
978
+ let zeros = self . count_trailing_zeroes_nonzero ( width, arg) ;
979
+ self . llbb ( ) . add_assignment ( None , result, zeros) ;
980
+ self . llbb ( ) . end_with_jump ( None , after_block) ;
981
+
982
+ // NOTE: since jumps were added in a place rustc does not
983
+ // expect, the current block in the state needs to be updated.
984
+ self . switch_to_block ( after_block) ;
985
+
986
+ result. to_rvalue ( )
987
+ }
988
+
989
+ fn count_trailing_zeroes_nonzero ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
988
990
let arg_type = arg. get_type ( ) ;
989
991
let result_type = self . u32_type ;
990
992
let arg = if arg_type. is_signed ( self . cx ) {
@@ -1008,50 +1010,43 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
1008
1010
( "__builtin_ctzll" , self . cx . ulonglong_type )
1009
1011
}
1010
1012
else if arg_type. is_u128 ( self . cx ) {
1011
- // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
1012
- let array_type = self . context . new_array_type ( None , arg_type, 3 ) ;
1013
- let result = self . current_func ( )
1014
- . new_local ( None , array_type, "count_loading_zeroes_results" ) ;
1015
-
1016
- let sixty_four = self . gcc_int ( arg_type, 64 ) ;
1017
- let shift = self . gcc_lshr ( arg, sixty_four) ;
1018
- let high = self . gcc_int_cast ( shift, self . u64_type ) ;
1019
- let low = self . gcc_int_cast ( arg, self . u64_type ) ;
1020
-
1021
- let zero = self . context . new_rvalue_zero ( self . usize_type ) ;
1022
- let one = self . context . new_rvalue_one ( self . usize_type ) ;
1023
- let two = self . context . new_rvalue_from_long ( self . usize_type , 2 ) ;
1013
+ // __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
1014
+ // else call it on the 64 high bits and add 64. In the else case, 64 high bits can't be 0
1015
+ // because arg is not 0.
1024
1016
1025
- let ctzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
1017
+ let result = self . current_func ( )
1018
+ . new_local ( None , result_type, "count_trailing_zeroes_results" ) ;
1026
1019
1027
- let first_elem = self . context . new_array_access ( self . location , result , zero ) ;
1028
- let first_value = self . gcc_int_cast ( self . context . new_call ( self . location , ctzll , & [ low ] ) , arg_type ) ;
1029
- self . llbb ( )
1030
- . add_assignment ( self . location , first_elem , first_value ) ;
1020
+ let ctlz_then_block = self . current_func ( ) . new_block ( "ctlz_then" ) ;
1021
+ let ctlz_else_block = self . current_func ( ) . new_block ( "ctlz_else" ) ;
1022
+ let ctlz_after_block = self . current_func ( ) . new_block ( "ctlz_after" ) ;
1023
+ let clzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
1031
1024
1032
- let second_elem = self . context . new_array_access ( self . location , result, one) ;
1033
- let second_value = self . gcc_add ( self . gcc_int_cast ( self . context . new_call ( self . location , ctzll, & [ high] ) , arg_type) , sixty_four) ;
1034
- self . llbb ( )
1035
- . add_assignment ( self . location , second_elem, second_value) ;
1025
+ let low = self . gcc_int_cast ( arg, self . u64_type ) ;
1026
+ let zero_low = self . const_uint ( low. get_type ( ) , 0 ) ;
1027
+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , low, zero_low) ;
1028
+ self . llbb ( ) . end_with_conditional ( self . location , cond, ctlz_then_block, ctlz_else_block) ;
1029
+ self . switch_to_block ( ctlz_then_block) ;
1036
1030
1037
- let third_elem = self . context . new_array_access ( self . location , result, two) ;
1038
- let third_value = self . gcc_int ( arg_type, 128 ) ;
1039
- self . llbb ( )
1040
- . add_assignment ( self . location , third_elem, third_value) ;
1031
+ let result_128 =
1032
+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ low] ) , result_type) ;
1041
1033
1042
- let not_low = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , low) ;
1043
- let not_high = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , high) ;
1044
- let not_low_and_not_high = not_low & not_high;
1045
- let index = not_low + not_low_and_not_high;
1046
- // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
1047
- // gcc.
1048
- // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
1049
- // compilation stage.
1050
- let index = self . context . new_cast ( self . location , index, self . i32_type ) ;
1034
+ ctlz_then_block. add_assignment ( self . location , result, result_128) ;
1035
+ ctlz_then_block. end_with_jump ( self . location , ctlz_after_block) ;
1051
1036
1052
- let res = self . context . new_array_access ( self . location , result, index) ;
1037
+ self . switch_to_block ( ctlz_else_block) ;
1038
+ let sixty_four = self . const_uint ( arg_type, 64 ) ;
1039
+ let shift = self . lshr ( arg, sixty_four) ;
1040
+ let high = self . gcc_int_cast ( shift, self . u64_type ) ;
1041
+ let high_leading_zeroes =
1042
+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ high] ) , result_type) ;
1053
1043
1054
- return self . gcc_int_cast ( res. to_rvalue ( ) , result_type) ;
1044
+ let sixty_four_result_type = self . const_uint ( result_type, 64 ) ;
1045
+ let result_128 = self . add ( high_leading_zeroes, sixty_four_result_type) ;
1046
+ ctlz_else_block. add_assignment ( self . location , result, result_128) ;
1047
+ ctlz_else_block. end_with_jump ( self . location , ctlz_after_block) ;
1048
+ self . switch_to_block ( ctlz_after_block) ;
1049
+ return result. to_rvalue ( ) ;
1055
1050
}
1056
1051
else {
1057
1052
let count_trailing_zeroes = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
0 commit comments