-
Notifications
You must be signed in to change notification settings - Fork 54
Open
Description
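Calling `reinterpret` on a tuple generates GPU-incompatible code when compiled for the GPU: the resulting IR contains dynamic dispatch through the Julia runtime (`ijl_apply_generic`, `ijl_invoke`) and a call to `jl_f_tuple`: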
julia> foo(x,y) = reinterpret(UInt64, (x,y))
foo (generic function with 1 method)
julia> OpenCL.code_llvm(foo, (Float32, Float32))
; @ REPL[3]:1 within `foo`
define i64 @julia_foo_81905(float %"x::Float32", float %"y::Float32") local_unnamed_addr {
top:
%jlcallframe10 = alloca [9 x ptr], align 8
%"new::Tuple" = alloca [2 x float], align 4
store float %"x::Float32", ptr %"new::Tuple", align 4
%0 = getelementptr inbounds i8, ptr %"new::Tuple", i64 4
store float %"y::Float32", ptr %0, align 4
; ┌ @ essentials.jl:736 within `reinterpret`
; │┌ @ reinterpretarray.jl:857 within `_reinterpret`
%1 = call fastcc nonnull ptr @julia_packedsize_81937()
; ││ @ reinterpretarray.jl:859 within `_reinterpret`
store ptr %1, ptr %jlcallframe10, align 8
%2 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 1
store ptr inttoptr (i64 139924789764000 to ptr), ptr %2, align 8
%3 = call nonnull ptr @ijl_apply_generic(ptr inttoptr (i64 139924866051216 to ptr), ptr nonnull %jlcallframe10, i32 2)
%.tag_addr = getelementptr inbounds i64, ptr %3, i64 -1
%.tag = load atomic i64, ptr %.tag_addr unordered, align 8
%4 = and i64 %.tag, -16
%5 = inttoptr i64 %4 to ptr
%exactly_isa = icmp eq ptr %5, inttoptr (i64 192 to ptr)
br i1 %exactly_isa, label %pass, label %fail
L5: ; preds = %pass
; ││ @ reinterpretarray.jl:862 within `_reinterpret`
; ││┌ @ refpointer.jl:147 within `Ref`
; │││┌ @ refvalue.jl:8 within `RefValue`
%"new::RefValue.sroa.0.0.copyload" = load i64, ptr %"new::Tuple", align 1
; ││└└
; ││ @ reinterpretarray.jl:864 within `_reinterpret`
; ││┌ @ reinterpretarray.jl:811 within `struct_subpadding`
%6 = load ptr, ptr addrspacecast (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @jl_small_typeof, i64 320) to ptr), align 8
store ptr %6, ptr %jlcallframe10, align 8
%7 = call nonnull ptr @ijl_invoke(ptr inttoptr (i64 139924884548304 to ptr), ptr nonnull %jlcallframe10, i32 1, ptr inttoptr (i64 139924885003056 to ptr))
; │││┌ @ reinterpretarray.jl:755 within `padding`
store ptr inttoptr (i64 139924922404224 to ptr), ptr %jlcallframe10, align 8
store ptr inttoptr (i64 139924789760416 to ptr), ptr %2, align 8
%8 = call nonnull ptr @ijl_invoke(ptr inttoptr (i64 139924884548304 to ptr), ptr nonnull %jlcallframe10, i32 2, ptr inttoptr (i64 139924884994992 to ptr))
; │││└
%9 = call fastcc i8 @julia____81955(ptr %7, ptr %8)
; ││└
%10 = and i8 %9, 1
%.not = icmp eq i8 %10, 0
br i1 %.not, label %L22, label %L31
L22: ; preds = %L5
; ││ @ reinterpretarray.jl:874 within `_reinterpret`
%11 = call fastcc i64 @julia__reinterpret_padding_81925(ptr nocapture readonly %"new::Tuple")
br label %L31
L24: ; preds = %pass
%12 = load ptr, ptr addrspacecast (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @jl_small_typeof, i64 320) to ptr), align 8
store ptr inttoptr (i64 139924981334912 to ptr), ptr %jlcallframe10, align 8
store ptr %12, ptr %2, align 8
%13 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 2
store ptr inttoptr (i64 139924954237824 to ptr), ptr %13, align 8
%14 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 3
store ptr inttoptr (i64 139924922404224 to ptr), ptr %14, align 8
%15 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 4
store ptr inttoptr (i64 139924981334864 to ptr), ptr %15, align 8
%16 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 5
store ptr inttoptr (i64 139924789764000 to ptr), ptr %16, align 8
%17 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 6
store ptr inttoptr (i64 139924954237824 to ptr), ptr %17, align 8
%18 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 7
store ptr %1, ptr %18, align 8
%19 = getelementptr inbounds ptr, ptr %jlcallframe10, i64 8
store ptr inttoptr (i64 139924981334832 to ptr), ptr %19, align 8
%jl_f_tuple_ret = call nonnull ptr @jl_f_tuple(ptr null, ptr nonnull %jlcallframe10, i32 9)
; ││ @ reinterpretarray.jl:859 within `_reinterpret`
call fastcc void @gpu_report_exception()
call fastcc void @gpu_signal_exception()
call void @llvm.trap()
unreachable
L31: ; preds = %L22, %L5
%value_phi = phi i64 [ %11, %L22 ], [ %"new::RefValue.sroa.0.0.copyload", %L5 ]
; │└
ret i64 %value_phi
fail: ; preds = %top
; │┌ @ reinterpretarray.jl:859 within `_reinterpret`
call fastcc void @gpu_report_exception()
call fastcc void @gpu_signal_exception()
call void @llvm.trap()
unreachable
pass: ; preds = %top
%jl_false = load ptr, ptr addrspace(1) @jl_false, align 8
%20 = icmp eq ptr %3, %jl_false
br i1 %20, label %L24, label %L5
; └└
}
By contrast, the same function compiles to good code on the CPU, with just bitcasts and integer shifts and no runtime calls:
julia> code_llvm(foo, (Float32, Float32))
; Function Signature: foo(Float32, Float32)
; @ REPL[3]:1 within `foo`
define i64 @julia_foo_85335(float %"x::Float32", float %"y::Float32") #0 {
top:
; ┌ @ essentials.jl:736 within `reinterpret`
; │┌ @ reinterpretarray.jl:862 within `_reinterpret`
; ││┌ @ refpointer.jl:147 within `Ref`
; │││┌ @ refvalue.jl:8 within `RefValue`
%0 = bitcast float %"x::Float32" to i32
%1 = bitcast float %"y::Float32" to i32
; ││└└
; ││ @ reinterpretarray.jl:871 within `_reinterpret`
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base_compiler.jl:54 within `getproperty`
%"new::RefValue3.sroa.5.0.insert.ext10" = zext i32 %1 to i64
%"new::RefValue3.sroa.5.0.insert.shift11" = shl nuw i64 %"new::RefValue3.sroa.5.0.insert.ext10", 32
%"new::RefValue3.sroa.0.0.insert.ext7" = zext i32 %0 to i64
%"new::RefValue3.sroa.0.0.insert.insert9" = or disjoint i64 %"new::RefValue3.sroa.5.0.insert.shift11", %"new::RefValue3.sroa.0.0.insert.ext7"
ret i64 %"new::RefValue3.sroa.0.0.insert.insert9"
; └└└└
}
Metadata
Metadata
Assignees
Labels
No labels