diff --git a/src/mono/mono/mini/cpu-amd64.mdesc b/src/mono/mono/mini/cpu-amd64.mdesc index a380b4dc7b4a1d..f01ae166255317 100644 --- a/src/mono/mono/mini/cpu-amd64.mdesc +++ b/src/mono/mono/mini/cpu-amd64.mdesc @@ -171,6 +171,9 @@ vcall_reg: src1:i len:64 clob:c vcall_membase: src1:b len:64 clob:c call_reg: dest:a src1:i len:32 clob:c call_membase: dest:a src1:b len:32 clob:c +xcall: dest:x len:64 clob:c +xcall_reg: dest:x src1:i len:64 clob:c +xcall_membase: dest:x src1:b len:64 clob:c iconst: dest:i len:10 i8const: dest:i len:10 r4const: dest:f len:17 @@ -597,7 +600,7 @@ vcall2: len:64 clob:c vcall2_reg: src1:i len:64 clob:c vcall2_membase: src1:b len:64 clob:c -dyn_call: src1:i src2:i len:192 clob:c +dyn_call: src1:i src2:i len:252 clob:c localloc_imm: dest:i len:120 @@ -786,6 +789,7 @@ cvttpd2dq: dest:x src1:x len:5 clob:1 cvttps2dq: dest:x src1:x len:5 clob:1 xmove: dest:x src1:x len:5 +xmove_arg: dest:x src1:x len:5 xzero: dest:x len:5 xones: dest:x len:5 xconst: dest:x len:12 diff --git a/src/mono/mono/mini/decompose.c b/src/mono/mono/mini/decompose.c index 84221fc808afb6..5c6e0a33e436c5 100644 --- a/src/mono/mono/mini/decompose.c +++ b/src/mono/mono/mini/decompose.c @@ -1344,7 +1344,7 @@ mono_decompose_vtype_opts (MonoCompile *cfg) MonoCallInst *call = (MonoCallInst*)ins; int size; - if (COMPILE_LLVM (cfg)) + if (COMPILE_LLVM (cfg) || call->dont_decompose) break; if (call->vret_in_reg) { diff --git a/src/mono/mono/mini/method-to-ir.c b/src/mono/mono/mini/method-to-ir.c index fd8a31e1e4360e..df19553c83695d 100644 --- a/src/mono/mono/mini/method-to-ir.c +++ b/src/mono/mono/mini/method-to-ir.c @@ -250,6 +250,12 @@ mono_alloc_preg (MonoCompile *cfg) return alloc_preg (cfg); } +guint32 +mono_alloc_xreg (MonoCompile *cfg) +{ + return alloc_xreg (cfg); +} + guint32 mono_alloc_dreg (MonoCompile *cfg, MonoStackType stack_type) { @@ -1692,6 +1698,34 @@ MONO_RESTORE_WARNING return ins; } +MonoInst* +mini_emit_regmove (MonoCompile *cfg, int sreg, MonoType *type) +{ + 
MonoInst *ins; + int opcode = mono_type_to_regmove (cfg, type); + + if (opcode == OP_FMOVE) { + MONO_INST_NEW (cfg, ins, OP_FMOVE); + ins->dreg = mono_alloc_freg (cfg); + } else if (opcode == OP_LMOVE) { + MONO_INST_NEW (cfg, ins, OP_LMOVE); + ins->dreg = mono_alloc_lreg (cfg); + } else if (opcode == OP_RMOVE) { + MONO_INST_NEW (cfg, ins, OP_RMOVE); + ins->dreg = mono_alloc_freg (cfg); + } else if (opcode == OP_XMOVE) { + MONO_INST_NEW (cfg, ins, OP_XMOVE); + ins->dreg = mono_alloc_xreg (cfg); + ins->klass = mono_class_from_mono_type_internal (type); + } else { + MONO_INST_NEW (cfg, ins, OP_MOVE); + ins->dreg = mono_alloc_ireg (cfg); + } + ins->sreg1 = sreg; + + return ins; +} + static MonoInst* mono_create_fast_tls_getter (MonoCompile *cfg, MonoTlsKey key) { diff --git a/src/mono/mono/mini/mini-amd64-gsharedvt.c b/src/mono/mono/mini/mini-amd64-gsharedvt.c index 0aa2f1559006b3..d839cf858276f6 100644 --- a/src/mono/mono/mini/mini-amd64-gsharedvt.c +++ b/src/mono/mono/mini/mini-amd64-gsharedvt.c @@ -163,6 +163,7 @@ get_arg_slots (ArgInfo *ainfo, int **out_slots, gboolean is_source_argument) break; case ArgInDoubleSSEReg: case ArgInFloatSSEReg: + case ArgSIMDInSSEReg: nsrc = 1; src = g_malloc (nsrc * sizeof (int)); src [0] = map_freg (sreg); @@ -224,20 +225,21 @@ static void handle_marshal_when_dst_gsharedvt (ArgInfo *src_info, int *arg_marshal) { switch (src_info->storage) { - case ArgInIReg: - case ArgInDoubleSSEReg: - case ArgInFloatSSEReg: - case ArgValuetypeInReg: - case ArgOnStack: - *arg_marshal = GSHAREDVT_ARG_BYVAL_TO_BYREF; - break; - case ArgValuetypeAddrInIReg: - case ArgValuetypeAddrOnStack: - *arg_marshal = GSHAREDVT_ARG_NONE; - break; - default: - NOT_IMPLEMENTED; // See above - break; + case ArgInIReg: + case ArgInDoubleSSEReg: + case ArgInFloatSSEReg: + case ArgSIMDInSSEReg: + case ArgValuetypeInReg: + case ArgOnStack: + *arg_marshal = GSHAREDVT_ARG_BYVAL_TO_BYREF; + break; + case ArgValuetypeAddrInIReg: + case ArgValuetypeAddrOnStack: + *arg_marshal 
= GSHAREDVT_ARG_NONE; + break; + default: + NOT_IMPLEMENTED; // See above + break; } } @@ -331,6 +333,7 @@ mono_arch_get_gsharedvt_call_info (MonoMemoryManager *mem_manager, gpointer addr case ArgInIReg: case ArgInDoubleSSEReg: case ArgInFloatSSEReg: + case ArgSIMDInSSEReg: case ArgValuetypeInReg: case ArgOnStack: nsrc = get_arg_slots (src_info, &src, TRUE); @@ -500,6 +503,9 @@ mono_arch_get_gsharedvt_call_info (MonoMemoryManager *mem_manager, gpointer addr case ArgInFloatSSEReg: info->ret_marshal = GSHAREDVT_RET_R8; break; + case ArgSIMDInSSEReg: + info->ret_marshal = GSHAREDVT_RET_SIMD; + break; case ArgValuetypeAddrInIReg: break; default: diff --git a/src/mono/mono/mini/mini-amd64-gsharedvt.h b/src/mono/mono/mini/mini-amd64-gsharedvt.h index 539dff9ff51e16..fea5d385b7d968 100644 --- a/src/mono/mono/mini/mini-amd64-gsharedvt.h +++ b/src/mono/mono/mini/mini-amd64-gsharedvt.h @@ -37,6 +37,7 @@ typedef enum { GSHAREDVT_RET_I8, // 8 byte integer GSHAREDVT_RET_IREGS_1, // Load in first return register GSHAREDVT_RET_R8, // Double + GSHAREDVT_RET_SIMD, // SIMD GSHAREDVT_RET_NUM, } GSharedVtRetMarshal; diff --git a/src/mono/mono/mini/mini-amd64.c b/src/mono/mono/mini/mini-amd64.c index 6278ae4b9aea7a..2e00c6f8fa710b 100644 --- a/src/mono/mono/mini/mini-amd64.c +++ b/src/mono/mono/mini/mini-amd64.c @@ -674,6 +674,24 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type, return; } +#ifndef TARGET_WIN32 + /* + * Can't use mini_class_is_simd () here as we don't have access to a MonoCompile. + * So have to restrict the types to the ones which are supported both in llvm and + * non-llvm mode. 
+ */ + if (m_class_is_simd_type (klass) && struct_size == 16 && !sig->pinvoke) { + if (*fr >= FLOAT_PARAM_REGS) { + pass_on_stack = TRUE; + } else { + ainfo->storage = ArgSIMDInSSEReg; + ainfo->reg = (guint8)*fr; + (*fr) ++; + return; + } + } +#endif + if (pass_on_stack) { /* Always pass in memory */ ainfo->offset = GINT32_TO_INT16 (*stack_size); @@ -1110,7 +1128,7 @@ arg_get_storage (CallContext *ccontext, ArgInfo *ainfo) return &ccontext->gregs [ainfo->reg]; case ArgInFloatSSEReg: case ArgInDoubleSSEReg: - return &ccontext->fregs [ainfo->reg]; + return &ccontext->fregs [ainfo->reg * 2]; case ArgOnStack: case ArgValuetypeAddrOnStack: return ccontext->stack + ainfo->offset; @@ -1126,7 +1144,7 @@ arg_get_storage (CallContext *ccontext, ArgInfo *ainfo) return &ccontext->gregs [ainfo->pair_regs [0]]; case ArgInFloatSSEReg: case ArgInDoubleSSEReg: - return &ccontext->fregs [ainfo->pair_regs [0]]; + return &ccontext->fregs [ainfo->pair_regs [0] * 2]; default: g_assert_not_reached (); } @@ -1154,7 +1172,7 @@ arg_get_val (CallContext *ccontext, ArgInfo *ainfo, gpointer dest) break; case ArgInFloatSSEReg: case ArgInDoubleSSEReg: - *(double*)dest_cast = ccontext->fregs [reg_storage]; + *(double*)dest_cast = ccontext->fregs [reg_storage * 2]; break; default: g_assert_not_reached (); @@ -1178,7 +1196,7 @@ arg_set_val (CallContext *ccontext, ArgInfo *ainfo, gpointer src) break; case ArgInFloatSSEReg: case ArgInDoubleSSEReg: - ccontext->fregs [reg_storage] = *(double*)src_cast; + ccontext->fregs [reg_storage * 2] = *(double*)src_cast; break; default: g_assert_not_reached (); @@ -1790,6 +1808,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: cfg->ret->opcode = OP_REGVAR; cfg->ret->inst_c0 = cinfo->ret.reg; cfg->ret->dreg = cinfo->ret.reg; @@ -1877,7 +1896,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) * are volatile across calls. * FIXME: Optimize this. 
*/ - if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg)) + if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgSIMDInSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg)) inreg = FALSE; ins->opcode = OP_REGOFFSET; @@ -1886,6 +1905,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: case ArgGSharedVtInReg: if (inreg) { ins->opcode = OP_REGVAR; @@ -1933,14 +1953,23 @@ mono_arch_allocate_vars (MonoCompile *cfg) ins->opcode = OP_REGOFFSET; ins->inst_basereg = cfg->frame_reg; /* These arguments are saved to the stack in the prolog */ - offset = ALIGN_TO (offset, sizeof (target_mgreg_t)); + int argsize, align; + if (ainfo->storage == ArgSIMDInSSEReg) { + argsize = 16; + align = 16; + offset = ALIGN_TO (offset, 16); + } else { + argsize = (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (target_mgreg_t) : sizeof (target_mgreg_t); + align = sizeof (target_mgreg_t); + } + offset = ALIGN_TO (offset, align); if (cfg->arch.omit_fp) { ins->inst_offset = offset; - offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (target_mgreg_t) : sizeof (target_mgreg_t); + offset += argsize; // Arguments are yet supported by the stack map creation code //cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset); } else { - offset += (ainfo->storage == ArgValuetypeInReg) ? 
ainfo->nregs * sizeof (target_mgreg_t) : sizeof (target_mgreg_t); + offset += argsize; ins->inst_offset = - offset; //cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset); } @@ -2027,7 +2056,15 @@ add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int re MONO_ADD_INS (cfg->cbb, ins); mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE); + break; + case ArgSIMDInSSEReg: + MONO_INST_NEW (cfg, ins, OP_XMOVE_ARG); + ins->dreg = alloc_xreg (cfg); + ins->sreg1 = tree->dreg; + ins->klass = tree->klass; + MONO_ADD_INS (cfg->cbb, ins); + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, MONO_REG_SIMD); break; default: g_assert_not_reached (); @@ -2129,6 +2166,7 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig) case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: linfo->ret.storage = LLVMArgNormal; break; case ArgValuetypeInReg: { @@ -2171,10 +2209,9 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig) switch (ainfo->storage) { case ArgInIReg: - linfo->args [i].storage = LLVMArgNormal; - break; case ArgInDoubleSSEReg: case ArgInFloatSSEReg: + case ArgSIMDInSSEReg: linfo->args [i].storage = LLVMArgNormal; break; case ArgOnStack: @@ -2192,21 +2229,6 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig) return linfo; } -#if 0 - /* FIXME: the non-LLVM codegen should also pass arguments in registers or - * else there could a mismatch when LLVM code calls non-LLVM code - * - * See https://github.com/dotnet/runtime/issues/73454 - */ - if ((t->type == MONO_TYPE_GENERICINST) && !cfg->full_aot && !sig->pinvoke) { - MonoClass *klass = mono_class_from_mono_type_internal (t); - if (mini_class_is_simd (cfg, klass)) { - linfo->args [i].storage = LLVMArgVtypeInSIMDReg; - break; - } - } -#endif - linfo->args [i].storage = LLVMArgVtypeInReg; for (j = 0; j < 2; ++j) linfo->args [i].pair_storage [j] = 
arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]); @@ -2318,6 +2340,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) break; case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in); break; case ArgOnStack: @@ -2420,6 +2443,11 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE); break; } + case ArgSIMDInSSEReg: + call->dont_decompose = TRUE; + g_assert (call->vret_var); + NULLIFY_INS (call->vret_var); + break; default: break; } @@ -2584,6 +2612,7 @@ dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo) case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: case ArgValuetypeAddrInIReg: case ArgValuetypeInReg: break; @@ -2597,6 +2626,7 @@ dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo) case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: + case ArgSIMDInSSEReg: case ArgValuetypeInReg: case ArgValuetypeAddrInIReg: case ArgValuetypeAddrOnStack: @@ -2712,7 +2742,8 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g for (i = 0; i < PARAM_REGS; ++i) general_param_reg_to_index [param_regs[i]] = i; for (i = 0; i < FLOAT_PARAM_REGS; ++i) - float_param_reg_to_index [float_param_regs[i]] = i; + /* 2 entries per SIMD reg */ + float_param_reg_to_index [float_param_regs[i]] = i * 2; mono_memory_barrier (); param_reg_to_index_inited = 1; } else { @@ -2747,7 +2778,7 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g } else if (ainfo->storage == ArgValuetypeAddrInIReg) { g_assert (ainfo->pair_storage [0] == ArgInIReg && ainfo->pair_storage [1] == ArgNone); slot = general_param_reg_to_index [ainfo->pair_regs [0]]; - } else if (ainfo->storage == ArgInFloatSSEReg || ainfo->storage == ArgInDoubleSSEReg) { + } else if (ainfo->storage == ArgInFloatSSEReg || ainfo->storage == 
ArgInDoubleSSEReg || ainfo->storage == ArgSIMDInSSEReg) { slot = float_param_reg_to_index [ainfo->reg]; } else { slot = general_param_reg_to_index [ainfo->reg]; @@ -2859,6 +2890,10 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g for (i = 0; i < ainfo->arg_size / 8; ++i) p->regs [slot + i] = ((target_mgreg_t*)(arg))[i]; break; + case ArgSIMDInSSEReg: + p->has_fp = 1; + memcpy (&(p->fregs [slot]), arg, 16); + break; default: g_assert_not_reached (); break; @@ -2938,21 +2973,24 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf) } else { /* Fall through */ } - case MONO_TYPE_VALUETYPE: - if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg || dinfo->cinfo->ret.storage == ArgGsharedvtVariableInReg) { + case MONO_TYPE_VALUETYPE: { + ArgInfo *ainfo = &dinfo->cinfo->ret; + switch (ainfo->storage) { + case ArgValuetypeAddrInIReg: + case ArgGsharedvtVariableInReg: /* Nothing to do */ - } else { - ArgInfo *ainfo = &dinfo->cinfo->ret; - - g_assert (ainfo->storage == ArgValuetypeInReg); - + break; + case ArgSIMDInSSEReg: + memcpy (ret, &dargs->fregs [0], 16); + break; + case ArgValuetypeInReg: { for (i = 0; i < 2; ++i) { switch (ainfo->pair_storage [0]) { case ArgInIReg: ((host_mgreg_t*)ret)[i] = res; break; case ArgInDoubleSSEReg: - ((double*)ret)[i] = dargs->fregs [i]; + ((double*)ret)[i] = dargs->fregs [i * 2]; break; case ArgNone: break; @@ -2961,8 +2999,14 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf) break; } } + break; + } + default: + g_assert_not_reached (); + break; } break; + } default: g_assert_not_reached (); } @@ -4229,6 +4273,12 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) if (ins->dreg != AMD64_XMM0) amd64_sse_movss_reg_reg (code, ins->dreg, AMD64_XMM0); break; + case OP_XCALL: + case OP_XCALL_REG: + case OP_XCALL_MEMBASE: + if (ins->dreg != AMD64_XMM0) + amd64_sse_movaps_reg_reg (code, ins->dreg, AMD64_XMM0); + break; case OP_VCALL: case OP_VCALL_REG: case 
OP_VCALL_MEMBASE: @@ -4262,6 +4312,13 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) } } break; + case OP_VOIDCALL: + case OP_VOIDCALL_REG: + case OP_VOIDCALL_MEMBASE: + break; + default: + g_assert_not_reached (); + break; } return code; @@ -5523,6 +5580,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VCALL: case OP_VCALL2: case OP_VOIDCALL: + case OP_XCALL: call = (MonoCallInst*)ins; code = amd64_handle_varargs_call (cfg, code, call, FALSE); @@ -5538,6 +5596,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VCALL2_REG: case OP_VOIDCALL_REG: case OP_CALL_REG: + case OP_XCALL_REG: call = (MonoCallInst*)ins; if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) { @@ -5558,6 +5617,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VCALL2_MEMBASE: case OP_VOIDCALL_MEMBASE: case OP_CALL_MEMBASE: + case OP_XCALL_MEMBASE: call = (MonoCallInst*)ins; amd64_call_membase (code, ins->sreg1, ins->inst_offset); @@ -5587,7 +5647,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) label = code; amd64_branch8 (code, X86_CC_Z, -1, 1); for (i = 0; i < FLOAT_PARAM_REGS; ++i) - amd64_sse_movsd_reg_membase (code, i, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + (i * sizeof (double))); + amd64_sse_movups_reg_membase (code, i, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + (i * 2 * sizeof (double))); amd64_patch (label, code); /* Allocate param area */ @@ -5632,8 +5692,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) /* Save result */ amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8); amd64_mov_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8); - amd64_sse_movsd_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs), AMD64_XMM0); - amd64_sse_movsd_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + sizeof (double), AMD64_XMM1); + 
amd64_sse_movups_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs), AMD64_XMM0); + amd64_sse_movups_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + (sizeof (double) * 2), AMD64_XMM1); break; } case OP_AMD64_SAVE_SP_TO_LMF: { @@ -7502,6 +7562,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ins->dreg != ins->sreg1) amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1); break; + case OP_XMOVE_ARG: + if (ins->dreg != ins->sreg1) + amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1); + break; case OP_XZERO: amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg); break; @@ -8094,6 +8158,9 @@ MONO_RESTORE_WARNING case ArgInDoubleSSEReg: amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg); break; + case ArgSIMDInSSEReg: + amd64_sse_movups_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg); + break; case ArgValuetypeInReg: for (quad = 0; quad < 2; quad ++) { switch (ainfo->pair_storage [quad]) { diff --git a/src/mono/mono/mini/mini-amd64.h b/src/mono/mono/mini/mini-amd64.h index 2c775c1585545b..9fcc081431fbd7 100644 --- a/src/mono/mono/mini/mini-amd64.h +++ b/src/mono/mono/mini/mini-amd64.h @@ -277,7 +277,8 @@ struct SeqPointInfo { typedef struct { host_mgreg_t res; guint8 *ret; - double fregs [8]; + /* 2 entries per SIMD reg */ + double fregs [16]; host_mgreg_t has_fp; host_mgreg_t nstack_args; /* This should come last as the structure is dynamically extended */ @@ -292,6 +293,8 @@ typedef enum { ArgValuetypeInReg, ArgValuetypeAddrInIReg, ArgValuetypeAddrOnStack, + /* SIMD value in SSE register */ + ArgSIMDInSSEReg, /* gsharedvt argument passed by addr */ ArgGSharedVtInReg, ArgGSharedVtOnStack, diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 6ba6a106d34a2a..b61ede102059c8 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -4060,6 +4060,15 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef 
builder) } break; default: { + if (m_class_is_simd_type (mono_class_from_mono_type_internal (ainfo->type))) { + /* SIMD value passed by value */ + if (ctx->addresses [reg]) { + LLVMValueRef arg = LLVMGetParam (ctx->lmethod, pindex); + LLVMBuildStore (builder, arg, build_ptr_cast (builder, ctx->addresses [reg]->value, pointer_type(LLVMTypeOf (arg)))); + } + break; + } + LLVMTypeRef t; /* Needed to avoid phi argument mismatch errors since operations on pointers produce i32/i64 */ if (m_type_is_byref (ainfo->type)) @@ -6294,6 +6303,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb) case OP_MOVE: case OP_LMOVE: case OP_XMOVE: + case OP_XMOVE_ARG: case OP_SETFRET: g_assert (lhs); values [ins->dreg] = lhs; @@ -13292,23 +13302,8 @@ mono_llvm_emit_call (MonoCompile *cfg, MonoCallInst *call) switch (ainfo->storage) { case LLVMArgNormal: { MonoType *t = (sig->hasthis && i == 0) ? m_class_get_byval_arg (mono_get_intptr_class ()) : ainfo->type; - int opcode; - - opcode = mono_type_to_regmove (cfg, t); - if (opcode == OP_FMOVE) { - MONO_INST_NEW (cfg, ins, OP_FMOVE); - ins->dreg = mono_alloc_freg (cfg); - } else if (opcode == OP_LMOVE) { - MONO_INST_NEW (cfg, ins, OP_LMOVE); - ins->dreg = mono_alloc_lreg (cfg); - } else if (opcode == OP_RMOVE) { - MONO_INST_NEW (cfg, ins, OP_RMOVE); - ins->dreg = mono_alloc_freg (cfg); - } else { - MONO_INST_NEW (cfg, ins, OP_MOVE); - ins->dreg = mono_alloc_ireg (cfg); - } - ins->sreg1 = in->dreg; + + ins = mini_emit_regmove (cfg, in->dreg, t); break; } case LLVMArgVtypeByVal: diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 17dbc05d8763a1..f8a6f4f3104f2c 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1222,6 +1222,8 @@ MINI_OP(OP_XZERO, "xzero", XREG, NONE, NONE) MINI_OP(OP_XONES, "xones", XREG, NONE, NONE) MINI_OP(OP_XCONST, "xconst", XREG, NONE, NONE) MINI_OP(OP_XPHI, "xphi", XREG, NONE, NONE) +/* Same as xmove, but not optimized away by copyprop etc. 
*/ +MINI_OP(OP_XMOVE_ARG, "xmove_arg", XREG, XREG, NONE) /* * These are used for efficient implementation of the diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 4924a0be9d4899..5afc6355909a77 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -818,6 +818,7 @@ struct MonoCallInst { guint32 rgctx_reg : 1; /* Whenever the call will need an unbox trampoline */ guint need_unbox_trampoline : 1; + guint dont_decompose: 1; }; struct MonoCallArgParm { @@ -2168,6 +2169,7 @@ guint32 mono_alloc_ireg (MonoCompile *cfg); guint32 mono_alloc_lreg (MonoCompile *cfg); guint32 mono_alloc_freg (MonoCompile *cfg); guint32 mono_alloc_preg (MonoCompile *cfg); +guint32 mono_alloc_xreg (MonoCompile *cfg); guint32 mono_alloc_dreg (MonoCompile *cfg, MonoStackType stack_type); guint32 mono_alloc_ireg_ref (MonoCompile *cfg); guint32 mono_alloc_ireg_mp (MonoCompile *cfg); @@ -2350,6 +2352,7 @@ MonoInst* mini_emit_array_store (MonoCompile *cfg, MonoClass *klass, Mon MonoInst* mini_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, gboolean *ins_type_initialized); MonoInst* mini_emit_inst_for_ctor (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args); MonoInst* mini_emit_inst_for_field_load (MonoCompile *cfg, MonoClassField *field); +MonoInst* mini_emit_regmove (MonoCompile *cfg, int sreg, MonoType *type); MonoInst* mini_handle_enum_has_flag (MonoCompile *cfg, MonoClass *klass, MonoInst *enum_this, int enum_val_reg, MonoInst *enum_flag); MonoInst* mini_handle_unbox (MonoCompile *cfg, MonoClass *klass, MonoInst *val, int context_used); diff --git a/src/mono/mono/mini/tramp-amd64-gsharedvt.c b/src/mono/mono/mini/tramp-amd64-gsharedvt.c index 43b575b9f3d7dd..57860c794829d5 100644 --- a/src/mono/mono/mini/tramp-amd64-gsharedvt.c +++ b/src/mono/mono/mini/tramp-amd64-gsharedvt.c @@ -224,7 +224,7 @@ mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) 
/*callconv in regs */ caller_reg_area_offset = offset; - reg_area_size = ALIGN_TO ((n_arg_regs + n_arg_fregs) * 8, MONO_ARCH_FRAME_ALIGNMENT); + reg_area_size = ALIGN_TO ((n_arg_regs * 8) + (n_arg_fregs * 16), MONO_ARCH_FRAME_ALIGNMENT); offset += reg_area_size; framesize = offset; @@ -266,7 +266,7 @@ mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) amd64_mov_membase_reg (code, AMD64_RSP, caller_reg_area_offset + i * 8, param_regs [i], sizeof (target_mgreg_t)); for (i = 0; i < n_arg_fregs; ++i) - amd64_sse_movsd_membase_reg (code, AMD64_RSP, caller_reg_area_offset + (i + n_arg_regs) * 8, i); + amd64_sse_movups_membase_reg (code, AMD64_RSP, caller_reg_area_offset + (n_arg_regs * 8) + (i * 16), i); /* TODO Allocate stack area used to pass arguments to the method */ @@ -411,6 +411,9 @@ mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot) case GSHAREDVT_RET_R8: amd64_sse_movsd_reg_membase (code, AMD64_XMM0, AMD64_R11, 0); break; + case GSHAREDVT_RET_SIMD: + amd64_sse_movups_reg_membase (code, AMD64_XMM0, AMD64_R11, 0); + break; default: x86_breakpoint (code); /* can't handle specific case */ }