diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index ccfa626f90120..c352902bbe9b1 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -53444,14 +53444,14 @@ ZEND_API void execute_ex(zend_execute_data *ex) #if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG) struct { +#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE + char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE]; +#endif #ifdef ZEND_VM_IP_GLOBAL_REG const zend_op *orig_opline; #endif #ifdef ZEND_VM_FP_GLOBAL_REG zend_execute_data *orig_execute_data; -#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE - char hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE]; -#endif #endif } vm_stack_data; #endif @@ -56960,6 +56960,16 @@ ZEND_API void execute_ex(zend_execute_data *ex) } #endif +#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) + /* Force C compiler to store preserved registers to allow JIT using them */ +# if defined(__GNUC__) && defined(__i386__) + __asm__ __volatile__ (""::: "ebx"); +# elif defined(__GNUC__) && defined(__x86_64__) + __asm__ __volatile__ (""::: "rbx","r12","r13"); +# elif defined(__GNUC__) && defined(__aarch64__) + __asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26"); +# endif +#endif LOAD_OPLINE(); ZEND_VM_LOOP_INTERRUPT_CHECK(); diff --git a/Zend/zend_vm_execute.skl b/Zend/zend_vm_execute.skl index 717d4ffd3e8af..65aa52962bdb7 100644 --- a/Zend/zend_vm_execute.skl +++ b/Zend/zend_vm_execute.skl @@ -13,6 +13,16 @@ ZEND_API void {%EXECUTOR_NAME%}_ex(zend_execute_data *ex) {%INTERNAL_LABELS%} +#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) + /* Force C compiler to store preserved registers to allow JIT using them */ +# if defined(__GNUC__) && defined(__i386__) + __asm__ __volatile__ (""::: "ebx"); +# elif defined(__GNUC__) && defined(__x86_64__) + __asm__ __volatile__ (""::: "rbx","r12","r13"); +# elif defined(__GNUC__) && defined(__aarch64__) + __asm__ __volatile__ (""::: "x19","x20","x21","x22","x23","x24","x25","x26"); +# endif +#endif LOAD_OPLINE(); ZEND_VM_LOOP_INTERRUPT_CHECK(); diff --git a/Zend/zend_vm_gen.php b/Zend/zend_vm_gen.php index 79b7653871272..978c13dc6b005 100755 --- a/Zend/zend_vm_gen.php +++ b/Zend/zend_vm_gen.php @@ -2046,14 +2046,14 @@ function gen_executor($f, $skl, $spec, $kind, $executor_name, $initializer_name) } else { out($f,"#if defined(ZEND_VM_IP_GLOBAL_REG) || defined(ZEND_VM_FP_GLOBAL_REG)\n"); out($f,$m[1]."struct {\n"); + out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n"); + out($f,$m[1]."\tchar hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n"); + out($f,"#endif\n"); out($f,"#ifdef ZEND_VM_IP_GLOBAL_REG\n"); out($f,$m[1]."\tconst zend_op *orig_opline;\n"); out($f,"#endif\n"); out($f,"#ifdef ZEND_VM_FP_GLOBAL_REG\n"); out($f,$m[1]."\tzend_execute_data *orig_execute_data;\n"); - out($f,"#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE\n"); - out($f,$m[1]."\tchar hybrid_jit_red_zone[ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE];\n"); - out($f,"#endif\n"); out($f,"#endif\n"); out($f,$m[1]."} vm_stack_data;\n"); out($f,"#endif\n"); @@ -2339,7 +2339,7 @@ function gen_vm_opcodes_header( $str .= "\n"; $str .= "#if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__)\n"; $str .= "# if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64))\n"; - $str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16\n"; + $str .= "# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48\n"; $str .= "# endif\n"; $str .= "#endif\n"; $str .= "\n"; diff --git a/Zend/zend_vm_opcodes.h b/Zend/zend_vm_opcodes.h index 5531accbf0c20..17453f0aed652 
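Two details in the hunks above are easy to miss. Moving `hybrid_jit_red_zone` to the top of `vm_stack_data` takes it out of the `ZEND_VM_FP_GLOBAL_REG` block, so the red zone is reserved whenever `ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE` is defined rather than only when the frame-pointer register is global, and the size bump from 16 to 48 bytes gives JIT-generated code more scratch space in the executor's stack frame. The new empty `__asm__ __volatile__` statements force GCC/Clang to treat the listed callee-saved registers as clobbered, so `execute_ex()` saves them in its prologue and code generated by the hybrid JIT can use them freely. A minimal standalone sketch of that trick (illustrative C, not part of the patch):

```c
/* The empty asm claims to modify rbx/r12/r13, so the compiler must
 * save these callee-saved registers in this function's prologue and
 * restore them in its epilogue; code jumped into from the dispatch
 * loop can then overwrite them without corrupting the caller. */
#include <stdio.h>

void interpreter_loop(void)
{
#if defined(__GNUC__) && defined(__x86_64__)
    __asm__ __volatile__ ("" ::: "rbx", "r12", "r13");
#endif
    /* ... opcode dispatch would run here ... */
    printf("callee-saved registers were spilled by the prologue\n");
}

int main(void)
{
    interpreter_loop();
    return 0;
}
```

Compiling the sketch with `gcc -O2 -S` typically shows `push`/`pop` of rbx, r12 and r13 in the prologue and epilogue even though the C body never names them.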
100644 --- a/Zend/zend_vm_opcodes.h +++ b/Zend/zend_vm_opcodes.h @@ -36,7 +36,7 @@ #if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) && !defined(__SANITIZE_ADDRESS__) # if ((defined(i386) && !defined(__PIC__)) || defined(__x86_64__) || defined(_M_X64)) -# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 16 +# define ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE 48 # endif #endif diff --git a/build/Makefile.global b/build/Makefile.global index dee5fa5ecde73..14d90a1f3da9d 100644 --- a/build/Makefile.global +++ b/build/Makefile.global @@ -122,6 +122,11 @@ clean: rm -f ext/opcache/jit/zend_jit_x86.c rm -f ext/opcache/jit/zend_jit_arm64.c rm -f ext/opcache/minilua + rm -f ext/opcache/jit/ir/gen_ir_fold_hash + rm -f ext/opcache/jit/ir/minilua + rm -f ext/opcache/jit/ir/ir_fold_hash.h + rm -f ext/opcache/jit/ir/ir_emit_x86.h + rm -f ext/opcache/jit/ir/ir_emit_aarch64.h distclean: clean rm -f Makefile config.cache config.log config.status Makefile.objects Makefile.fragments libtool main/php_config.h main/internal_functions_cli.c main/internal_functions.c Zend/zend_dtrace_gen.h Zend/zend_dtrace_gen.h.bak Zend/zend_config.h diff --git a/ext/opcache/config.m4 b/ext/opcache/config.m4 index 58e9b2643f9ac..4fa403337b04c 100644 --- a/ext/opcache/config.m4 +++ b/ext/opcache/config.m4 @@ -24,6 +24,13 @@ PHP_ARG_WITH([capstone],, [no], [no]) +PHP_ARG_ENABLE([opcache-jit-ir], + [whether to enable JIT based on IR framework], + [AS_HELP_STRING([--disable-opcache-jit-ir], + [Disable JIT based on IR framework (use old JIT)])], + [yes], + [no]) + if test "$PHP_OPCACHE" != "no"; then dnl Always build as shared extension @@ -44,7 +51,7 @@ if test "$PHP_OPCACHE" != "no"; then esac fi - if test "$PHP_OPCACHE_JIT" = "yes"; then + if test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "no" ; then AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT]) ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_gdb.c jit/zend_jit_vm_helpers.c" @@ -86,6 +93,62 @@ if test "$PHP_OPCACHE" != "no"; then PHP_SUBST(DASM_FLAGS) PHP_SUBST(DASM_ARCH) + + JIT_CFLAGS= + + elif test "$PHP_OPCACHE_JIT" = "yes" -a "$PHP_OPCACHE_JIT_IR" = "yes"; then + AC_DEFINE(HAVE_JIT, 1, [Define to enable JIT]) + AC_DEFINE(ZEND_JIT_IR, 1, [Use JIT IR framework]) + ZEND_JIT_SRC="jit/zend_jit.c jit/zend_jit_vm_helpers.c jit/ir/ir.c jit/ir/ir_strtab.c \ + jit/ir/ir_cfg.c jit/ir/ir_sccp.c jit/ir/ir_gcm.c jit/ir/ir_ra.c jit/ir/ir_save.c \ + jit/ir/ir_dump.c jit/ir/ir_gdb.c jit/ir/ir_perf.c jit/ir/ir_check.c \ + jit/ir/ir_patch.c jit/ir/ir_emit.c" + + dnl Find out which ABI we are using. 
+ case $host_alias in + x86_64-*-darwin*) + IR_TARGET=IR_TARGET_X64 + DASM_FLAGS="-D X64APPLE=1 -D X64=1" + DASM_ARCH="x86" + ;; + x86_64*) + IR_TARGET=IR_TARGET_X64 + DASM_FLAGS="-D X64=1" + DASM_ARCH="x86" + ;; + i[[34567]]86*) + IR_TARGET=IR_TARGET_X86 + DASM_ARCH="x86" + ;; + x86*) + IR_TARGET=IR_TARGET_X86 + DASM_ARCH="x86" + ;; + aarch64*) + IR_TARGET=IR_TARGET_AARCH64 + DASM_ARCH="aarch64" + ;; + esac + + AS_IF([test x"$with_capstone" = "xyes"],[ + PKG_CHECK_MODULES([CAPSTONE],[capstone >= 3.0.0],[ + AC_DEFINE([HAVE_CAPSTONE], [1], [Capstone is available]) + PHP_EVAL_LIBLINE($CAPSTONE_LIBS, OPCACHE_SHARED_LIBADD) + PHP_EVAL_INCLINE($CAPSTONE_CFLAGS) + ZEND_JIT_SRC+=" jit/ir/ir_disasm.c" + ],[ + AC_MSG_ERROR([capstone >= 3.0 required but not found]) + ]) + ]) + + PHP_SUBST(IR_TARGET) + PHP_SUBST(DASM_FLAGS) + PHP_SUBST(DASM_ARCH) + + JIT_CFLAGS="-I@ext_builddir@/jit/ir -D${IR_TARGET} -DIR_PHP" + if test "$ZEND_DEBUG" = "yes"; then + JIT_CFLAGS="${JIT_CFLAGS} -DIR_DEBUG" + fi fi AC_CHECK_FUNCS([mprotect memfd_create shm_create_largepage]) @@ -310,7 +373,7 @@ int main(void) { shared_alloc_mmap.c \ shared_alloc_posix.c \ $ZEND_JIT_SRC, - shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1",,yes) + shared,,"-Wno-implicit-fallthrough -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 ${JIT_CFLAGS}",,yes) PHP_ADD_EXTENSION_DEP(opcache, pcre) @@ -320,6 +383,9 @@ int main(void) { if test "$PHP_OPCACHE_JIT" = "yes"; then PHP_ADD_BUILD_DIR([$ext_builddir/jit], 1) + if test "$PHP_OPCACHE_JIT_IR" = "yes"; then + PHP_ADD_BUILD_DIR([$ext_builddir/jit/ir], 1) + fi PHP_ADD_MAKEFILE_FRAGMENT($ext_srcdir/jit/Makefile.frag) fi PHP_SUBST(OPCACHE_SHARED_LIBADD) diff --git a/ext/opcache/config.w32 b/ext/opcache/config.w32 index 764a2edaab146..da60492b59f7e 100644 --- a/ext/opcache/config.w32 +++ b/ext/opcache/config.w32 @@ -5,6 +5,8 @@ if (PHP_OPCACHE != "no") { ARG_ENABLE("opcache-jit", "whether to enable JIT", "yes"); + ARG_ENABLE("opcache-jit-ir", "whether to enable JIT based on IR framework", "yes"); + ZEND_EXTENSION('opcache', "\ ZendAccelerator.c \ zend_accelerator_blacklist.c \ @@ -18,7 +20,7 @@ if (PHP_OPCACHE != "no") { zend_shared_alloc.c \ shared_alloc_win32.c", true, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); - if (PHP_OPCACHE_JIT == "yes") { + if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "no") { if (CHECK_HEADER_ADD_INCLUDE("dynasm/dasm_x86.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) { var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1"; if (PHP_ZTS == "yes") { @@ -37,6 +39,45 @@ if (PHP_OPCACHE != "no") { } else { WARNING("JIT not enabled, headers not found"); } + } else if (PHP_OPCACHE_JIT == "yes" && PHP_OPCACHE_JIT_IR == "yes") { + if (CHECK_HEADER_ADD_INCLUDE("ir/ir.h", "CFLAGS_OPCACHE", PHP_OPCACHE + ";ext\\opcache\\jit")) { + var dasm_flags = (X64 ? "-D X64=1" : "") + (X64 ? " -D X64WIN=1" : "") + " -D WIN=1"; + var ir_target = (X64 ? 
"IR_TARGET_X64" : "IR_TARGET_X86"); + var ir_src = "ir_strtab.c ir_cfg.c ir_sccp.c ir_gcm.c ir_ra.c ir_save.c \ + ir_dump.c ir_check.c ir_patch.c"; + + DEFINE("IR_TARGET", ir_target); + DEFINE("DASM_FLAGS", dasm_flags); + DEFINE("DASM_ARCH", "x86"); + + AC_DEFINE('HAVE_JIT', 1, 'Define to enable JIT'); + AC_DEFINE('ZEND_JIT_IR', 1, 'Use JIT IR framework'); + + ADD_FLAG("CFLAGS_OPCACHE", "/I \"ext\\opcache\\jit\\ir\" /D "+ir_target+" /D IR_PHP"); + if (PHP_DEBUG == "yes") { + ADD_FLAG("CFLAGS_OPCACHE", "/D IR_DEBUG"); + } + + if (CHECK_HEADER_ADD_INCLUDE("capstone\\capstone.h", "CFLAGS_OPCACHE", PHP_OPCACHE+ ";" + PHP_PHP_BUILD + "\\include") && + CHECK_LIB("capstone.lib", "opcache", PHP_OPCACHE)) { + AC_DEFINE('HAVE_CAPSTONE', 1, 'capstone support enabled'); + ir_src += " ir_disasm.c"; + } + + ADD_MAKEFILE_FRAGMENT(configure_module_dirname + "\\jit\\Makefile.frag.w32"); + + ADD_SOURCES(configure_module_dirname + "\\jit", + "zend_jit.c zend_jit_vm_helpers.c", + "opcache", "ext\\opcache\\jit"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + "ir.c", "opcache", "ext\\opcache\\jit\\ir"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + "ir_emit.c", "opcache", "ext\\opcache\\jit\\ir"); + ADD_SOURCES(configure_module_dirname + "\\jit\\ir", + ir_src, "opcache", "ext\\opcache\\jit\\ir"); + } else { + WARNING("JIT not enabled, headers not found"); + } } ADD_FLAG('CFLAGS_OPCACHE', "/I " + configure_module_dirname); diff --git a/ext/opcache/jit/Makefile.frag b/ext/opcache/jit/Makefile.frag index f9ae2e0cf4b99..07a826764ed72 100644 --- a/ext/opcache/jit/Makefile.frag +++ b/ext/opcache/jit/Makefile.frag @@ -1,4 +1,29 @@ +ifdef IR_TARGET +# New IR based JIT +$(builddir)/jit/ir/minilua: $(srcdir)/jit/ir/dynasm/minilua.c + $(BUILD_CC) $(srcdir)/jit/ir/dynasm/minilua.c -lm -o $@ +$(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h: $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc $(srcdir)/jit/ir/dynasm/*.lua $(builddir)/jit/ir/minilua + $(builddir)/jit/ir/minilua $(srcdir)/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ $(srcdir)/jit/ir/ir_$(DASM_ARCH).dasc + +$(builddir)/jit/ir/ir_emit.lo: \ + $(srcdir)/jit/ir/ir_emit.c $(builddir)/jit/ir/ir_emit_$(DASM_ARCH).h + +$(builddir)/jit/ir/gen_ir_fold_hash: $(srcdir)/jit/ir/gen_ir_fold_hash.c $(srcdir)/jit/ir/ir_strtab.c + $(BUILD_CC) -D${IR_TARGET} -DIR_PHP -DIR_PHP_MM=0 -o $@ $< + +$(builddir)/jit/ir/ir_fold_hash.h: $(builddir)/jit/ir/gen_ir_fold_hash $(srcdir)/jit/ir/ir_fold.h $(srcdir)/jit/ir/ir.h + $(builddir)/jit/ir/gen_ir_fold_hash < $(srcdir)/jit/ir/ir_fold.h > $(builddir)/jit/ir/ir_fold_hash.h + +$(builddir)/jit/ir/ir.lo: \ + $(builddir)/jit/ir/ir_fold_hash.h + +$(builddir)/jit/zend_jit.lo: \ + $(srcdir)/jit/zend_jit_helpers.c \ + $(srcdir)/jit/zend_jit_ir.c + +else +# Old DynAsm based JIT $(builddir)/minilua: $(srcdir)/jit/dynasm/minilua.c $(BUILD_CC) $(srcdir)/jit/dynasm/minilua.c -lm -o $@ @@ -15,6 +40,8 @@ $(builddir)/jit/zend_jit.lo: \ $(srcdir)/jit/zend_jit_trace.c \ $(srcdir)/jit/zend_elf.c +endif + # For non-GNU make, jit/zend_jit.lo and ./jit/zend_jit.lo are considered distinct targets. # Use this workaround to allow building from inside ext/opcache. 
jit/zend_jit.lo: $(builddir)/jit/zend_jit.lo diff --git a/ext/opcache/jit/Makefile.frag.w32 b/ext/opcache/jit/Makefile.frag.w32 index a9533e98edcea..f2c1995fc79ea 100644 --- a/ext/opcache/jit/Makefile.frag.w32 +++ b/ext/opcache/jit/Makefile.frag.w32 @@ -1,3 +1,48 @@ +!if "$(IR_TARGET)" != "" +# New IR based JIT + +$(BUILD_DIR)\\minilua.exe: ext\opcache\jit\ir\dynasm\minilua.c + @if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe + $(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\ir\dynasm\minilua.c + +ext\opcache\jit\ir\ir_emit_x86.h: ext\opcache\jit\ir\ir_x86.dasc $(BUILD_DIR)\\minilua.exe + @if exist ext\opcache\jit\ir\ir_emit_x86.h del ext\opcache\jit\ir\ir_emit_x86.h + $(BUILD_DIR)\\minilua.exe ext/opcache/jit/ir/dynasm/dynasm.lua $(DASM_FLAGS) -o $@ ext/opcache/jit/ir/ir_x86.dasc + +$(BUILD_DIR)\\gen_ir_fold_hash.exe: ext\opcache\jit\ir\gen_ir_fold_hash.c ext\opcache\jit\ir\ir_strtab.c + @if exist $(BUILD_DIR)\\gen_ir_fold_hash.exe del $(BUILD_DIR)\\gen_ir_fold_hash.exe + $(PHP_CL) /D $(IR_TARGET) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /Fe$(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\gen_ir_fold_hash.c + +ext\opcache\jit\ir\ir_fold_hash.h: $(BUILD_DIR)\\gen_ir_fold_hash.exe ext\opcache\jit\ir\ir_fold.h ext\opcache\jit\ir\ir.h + @if exist ext\opcache\jit\ir\ir_fold_hash.h del ext\opcache\jit\ir\ir_fold_hash.h + $(BUILD_DIR)\\gen_ir_fold_hash.exe < ext\opcache\jit\ir\ir_fold.h > ext\opcache\jit\ir\ir_fold_hash.h + +$(BUILD_DIR)\ext\opcache\jit\ir\ir_ra.obj: \ + ext\opcache\jit\ir\ir.h \ + ext\opcache\jit\ir\ir_private.h \ + ext\opcache\jit\ir\ir_x86.h + +$(BUILD_DIR)\ext\opcache\jit\ir\ir_emit.obj: \ + ext\opcache\jit\ir\ir.h \ + ext\opcache\jit\ir\ir_private.h \ + ext\opcache\jit\ir\ir_x86.h \ + ext\opcache\jit\ir\ir_emit_x86.h + +$(BUILD_DIR)\ext\opcache\jit\ir\ir.obj: \ + ext\opcache\jit\ir\ir.h \ + ext\opcache\jit\ir\ir_private.h \ + ext\opcache\jit\ir\ir_fold.h \ + ext\opcache\jit\ir\ir_fold_hash.h + +$(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \ + ext\opcache\jit\zend_jit_ir.c \ + ext\opcache\jit\zend_jit_helpers.c \ + ext\opcache\jit\ir\ir.h \ + ext\opcache\jit\ir\ir_builder.h + +!else +# Old DynAsm based JIT + $(BUILD_DIR)\\minilua.exe: ext\opcache\jit\dynasm\minilua.c @if exist $(BUILD_DIR)\\minilua.exe del $(BUILD_DIR)\\minilua.exe $(PHP_CL) /Fo$(BUILD_DIR)\ /Fd$(BUILD_DIR)\ /Fp$(BUILD_DIR)\ /FR$(BUILD_DIR) /Fe$(BUILD_DIR)\minilua.exe ext\opcache\jit\dynasm\minilua.c @@ -14,3 +59,4 @@ $(BUILD_DIR)\ext\opcache\jit\zend_jit.obj: \ ext/opcache/jit/zend_jit_perf_dump.c \ ext/opcache/jit/zend_jit_trace.c \ ext/opcache/jit/zend_jit_vtune.c +!endif diff --git a/ext/opcache/jit/README-IR.md b/ext/opcache/jit/README-IR.md new file mode 100644 index 0000000000000..b547792e493d8 --- /dev/null +++ b/ext/opcache/jit/README-IR.md @@ -0,0 +1,32 @@ +New JIT implementation +====================== + +This branch provides a new JIT implementation based on [IR - Lightweight +JIT Compilation Framework](https://github.com/dstogov/ir). + +Unlike the PHP 8.* JIT, which generates native code directly from +PHP byte-code, this implementation generates an intermediate representation (IR) +and delegates all lower-level tasks to the IR Framework. IR is to the JIT +what an AST is to a compiler.
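To make "generates an intermediate representation" concrete, here is a hedged sketch of building IR for `int32_t add(int32_t x, int32_t y)` with the embedded framework's C builder macros; the names (`ir_START`, `ir_PARAM`, `ir_ADD_I32`, `ir_RETURN`, from `ir_builder.h`) follow the upstream IR examples and are assumptions here, not code taken from this patch:

```c
/* Hypothetical sketch: build IR for "return x + y" and let the
 * framework handle instruction selection, register allocation and
 * machine-code emission.  Macro names assumed from upstream ir_builder.h. */
#include "ir.h"
#include "ir_builder.h"

void build_add(ir_ctx *ctx)   /* the builder macros expect a local "ctx" */
{
    ir_START();                           /* function entry (control node) */
    ir_ref x   = ir_PARAM(IR_I32, "x", 1);
    ir_ref y   = ir_PARAM(IR_I32, "y", 2);
    ir_ref sum = ir_ADD_I32(x, y);        /* pure data node */
    ir_RETURN(sum);                       /* control node ending the graph */
}
```

Everything below this node graph (lowering, register allocation, and encoding through the generated `ir_emit_*.h` tables) is the part the old JIT implemented by hand.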
+ +Key benefits of the new JIT implementation: +- Using IR opens up possibilities for better optimization and register + allocation (the resulting native code is more efficient) +- PHP doesn't have to care about most low-level details (different CPUs, + calling conventions, TLS details, etc.) +- It's much easier to implement support for new targets (e.g. RISC-V) +- The IR framework will be developed separately from PHP and may accept + contributions from other projects (new optimizations, improvements, bug fixes) + +Disadvantages: +- JIT compilation becomes slower (this is almost invisible for the tracing + JIT, but function JIT compilation of WordPress becomes 4 times slower) + +The necessary part of the IR Framework is embedded into php-src, so the PR +doesn't introduce new dependencies. + +The new JIT implementation successfully passes all CI workflows, but it's still +not mature and may cause failures. To reduce risk, this patch doesn't remove +the old JIT implementation (which is the same as the PHP-8.3 JIT). It's possible +to build PHP with the old JIT by configuring with **--disable-opcache-jit-ir**. +In the future the old implementation should be removed. diff --git a/ext/opcache/jit/ir/.gitignore b/ext/opcache/jit/ir/.gitignore new file mode 100644 index 0000000000000..7a37a4fd05933 --- /dev/null +++ b/ext/opcache/jit/ir/.gitignore @@ -0,0 +1,22 @@ +*.o +*.log +*.dot +*.pdf +ir_fold_hash.h +ir_emit_x86.h +ir_emit_aarch64.h +minilua +gen_ir_fold_hash +ir_test +tester +ir +b.c + +tests/**/*.diff +tests/**/*.exp +tests/**/*.ir +tests/**/*.out +tests/**/*.log + +win32/vcpkg +win32/build_* diff --git a/ext/opcache/jit/ir/LICENSE b/ext/opcache/jit/ir/LICENSE new file mode 100644 index 0000000000000..c43a12a770f8f --- /dev/null +++ b/ext/opcache/jit/ir/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Zend by Perforce + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ext/opcache/jit/ir/README b/ext/opcache/jit/ir/README new file mode 100644 index 0000000000000..68288d21bfc02 --- /dev/null +++ b/ext/opcache/jit/ir/README @@ -0,0 +1,2 @@ +This directory contains an embedded version of the IR Framework. +See the full version at https://github.com/dstogov/ir diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm.h b/ext/opcache/jit/ir/dynasm/dasm_arm.h new file mode 100644 index 0000000000000..ebcf4ac0ec13f --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm.h @@ -0,0 +1,461 @@ +/* +** DynASM ARM encoding engine. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license.
See dynasm.lua for full copyright notice. */ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "arm" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state.
*/ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + int i; + for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30)) + if (n <= 255) return (int)(n + (i << 8)); + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. 
*/ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + case DASM_IMM16: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; + /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : + (((-n)>>((ins>>5)&31)) == 0), RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM12: case DASM_IMM16: + case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; + patchrel: + if ((ins & 0x800) == 0) { + CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x00ffffff); + } else if ((ins & 0x1000)) { + CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); + goto patchimml8; + } else if ((ins & 0x2000) == 0) { + CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); + goto patchimml; + } else { + CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); + n >>= 2; + goto patchimml; + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM12: + cp[-1] |= dasm_imm12((unsigned int)n); + break; + case DASM_IMM16: + cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); + break; + case DASM_IMML8: patchimml8: + cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : + ((-n & 0x0f) | ((-n & 0xf0) << 4)); + break; + case DASM_IMML12: case DASM_IMMV8: patchimml: + cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm.lua b/ext/opcache/jit/ir/dynasm/dasm_arm.lua new file mode 100644 index 0000000000000..0c775ae2687db --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm.lua @@ -0,0 +1,1125 @@ +------------------------------------------------------------------------------ +-- DynASM ARM module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. 
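The engine just shown runs in fixed passes: `dasm_put()` records actions and arguments while the generated encoder runs, `dasm_link()` resolves labels and computes the total code size, and `dasm_encode()` writes the final machine words. A hedged driver sketch (the action list and the `dasm_put()` calls are normally generated by `dynasm.lua` from a `.dasc` file, and `dasm_proto.h` from the same `dynasm/` directory supplies the default `Dst_DECL`/`Dst_REF` and allocator macros):

```c
/* Hypothetical driver for the encoding engine above; only the pass
 * structure matters here. */
#include <stdlib.h>
#include "dasm_proto.h"   /* default Dst_DECL/Dst_REF, DASM_M_GROW/FREE */
#include "dasm_arm.h"     /* the engine shown above */

static void *assemble(const unsigned int *actionlist)
{
    dasm_State *state;
    dasm_State **Dst = &state;   /* matches the default Dst_DECL */
    void *globals[32];
    size_t size;
    void *code = NULL;

    dasm_init(Dst, 1);                   /* one code section */
    dasm_setupglobal(Dst, globals, 32);  /* "must be called before dasm_setup()" */
    dasm_setup(Dst, actionlist);

    /* ... generated dasm_put(Dst, ...) calls record the code here ... */

    if (dasm_link(Dst, &size) == DASM_S_OK) {    /* resolve labels, size it */
        code = malloc(size);  /* a real JIT would mmap() executable memory */
        if (code && dasm_encode(Dst, code) != DASM_S_OK) {
            free(code);
            code = NULL;
        }
    }
    dasm_free(Dst);
    return code;
}
```

The generated `ir_emit_x86.h` and `ir_emit_aarch64.h` headers from the build fragments feed this same engine from `ir_emit.c`.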
+------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. 
+end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } + +-- Int. register name -> ext. name. +local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. 
+function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +-- Template strings for ARM instructions. +local map_op = { + -- Basic data processing instructions. + and_3 = "e0000000DNPs", + eor_3 = "e0200000DNPs", + sub_3 = "e0400000DNPs", + rsb_3 = "e0600000DNPs", + add_3 = "e0800000DNPs", + adc_3 = "e0a00000DNPs", + sbc_3 = "e0c00000DNPs", + rsc_3 = "e0e00000DNPs", + tst_2 = "e1100000NP", + teq_2 = "e1300000NP", + cmp_2 = "e1500000NP", + cmn_2 = "e1700000NP", + orr_3 = "e1800000DNPs", + mov_2 = "e1a00000DPs", + bic_3 = "e1c00000DNPs", + mvn_2 = "e1e00000DPs", + + and_4 = "e0000000DNMps", + eor_4 = "e0200000DNMps", + sub_4 = "e0400000DNMps", + rsb_4 = "e0600000DNMps", + add_4 = "e0800000DNMps", + adc_4 = "e0a00000DNMps", + sbc_4 = "e0c00000DNMps", + rsc_4 = "e0e00000DNMps", + tst_3 = "e1100000NMp", + teq_3 = "e1300000NMp", + cmp_3 = "e1500000NMp", + cmn_3 = "e1700000NMp", + orr_4 = "e1800000DNMps", + mov_3 = "e1a00000DMps", + bic_4 = "e1c00000DNMps", + mvn_3 = "e1e00000DMps", + + lsl_3 = "e1a00000DMws", + lsr_3 = "e1a00020DMws", + asr_3 = "e1a00040DMws", + ror_3 = "e1a00060DMws", + rrx_2 = "e1a00060DMs", + + -- Multiply and multiply-accumulate. + mul_3 = "e0000090NMSs", + mla_4 = "e0200090NMSDs", + umaal_4 = "e0400090DNMSs", -- v6 + mls_4 = "e0600090DNMSs", -- v6T2 + umull_4 = "e0800090DNMSs", + umlal_4 = "e0a00090DNMSs", + smull_4 = "e0c00090DNMSs", + smlal_4 = "e0e00090DNMSs", + + -- Halfword multiply and multiply-accumulate. + smlabb_4 = "e1000080NMSD", -- v5TE + smlatb_4 = "e10000a0NMSD", -- v5TE + smlabt_4 = "e10000c0NMSD", -- v5TE + smlatt_4 = "e10000e0NMSD", -- v5TE + smlawb_4 = "e1200080NMSD", -- v5TE + smulwb_3 = "e12000a0NMS", -- v5TE + smlawt_4 = "e12000c0NMSD", -- v5TE + smulwt_3 = "e12000e0NMS", -- v5TE + smlalbb_4 = "e1400080NMSD", -- v5TE + smlaltb_4 = "e14000a0NMSD", -- v5TE + smlalbt_4 = "e14000c0NMSD", -- v5TE + smlaltt_4 = "e14000e0NMSD", -- v5TE + smulbb_3 = "e1600080NMS", -- v5TE + smultb_3 = "e16000a0NMS", -- v5TE + smulbt_3 = "e16000c0NMS", -- v5TE + smultt_3 = "e16000e0NMS", -- v5TE + + -- Miscellaneous data processing instructions. + clz_2 = "e16f0f10DM", -- v5T + rev_2 = "e6bf0f30DM", -- v6 + rev16_2 = "e6bf0fb0DM", -- v6 + revsh_2 = "e6ff0fb0DM", -- v6 + sel_3 = "e6800fb0DNM", -- v6 + usad8_3 = "e780f010NMS", -- v6 + usada8_4 = "e7800010NMSD", -- v6 + rbit_2 = "e6ff0f30DM", -- v6T2 + movw_2 = "e3000000DW", -- v6T2 + movt_2 = "e3400000DW", -- v6T2 + -- Note: the X encodes width-1, not width. + sbfx_4 = "e7a00050DMvX", -- v6T2 + ubfx_4 = "e7e00050DMvX", -- v6T2 + -- Note: the X encodes the msb field, not the width. + bfc_3 = "e7c0001fDvX", -- v6T2 + bfi_4 = "e7c00010DMvX", -- v6T2 + + -- Packing and unpacking instructions. 
+ pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6 + pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6 + sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6 + sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6 + sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6 + sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6 + sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6 + sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6 + uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6 + uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6 + uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6 + uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6 + uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6 + uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6 + + -- Saturating instructions. + qadd_3 = "e1000050DMN", -- v5TE + qsub_3 = "e1200050DMN", -- v5TE + qdadd_3 = "e1400050DMN", -- v5TE + qdsub_3 = "e1600050DMN", -- v5TE + -- Note: the X for ssat* encodes sat_imm-1, not sat_imm. + ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6 + usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6 + ssat16_3 = "e6a00f30DXM", -- v6 + usat16_3 = "e6e00f30DXM", -- v6 + + -- Parallel addition and subtraction. + sadd16_3 = "e6100f10DNM", -- v6 + sasx_3 = "e6100f30DNM", -- v6 + ssax_3 = "e6100f50DNM", -- v6 + ssub16_3 = "e6100f70DNM", -- v6 + sadd8_3 = "e6100f90DNM", -- v6 + ssub8_3 = "e6100ff0DNM", -- v6 + qadd16_3 = "e6200f10DNM", -- v6 + qasx_3 = "e6200f30DNM", -- v6 + qsax_3 = "e6200f50DNM", -- v6 + qsub16_3 = "e6200f70DNM", -- v6 + qadd8_3 = "e6200f90DNM", -- v6 + qsub8_3 = "e6200ff0DNM", -- v6 + shadd16_3 = "e6300f10DNM", -- v6 + shasx_3 = "e6300f30DNM", -- v6 + shsax_3 = "e6300f50DNM", -- v6 + shsub16_3 = "e6300f70DNM", -- v6 + shadd8_3 = "e6300f90DNM", -- v6 + shsub8_3 = "e6300ff0DNM", -- v6 + uadd16_3 = "e6500f10DNM", -- v6 + uasx_3 = "e6500f30DNM", -- v6 + usax_3 = "e6500f50DNM", -- v6 + usub16_3 = "e6500f70DNM", -- v6 + uadd8_3 = "e6500f90DNM", -- v6 + usub8_3 = "e6500ff0DNM", -- v6 + uqadd16_3 = "e6600f10DNM", -- v6 + uqasx_3 = "e6600f30DNM", -- v6 + uqsax_3 = "e6600f50DNM", -- v6 + uqsub16_3 = "e6600f70DNM", -- v6 + uqadd8_3 = "e6600f90DNM", -- v6 + uqsub8_3 = "e6600ff0DNM", -- v6 + uhadd16_3 = "e6700f10DNM", -- v6 + uhasx_3 = "e6700f30DNM", -- v6 + uhsax_3 = "e6700f50DNM", -- v6 + uhsub16_3 = "e6700f70DNM", -- v6 + uhadd8_3 = "e6700f90DNM", -- v6 + uhsub8_3 = "e6700ff0DNM", -- v6 + + -- Load/store instructions. 
+ str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL", + strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL", + ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL", + ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL", + strh_2 = "e00000b0DL", strh_3 = "e00000b0DL", + ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL", + ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE + ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL", + strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE + ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL", + + ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR", + ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR", + ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR", + ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR", + stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR", + stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR", + stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR", + stmib_2 = "e9800000oR", stmed_2 = "e9800000oR", + pop_1 = "e8bd0000R", push_1 = "e92d0000R", + + -- Branch instructions. + b_1 = "ea000000B", + bl_1 = "eb000000B", + blx_1 = "e12fff30C", + bx_1 = "e12fff10M", + + -- Miscellaneous instructions. + nop_0 = "e1a00000", + mrs_1 = "e10f0000D", + bkpt_1 = "e1200070K", -- v5T + svc_1 = "ef000000T", swi_1 = "ef000000T", + ud_0 = "e7f001f0", + + -- VFP instructions. + ["vadd.f32_3"] = "ee300a00dnm", + ["vadd.f64_3"] = "ee300b00Gdnm", + ["vsub.f32_3"] = "ee300a40dnm", + ["vsub.f64_3"] = "ee300b40Gdnm", + ["vmul.f32_3"] = "ee200a00dnm", + ["vmul.f64_3"] = "ee200b00Gdnm", + ["vnmul.f32_3"] = "ee200a40dnm", + ["vnmul.f64_3"] = "ee200b40Gdnm", + ["vmla.f32_3"] = "ee000a00dnm", + ["vmla.f64_3"] = "ee000b00Gdnm", + ["vmls.f32_3"] = "ee000a40dnm", + ["vmls.f64_3"] = "ee000b40Gdnm", + ["vnmla.f32_3"] = "ee100a40dnm", + ["vnmla.f64_3"] = "ee100b40Gdnm", + ["vnmls.f32_3"] = "ee100a00dnm", + ["vnmls.f64_3"] = "ee100b00Gdnm", + ["vdiv.f32_3"] = "ee800a00dnm", + ["vdiv.f64_3"] = "ee800b00Gdnm", + + ["vabs.f32_2"] = "eeb00ac0dm", + ["vabs.f64_2"] = "eeb00bc0Gdm", + ["vneg.f32_2"] = "eeb10a40dm", + ["vneg.f64_2"] = "eeb10b40Gdm", + ["vsqrt.f32_2"] = "eeb10ac0dm", + ["vsqrt.f64_2"] = "eeb10bc0Gdm", + ["vcmp.f32_2"] = "eeb40a40dm", + ["vcmp.f64_2"] = "eeb40b40Gdm", + ["vcmpe.f32_2"] = "eeb40ac0dm", + ["vcmpe.f64_2"] = "eeb40bc0Gdm", + ["vcmpz.f32_1"] = "eeb50a40d", + ["vcmpz.f64_1"] = "eeb50b40Gd", + ["vcmpze.f32_1"] = "eeb50ac0d", + ["vcmpze.f64_1"] = "eeb50bc0Gd", + + vldr_2 = "ed100a00dl|ed100b00Gdl", + vstr_2 = "ed000a00dl|ed000b00Gdl", + vldm_2 = "ec900a00or", + vldmia_2 = "ec900a00or", + vldmdb_2 = "ed100a00or", + vpop_1 = "ecbd0a00r", + vstm_2 = "ec800a00or", + vstmia_2 = "ec800a00or", + vstmdb_2 = "ed000a00or", + vpush_1 = "ed2d0a00r", + + ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY", -- #imm is VFPv3 only + ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", -- #imm is VFPv3 only + vmov_2 = "ee100a10Dn|ee000a10nD", + vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN", + + vmrs_0 = "eef1fa10", + vmrs_1 = "eef10a10D", + vmsr_1 = "eee10a10D", + + ["vcvt.s32.f32_2"] = "eebd0ac0dm", + ["vcvt.s32.f64_2"] = "eebd0bc0dGm", + ["vcvt.u32.f32_2"] = "eebc0ac0dm", + ["vcvt.u32.f64_2"] = "eebc0bc0dGm", + ["vcvtr.s32.f32_2"] = "eebd0a40dm", + ["vcvtr.s32.f64_2"] = "eebd0b40dGm", + ["vcvtr.u32.f32_2"] = "eebc0a40dm", + ["vcvtr.u32.f64_2"] = "eebc0b40dGm", + ["vcvt.f32.s32_2"] = "eeb80ac0dm", + ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", + ["vcvt.f32.u32_2"] = "eeb80a40dm", + ["vcvt.f64.u32_2"] = "eeb80b40GdFm", + 
["vcvt.f32.f64_2"] = "eeb70bc0dGm", + ["vcvt.f64.f32_2"] = "eeb70ac0GdFm", + + -- VFPv4 only: + ["vfma.f32_3"] = "eea00a00dnm", + ["vfma.f64_3"] = "eea00b00Gdnm", + ["vfms.f32_3"] = "eea00a40dnm", + ["vfms.f64_3"] = "eea00b40Gdnm", + ["vfnma.f32_3"] = "ee900a40dnm", + ["vfnma.f64_3"] = "ee900b40Gdnm", + ["vfnms.f32_3"] = "ee900a00dnm", + ["vfnms.f64_3"] = "ee900b00Gdnm", + + -- NYI: Advanced SIMD instructions. + + -- NYI: I have no need for these instructions right now: + -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh + -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe + -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb + -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2 +} + +-- Add mnemonics for "s" variants. +do + local t = {} + for k,v in pairs(map_op) do + if sub(v, -1) == "s" then + local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) + t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r(1?[0-9])$") + if r then + r = tonumber(r) + if r <= 15 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_gpr_pm(expr) + local pm, expr2 = match(expr, "^([+-]?)(.*)$") + return parse_gpr(expr2), (pm == "-") +end + +local function parse_vr(expr, tp) + local t, r = match(expr, "^([sd])([0-9]+)$") + if t == tp then + r = tonumber(r) + if r <= 31 then + if t == "s" then return shr(r, 1), band(r, 1) end + return band(r, 15), shr(r, 4) + end + end + werror("bad register name `"..expr.."'") +end + +local function parse_reglist(reglist) + reglist = match(reglist, "^{%s*([^}]*)}$") + if not reglist then werror("register list expected") end + local rr = 0 + for p in gmatch(reglist..",", "%s*([^,]*),") do + local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) + if band(rr, rbit) ~= 0 then + werror("duplicate register `"..p.."'") + end + rr = rr + rbit + end + return rr +end + +local function parse_vrlist(reglist) + local ta, ra, tb, rb = match(reglist, + "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") + ra, rb = tonumber(ra), tonumber(rb) + if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then + local nr = rb+1 - ra + if ta == "s" then + return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr + else + return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100 + end + end + werror("register list expected") +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + local n = tonumber(imm) + if n then + local m = band(n) + for i=0,-15,-1 do + if shr(m, 8) == 0 then 
return m + shl(band(i, 15), 8) end + m = ror(m, 2) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm16(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + if shr(n, 16) == 0 then return band(n, 0x0fff) + shl(band(n, 0xf000), 4) end + werror("out of range immediate `"..imm.."'") + else + waction("IMM16", 32*16, imm) + return 0 + end +end + +local function parse_imm_load(imm, ext) + local n = tonumber(imm) + if n then + if ext then + if n >= -255 and n <= 255 then + local up = 0x00800000 + if n < 0 then n = -n; up = 0 end + return shl(band(n, 0xf0), 4) + band(n, 0x0f) + up + end + else + if n >= -4095 and n <= 4095 then + if n >= 0 then return n+0x00800000 end + return -n + end + end + werror("out of range immediate `"..imm.."'") + else + waction(ext and "IMML8" or "IMML12", 32768 + shl(ext and 8 or 12, 5), imm) + return 0 + end +end + +local function parse_shift(shift, gprok) + if shift == "rrx" then + return 3 * 32 + else + local s, s2 = match(shift, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + if sub(s2, 1, 1) == "#" then + return parse_imm(s2, 5, 7, 0, false) + shl(s, 5) + else + if not gprok then werror("expected immediate shift operand") end + return shl(parse_gpr(s2), 8) + shl(s, 5) + 16 + end + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +local function parse_load(params, nparams, n, op) + local oplo = band(op, 255) + local ext, ldrd = (oplo ~= 0), (oplo == 208) + local d + if (ldrd or oplo == 240) then + d = band(shr(op, 12), 15) + if band(d, 1) ~= 0 then werror("odd destination register") end + end + local pn = params[n] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + local p2 = params[n+1] + if not p1 then + if not p2 then + if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then + local mode, n, s = parse_label(pn, false) + waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1) + return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0) + end + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), + format(tp.ctypefmt, tailr)) + return op + shl(d, 16) + 0x01000000 + (ext and 0x00400000 or 0) + end + end + end + werror("expected address operand") + end + if wb == "!" then op = op + 0x00200000 end + if p2 then + if wb == "!" 
then werror("bad use of '!'") end + local p3 = params[n+2] + op = op + shl(parse_gpr(p1), 16) + local imm = match(p2, "^#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + if p3 then werror("too many parameters") end + op = op + m + (ext and 0x00400000 or 0) + else + local m, neg = parse_gpr_pm(p2) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 then op = op + parse_shift(p3) end + end + else + local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + shl(parse_gpr(p1a), 16) + 0x01000000 + if p2 ~= "" then + local imm = match(p2, "^,%s*#(.*)$") + if imm then + local m = parse_imm_load(imm, ext) + op = op + m + (ext and 0x00400000 or 0) + else + local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") + local m, neg = parse_gpr_pm(p2a) + if ldrd and (m == d or m-1 == d) then werror("register conflict") end + op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) + if p3 ~= "" then + if ext then werror("too many parameters") end + op = op + parse_shift(p3) + end + end + else + if wb == "!" then werror("bad use of '!'") end + op = op + (ext and 0x00c00000 or 0x00800000) + end + end + return op +end + +local function parse_vload(q) + local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") + if reg then + local d = shl(parse_gpr(reg), 16) + if imm == "" then return d end + imm = match(imm, "^,%s*#(.*)$") + if imm then + local n = tonumber(imm) + if n then + if n >= -1020 and n <= 1020 and n%4 == 0 then + return d + (n >= 0 and n/4+0x00800000 or -n/4) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMMV8", 32768 + 32*8, imm) + return d + end + end + else + if match(q, "^[<>=%-]") or match(q, "^extern%s+") then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n + 0x2800, s, 1) + return 15 * 65536 + end + local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) + return shl(d, 16) + end + end + end + werror("expected address operand") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + local vr = "s" + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + local q = params[n] + if p == "D" then + op = op + shl(parse_gpr(q), 12); n = n + 1 + elseif p == "N" then + op = op + shl(parse_gpr(q), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(q), 8); n = n + 1 + elseif p == "M" then + op = op + parse_gpr(q); n = n + 1 + elseif p == "d" then + local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1 + elseif p == "n" then + local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1 + elseif p == "m" then + local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1 + elseif p == "P" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + parse_imm12(imm) + 0x02000000 + else + op = op + parse_gpr(q) + end + n = n + 1 + elseif p == "p" then + op = op + parse_shift(q, true); n = n + 1 + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "l" then + op = op + parse_vload(q) + elseif p == "B" then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + elseif p == "C" then -- blx gpr vs. blx label. 
+ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then + op = op + parse_gpr(q) + else + if op < 0xe0000000 then werror("unconditional instruction") end + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + op = 0xfa000000 + end + elseif p == "F" then + vr = "s" + elseif p == "G" then + vr = "d" + elseif p == "o" then + local r, wb = match(q, "^([^!]*)(!?)$") + op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0) + n = n + 1 + elseif p == "R" then + op = op + parse_reglist(q); n = n + 1 + elseif p == "r" then + op = op + parse_vrlist(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm16(q); n = n + 1 + elseif p == "v" then + op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 + elseif p == "w" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 + else + op = op + shl(parse_gpr(q), 8) + 16 + end + elseif p == "X" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "Y" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 8) ~= 0 then + werror("bad immediate operand") + end + op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f) + elseif p == "K" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 16) ~= 0 then + werror("bad immediate operand") + end + op = op + shl(band(imm, 0xfff0), 4) + band(imm, 0x000f) + elseif p == "T" then + op = op + parse_imm(q, 24, 0, 0, false); n = n + 1 + elseif p == "s" then + -- Ignored. + else + assert(false) + end + end + wputpos(pos, op) +end + +map_op[".template__"] = function(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions. + if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. 
+ wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. 
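The `Dt` shortcut defines emitted by `.type` above turn a field reference into a compile-time byte offset by taking the address of a member on a null base pointer. A minimal standalone sketch of that expansion (the `mcontext` type and `sp` field are hypothetical, not part of this patch):

#include <stdio.h>
#include <stddef.h>

typedef struct mcontext { void *pc; long sp; } mcontext;

/* What `.type ctx, mcontext` would emit for type number 1: */
#define Dt1(_V) (int)(ptrdiff_t)&(((mcontext *)0)_V)

int main(void)
{
    /* Dt1(->sp) folds to the byte offset of `sp`, same as offsetof(). */
    printf("%d == %zu\n", Dt1(->sp), offsetof(mcontext, sp));
    return 0;
}

In DynASM source this is what lets a typed operand such as `ctx->sp` expand to a plain displacement that the C compiler resolves at build time.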
+function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = function(t, k) + local v = map_coreop[k] + if v then return v end + local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$") + local cv = map_cond[cc] + if cv then + local v = rawget(t, k1..k2) + if type(v) == "string" then + local scv = format("%x", cv) + return gsub(scv..sub(v, 2), "|e", "|"..scv) + end + end + end }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm64.h b/ext/opcache/jit/ir/dynasm/dasm_arm64.h new file mode 100644 index 0000000000000..9a8a39a258695 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm64.h @@ -0,0 +1,570 @@ +/* +** DynASM ARM64 encoding engine. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "arm64" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A, + DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, + DASM_IMMV, DASM_VREG, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_RANGE_VREG 0x16000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection.
*/ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + if ((n >> 12) == 0) + return n; + else if ((n & 0xff000fff) == 0) + return (n >> 12) | 0x1000; + else + return -1; +} + +static int dasm_ffs(unsigned long long x) +{ + int n = -1; + while (x) { x >>= 1; n++; } + return n; +} + +static int dasm_imm13(int lo, int hi) +{ + int inv = 0, w = 64, s = 0xfff, xa, xb; + unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo; + unsigned long long m = 1ULL, a, b, c; + if (n & 1) { n = ~n; inv = 1; } + a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b); + xa = dasm_ffs(a); xb = dasm_ffs(b); + if (c) { + w = dasm_ffs(c) - xa; + if (w == 32) m = 0x0000000100000001UL; + else if (w == 16) m = 0x0001000100010001UL; + else if (w == 8) m = 0x0101010101010101UL; + else if (w == 4) m = 0x1111111111111111UL; + else if (w == 2) m = 0x5555555555555555UL; + else return -1; + s = (-2*w & 0x3f) - 1; + } else if (!a) { + return -1; + } else if (xb == -1) { + xb = 64; + } + if ((b-a) * m != n) return -1; + if (inv) { + return ((w - xb) << 6) | (s+w+xa-xb); + } else { + return ((w - xa) << 6) | (s+xb-xa); + } + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. 
*/ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + if ((ins & 0x8000)) ofs += 8; + break; + case DASM_REL_A: + b[pos++] = n; + b[pos++] = va_arg(ap, int); + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMM6: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM13W: + CK(dasm_imm13(n, n) != -1, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM13X: { + int m = va_arg(ap, int); + CK(dasm_imm13(n, m) != -1, RANGE_I); + b[pos++] = n; + b[pos++] = m; + break; + } + case DASM_IMML: { +#ifdef DASM_CHECKS + int scale = (ins & 3); + CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) || + (unsigned int)(n+256) < 512, RANGE_I); +#endif + b[pos++] = n; + break; + } + case DASM_IMMV: + ofs += 4; + b[pos++] = n; + break; + case DASM_VREG: + CK(n < 32, RANGE_VREG); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: + case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break; + case DASM_IMM13X: case DASM_REL_A: pos += 2; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section.
*/ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_ADD_VENEER +#define CK_REL(x, o) \ + do { if (!(x) && !(n = DASM_ADD_VENEER(D, buffer, ins, b, cp, o))) \ + return DASM_S_RANGE_REL|(p-D->actionlist-1); \ + } while (0) +#else +#define CK_REL(x, o) CK(x, RANGE_REL) +#endif + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f; + break; + case DASM_REL_LG: + if (n < 0) { + ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4; + n = (int)na; + CK_REL((ptrdiff_t)n == na, na); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; + patchrel: + if (!(ins & 0xf800)) { /* B, BL */ + CK_REL((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, n); + cp[-1] |= ((n >> 2) & 0x03ffffff); + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + CK_REL((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, n); + cp[-1] |= ((n << 3) & 0x00ffffe0); + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + CK_REL(((n+0x00100000) >> 21) == 0, n); + cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29); + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29); + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + CK_REL((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, n); + cp[-1] |= ((n << 3) & 0x0007ffe0); + } else if ((ins & 0x8000)) { /* absolute */ + cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n); + cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32); + cp += 2; + } + break; + case DASM_REL_A: { + ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n); + if ((ins & 0x3000) == 0x3000) { /* ADRP */ + ins &= ~0x1000; + na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12); + } else { + na = na - (ptrdiff_t)cp + 4; + } + n = (int)na; + CK_REL((ptrdiff_t)n == na, na); + goto patchrel; + } + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM6: + cp[-1] |= ((n&31) << 19) | ((n&32) << 26); + break; + case DASM_IMM12: + cp[-1] |= (dasm_imm12((unsigned int)n) << 10); + break; + case DASM_IMM13W: + cp[-1] |= (dasm_imm13(n, n) << 10); + break; + case DASM_IMM13X: + cp[-1] |= (dasm_imm13(n, *b++) << 10); + break; + case DASM_IMML: { + int scale = (ins & 3); + cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); + break; + } + case DASM_IMMV: + *cp++ = n; + break; + case DASM_VREG: + cp[-1] |= (n & 0x1f) << (ins & 0x1f); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_arm64.lua b/ext/opcache/jit/ir/dynasm/dasm_arm64.lua new file mode 100644 index 0000000000000..7e9c4cbf22ef1 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_arm64.lua @@ -0,0 +1,1219 @@ +------------------------------------------------------------------------------ +-- DynASM ARM64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local format, byte, char = _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "REL_A", + "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV", + "VREG", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). 
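`dasm_imm12()` in the encoding engine above accepts exactly the two AArch64 ADD/SUB immediate forms: a plain 12-bit value, or a 12-bit value shifted left by 12 (flagged by bit 0x1000). A standalone restatement with a few probe values (the driver is illustrative only):

#include <stdio.h>

/* Same test as dasm_imm12(): encodable 12-bit field or -1. */
static int imm12(unsigned int n)
{
    if ((n >> 12) == 0) return (int)n;                    /* 0..0xfff */
    if ((n & 0xff000fff) == 0) return (int)((n >> 12) | 0x1000u);
    return -1;                                            /* not encodable */
}

int main(void)
{
    printf("%x\n", imm12(0xfffu));   /* fff : fits directly */
    printf("%x\n", imm12(0x7b000u)); /* 107b: 0x7b with LSL #12 */
    printf("%d\n", imm12(0x1001u));  /* -1  : needs more than one insn */
    return 0;
}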
+local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. 
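Each word `waction()` appends is packed as `action * 0x10000 + val`; on the C side, `dasm_put()` recovers the action as `ins >> 16` and treats anything at or above `DASM__MAX` as a literal instruction word. A tiny round-trip check (the enum mirrors the `action_names` order above; the driver is illustrative):

#include <stdio.h>

enum { STOP, SECTION, ESC, REL_EXT, ALIGN, REL_LG, LABEL_LG, REL_PC,
       LABEL_PC, REL_A, IMM, IMM6, IMM12, IMM13W, IMM13X, IMML, IMMV,
       VREG, MAX };  /* same order as action_names */

int main(void)
{
    unsigned int ins = REL_PC * 0x10000 + 0x800;  /* as wputxw() stores it */
    printf("action=%u payload=0x%x is_code=%d\n",
           ins >> 16, ins & 0xffff, (ins >> 16) >= MAX);
    return 0;
}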
+local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", } + +-- Int. register name -> ext. name. +local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, } + +local map_extend = { + uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3, + sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7, +} + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +local parse_reg_type + +local function parse_reg(expr, shift, no_vreg) + if not expr then werror("expected register name") end + local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") + if not tname then + tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$") + end + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$") + if r then + r = tonumber(r) + if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then + if not parse_reg_type then + parse_reg_type = rt + elseif parse_reg_type ~= rt then + werror("register size mismatch") + end + return shl(r, shift), tp + end + end + local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$") + if vreg then + if not parse_reg_type then + parse_reg_type = vrt + elseif parse_reg_type ~= vrt then + werror("register size mismatch") + end + if not no_vreg then waction("VREG", shift, vreg) end + return 0 + end + werror("bad register name `"..expr.."'") +end + +local function parse_reg_base(expr) + if expr == "sp" then return 0x3e0 end + local base, tp = parse_reg(expr, 5) + if parse_reg_type ~= "x" then werror("bad register type") end + parse_reg_type = false + return base, tp +end + +local parse_ctx = {} + +local loadenv = 
setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. +local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok and type(y) == "number" then return y end + end + return nil +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if shr(n, 12) == 0 then + return shl(n, 10) + elseif band(n, 0xff000fff) == 0 then + return shr(n, 2) + 0x00400000 + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm13(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + local r64 = parse_reg_type == "x" + if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then + local inv = false + if band(n, 1) == 1 then n = bit.bnot(n); inv = true end + local t = {} + for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end + local b = table.concat(t) + b = b..(r64 and (inv and "1" or "0"):rep(32) or b) + local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)") + if p0 then + local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a + if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then + local s = band(-2*w, 0x3f) - 1 + if w == 64 then s = s + 0x1000 end + if inv then + return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) + else + return shl(w-#p0, 16) + shl(s+#p1, 10) + end + end + end + werror("out of range immediate `"..imm.."'") + elseif r64 then + waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) + actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) + return 0 + else + waction("IMM13W", 0, imm) + return 0 + end +end + +local function parse_imm6(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if n >= 0 and n <= 63 then + return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM6", 0, imm) + return 0 + end +end + +local function parse_imm_load(imm, scale) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n and m >= 0 and m < 0x1000 then + return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. + elseif n >= -256 and n < 256 then + return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. 
+ end + werror("out of range immediate `"..imm.."'") + else + waction("IMML", scale, imm) + return 0 + end +end + +local function parse_fpimm(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m, e = math.frexp(n) + local s, e2 = 0, band(e-2, 7) + if m < 0 then m = -m; s = 0x00100000 end + m = m*32-16 + if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then + return s + shl(e2, 17) + shl(m, 13) + end + werror("out of range immediate `"..imm.."'") + else + werror("NYI fpimm action") + end +end + +local function parse_shift(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) +end + +local function parse_lslx16(expr) + local n = match(expr, "^lsl%s*#(%d+)$") + n = tonumber(n) + if not n then werror("expected shift operand") end + if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then + werror("bad shift amount") + end + return shl(n, 17) +end + +local function parse_extend(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + if s == "lsl" then + s = parse_reg_type == "x" and 3 or 2 + else + s = map_extend[s] + end + if not s then werror("expected extend operand") end + return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) +end + +local function parse_cond(expr, inv) + local c = map_cond[expr] + if not c then werror("expected condition operand") end + return shl(bit.bxor(c, inv), 12) +end + +local function parse_load(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local scale = shr(op, 30) + local pn, p2 = params[n], params[n+1] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMML", scale, format(tp.ctypefmt, tailr)) + return op + base + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 + elseif wb == "!" then + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if not p1a then werror("bad use of '!'") end + op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + parse_reg_base(p1a) + if p2a ~= "" then + local imm = match(p2a, "^,%s*#(.*)$") + if imm then + op = op + parse_imm_load(imm, scale) + else + local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") + op = op + parse_reg(p2b, 16) + 0x00200800 + if parse_reg_type ~= "x" and parse_reg_type ~= "w" then + werror("bad index register type") + end + if p3b == "" then + if parse_reg_type ~= "x" then werror("bad index register type") end + op = op + 0x6000 + else + if p3s == "" or p3s == "#0" then + elseif p3s == "#"..scale then + op = op + 0x1000 + else + werror("bad scale") + end + if parse_reg_type == "x" then + if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 + elseif p3b == "sxtx" then op = op + 0xe000 + else + werror("bad extend/shift specifier") + end + else + if p3b == "uxtw" then op = op + 0x4000 + elseif p3b == "sxtw" then op = op + 0xc000 + else + werror("bad extend/shift specifier") + end + end + end + end + else + if wb == "!" 
then werror("bad use of '!'") end + op = op + 0x01000000 + end + end + return op +end + +local function parse_load_pair(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local pn, p2 = params[n], params[n+1] + local scale = shr(op, 30) == 0 and 2 or 3 + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) + return op + base + 0x01000000 + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + 0x00800000 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if p1a then p1, p2 = p1a, p2a else p2 = "#0" end + op = op + (wb == "!" and 0x01800000 or 0x01000000) + end + return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) +end + +local function parse_label(label, def) + local prefix = label:sub(1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, label:sub(3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[label:sub(3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + -- &expr (pointer) + if label:sub(1, 1) == "&" then + return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2)) + end + end +end + +local function branch_type(op) + if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL + elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or + band(op, 0x3b000000) == 0x18000000 then + return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal + elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ + elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR + elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP + else + assert(false, "unknown branch type") + end +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +local function alias_bfx(p) + p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" +end + +local function alias_bfiz(p) + parse_reg(p[1], 0, true) + if parse_reg_type == "w" then + p[3] = "#(32-("..p[3]:sub(2).."))%32" + p[4] = "#("..p[4]:sub(2)..")-1" + else + p[3] = "#(64-("..p[3]:sub(2).."))%64" + p[4] = "#("..p[4]:sub(2)..")-1" + end +end + +local alias_lslimm = op_alias("ubfm_4", function(p) + parse_reg(p[1], 0, true) + local sh = p[3]:sub(2) + if parse_reg_type == "w" then + p[3] = "#(32-("..sh.."))%32" + p[4] = "#31-("..sh..")" + else + p[3] = "#(64-("..sh.."))%64" + p[4] = "#63-("..sh..")" + end +end) + +-- Template strings for ARM instructions. +map_op = { + -- Basic data processing instructions. 
+ add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", + add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", + adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", + adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", + cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", + cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", + + sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", + sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", + subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", + subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", + cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", + cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", + + neg_2 = "4b0003e0DMg", + neg_3 = "4b0003e0DMSg", + negs_2 = "6b0003e0DMg", + negs_3 = "6b0003e0DMSg", + + adc_3 = "1a000000DNMg", + adcs_3 = "3a000000DNMg", + sbc_3 = "5a000000DNMg", + sbcs_3 = "7a000000DNMg", + ngc_2 = "5a0003e0DMg", + ngcs_2 = "7a0003e0DMg", + + and_3 = "0a000000DNMg|12000000pDNig", + and_4 = "0a000000DNMSg", + orr_3 = "2a000000DNMg|32000000pDNig", + orr_4 = "2a000000DNMSg", + eor_3 = "4a000000DNMg|52000000pDNig", + eor_4 = "4a000000DNMSg", + ands_3 = "6a000000DNMg|72000000DNig", + ands_4 = "6a000000DNMSg", + tst_2 = "6a00001fNMg|7200001fNig", + tst_3 = "6a00001fNMSg", + + bic_3 = "0a200000DNMg", + bic_4 = "0a200000DNMSg", + orn_3 = "2a200000DNMg", + orn_4 = "2a200000DNMSg", + eon_3 = "4a200000DNMg", + eon_4 = "4a200000DNMSg", + bics_3 = "6a200000DNMg", + bics_4 = "6a200000DNMSg", + + movn_2 = "12800000DWg", + movn_3 = "12800000DWRg", + movz_2 = "52800000DWg", + movz_3 = "52800000DWRg", + movk_2 = "72800000DWg", + movk_3 = "72800000DWRg", + + -- TODO: this doesn't cover all valid immediates for mov reg, #imm. + mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", + mov_3 = "2a0003e0DMSg", + mvn_2 = "2a2003e0DMg", + mvn_3 = "2a2003e0DMSg", + + adr_2 = "10000000DBx", + adrp_2 = "90000000DBx", + + csel_4 = "1a800000DNMCg", + csinc_4 = "1a800400DNMCg", + csinv_4 = "5a800000DNMCg", + csneg_4 = "5a800400DNMCg", + cset_2 = "1a9f07e0Dcg", + csetm_2 = "5a9f03e0Dcg", + cinc_3 = "1a800400DNmcg", + cinv_3 = "5a800000DNmcg", + cneg_3 = "5a800400DNmcg", + + ccmn_4 = "3a400000NMVCg|3a400800N5VCg", + ccmp_4 = "7a400000NMVCg|7a400800N5VCg", + + madd_4 = "1b000000DNMAg", + msub_4 = "1b008000DNMAg", + mul_3 = "1b007c00DNMg", + mneg_3 = "1b00fc00DNMg", + + smaddl_4 = "9b200000DxNMwAx", + smsubl_4 = "9b208000DxNMwAx", + smull_3 = "9b207c00DxNMw", + smnegl_3 = "9b20fc00DxNMw", + smulh_3 = "9b407c00DNMx", + umaddl_4 = "9ba00000DxNMwAx", + umsubl_4 = "9ba08000DxNMwAx", + umull_3 = "9ba07c00DxNMw", + umnegl_3 = "9ba0fc00DxNMw", + umulh_3 = "9bc07c00DNMx", + + udiv_3 = "1ac00800DNMg", + sdiv_3 = "1ac00c00DNMg", + + -- Bit operations. 
+ sbfm_4 = "13000000DN12w|93400000DN12x", + bfm_4 = "33000000DN12w|b3400000DN12x", + ubfm_4 = "53000000DN12w|d3400000DN12x", + extr_4 = "13800000DNM2w|93c00000DNM2x", + + sxtb_2 = "13001c00DNw|93401c00DNx", + sxth_2 = "13003c00DNw|93403c00DNx", + sxtw_2 = "93407c00DxNw", + uxtb_2 = "53001c00DNw", + uxth_2 = "53003c00DNw", + + sbfx_4 = op_alias("sbfm_4", alias_bfx), + bfxil_4 = op_alias("bfm_4", alias_bfx), + ubfx_4 = op_alias("ubfm_4", alias_bfx), + sbfiz_4 = op_alias("sbfm_4", alias_bfiz), + bfi_4 = op_alias("bfm_4", alias_bfiz), + ubfiz_4 = op_alias("ubfm_4", alias_bfiz), + + lsl_3 = function(params, nparams) + if params and params[3]:byte() == 35 then + return alias_lslimm(params, nparams) + else + return op_template(params, "1ac02000DNMg", nparams) + end + end, + lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", + asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", + ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", + + clz_2 = "5ac01000DNg", + cls_2 = "5ac01400DNg", + rbit_2 = "5ac00000DNg", + rev_2 = "5ac00800DNw|dac00c00DNx", + rev16_2 = "5ac00400DNg", + rev32_2 = "dac00800DNx", + + -- Loads and stores. + ["strb_*"] = "38000000DwL", + ["ldrb_*"] = "38400000DwL", + ["ldrsb_*"] = "38c00000DwL|38800000DxL", + ["strh_*"] = "78000000DwL", + ["ldrh_*"] = "78400000DwL", + ["ldrsh_*"] = "78c00000DwL|78800000DxL", + ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", + ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", + ["ldrsw_*"] = "98000000DxB|b8800000DxL", + -- NOTE: ldur etc. are handled by ldr et al. + + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["ldpsw_*"] = "68400000DAxP", + + -- Branches. + b_1 = "14000000B", + bl_1 = "94000000B", + blr_1 = "d63f0000Nx", + br_1 = "d61f0000Nx", + ret_0 = "d65f03c0", + ret_1 = "d65f0000Nx", + -- b.cond is added below. + cbz_2 = "34000000DBg", + cbnz_2 = "35000000DBg", + tbz_3 = "36000000DTBw|36000000DTBx", + tbnz_3 = "37000000DTBw|37000000DTBx", + + -- Miscellaneous instructions. + -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr + -- TODO: sys, sysl, ic, dc, at, tlbi + -- TODO: hint, yield, wfe, wfi, sev, sevl + -- TODO: clrex, dsb, dmb, isb + nop_0 = "d503201f", + brk_0 = "d4200000", + brk_1 = "d4200000W", + + -- Floating point instructions. + fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", + fabs_2 = "1e20c000DNf", + fneg_2 = "1e214000DNf", + fsqrt_2 = "1e21c000DNf", + + fcvt_2 = "1e22c000DdNs|1e624000DsNd", + + -- TODO: half-precision and fixed-point conversions. 
+ fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", + fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", + fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", + fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", + fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", + fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", + fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", + fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", + fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", + fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", + + scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", + ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", + + frintn_2 = "1e244000DNf", + frintp_2 = "1e24c000DNf", + frintm_2 = "1e254000DNf", + frintz_2 = "1e25c000DNf", + frinta_2 = "1e264000DNf", + frintx_2 = "1e274000DNf", + frinti_2 = "1e27c000DNf", + + fadd_3 = "1e202800DNMf", + fsub_3 = "1e203800DNMf", + fmul_3 = "1e200800DNMf", + fnmul_3 = "1e208800DNMf", + fdiv_3 = "1e201800DNMf", + + fmadd_4 = "1f000000DNMAf", + fmsub_4 = "1f008000DNMAf", + fnmadd_4 = "1f200000DNMAf", + fnmsub_4 = "1f208000DNMAf", + + fmax_3 = "1e204800DNMf", + fmaxnm_3 = "1e206800DNMf", + fmin_3 = "1e205800DNMf", + fminnm_3 = "1e207800DNMf", + + fcmp_2 = "1e202000NMf|1e202008NZf", + fcmpe_2 = "1e202010NMf|1e202018NZf", + + fccmp_4 = "1e200400NMVCf", + fccmpe_4 = "1e200410NMVCf", + + fcsel_4 = "1e200c00DNMCf", + + -- TODO: crc32*, aes*, sha*, pmull + -- TODO: SIMD instructions. +} + +for cond,c in pairs(map_cond) do + map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(template:sub(1, 8), 16) + local n = 1 + local rtt = {} + + parse_reg_type = false + + -- Process each character. 
+ for p in gmatch(template:sub(9), ".") do + local q = params[n] + if p == "D" then + op = op + parse_reg(q, 0); n = n + 1 + elseif p == "N" then + op = op + parse_reg(q, 5); n = n + 1 + elseif p == "M" then + op = op + parse_reg(q, 16); n = n + 1 + elseif p == "A" then + op = op + parse_reg(q, 10); n = n + 1 + elseif p == "m" then + op = op + parse_reg(params[n-1], 16) + + elseif p == "p" then + if q == "sp" then params[n] = "@x31" end + elseif p == "g" then + if parse_reg_type == "x" then + op = op + 0x80000000 + elseif parse_reg_type ~= "w" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "f" then + if parse_reg_type == "d" then + op = op + 0x00400000 + elseif parse_reg_type ~= "s" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "x" or p == "w" or p == "d" or p == "s" then + if parse_reg_type ~= p then + werror("register size mismatch") + end + parse_reg_type = false + + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "P" then + op = parse_load_pair(params, nparams, n, op) + + elseif p == "B" then + local mode, v, s = parse_label(q, false); n = n + 1 + if not mode then werror("bad label `"..q.."'") end + local m = branch_type(op) + if mode == "A" then + waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s)) + actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s) + else + waction("REL_"..mode, v+m, s, 1) + end + + elseif p == "I" then + op = op + parse_imm12(q); n = n + 1 + elseif p == "i" then + op = op + parse_imm13(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 + elseif p == "T" then + op = op + parse_imm6(q); n = n + 1 + elseif p == "1" then + op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 + elseif p == "2" then + op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 + elseif p == "5" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 + elseif p == "F" then + op = op + parse_fpimm(q); n = n + 1 + elseif p == "Z" then + if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end + n = n + 1 + + elseif p == "S" then + op = op + parse_shift(q); n = n + 1 + elseif p == "X" then + op = op + parse_extend(q); n = n + 1 + elseif p == "R" then + op = op + parse_lslx16(q); n = n + 1 + elseif p == "C" then + op = op + parse_cond(q, 0); n = n + 1 + elseif p == "c" then + op = op + parse_cond(q, 1); n = n + 1 + + else + assert(false) + end + end + wputpos(pos, op) +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 4 positions. + if secpos+4 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actlist[lpos+4] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + actargs[apos+4] = nil + end + error(err, 0) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. 
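For the plain B/BL relocation class (template char `B`, with `branch_type()` above returning 0), `dasm_encode()` patches the word-scaled offset into imm26: `cp[-1] |= ((n >> 2) & 0x03ffffff)`. A standalone check of that arithmetic (offsets here are taken from the branch instruction itself, a simplification of the engine's `cp`-relative bookkeeping):

#include <stdio.h>
#include <stdint.h>

/* Patch a B (0x14000000) with a byte offset, as the REL_* cases do. */
static uint32_t patch_b(int32_t byte_ofs)
{
    return 0x14000000u | (((uint32_t)byte_ofs >> 2) & 0x03ffffffu);
}

int main(void)
{
    printf("0x%08x\n", patch_b(8));  /* b .+8 -> 0x14000002 */
    printf("0x%08x\n", patch_b(-4)); /* b .-4 -> 0x17ffffff */
    return 0;
}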
+map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if not mode or mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." end + local sz = params.op == ".long" and 4 or 8 + for _,p in ipairs(params) do + local imm = parse_number(p) + if imm then + local n = tobit(imm) + if n == imm or (n < 0 and n + 2^32 == imm) then + wputw(n < 0 and n + 2^32 or n) + if sz == 8 then + wputw(imm < 0 and 0xffffffff or 0) + end + elseif sz == 4 then + werror("bad immediate `"..p.."'") + else + imm = nil + end + end + if not imm then + local mode, v, s = parse_label(p, false) + if sz == 4 then + if mode then werror("label does not fit into .long") end + waction("IMMV", 0, p) + elseif mode and mode ~= "A" then + waction("REL_"..mode, v+0x8000, s, 1) + else + if mode == "A" then p = s end + waction("IMMV", 0, format("(unsigned int)(%s)", p)) + waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p)) + end + end + if secpos+2 > maxsecpos then wflush() end + end +end +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). 
+map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips.h b/ext/opcache/jit/ir/dynasm/dasm_mips.h new file mode 100644 index 0000000000000..b99b56b0e9ae6 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_mips.h @@ -0,0 +1,424 @@ +/* +** DynASM MIPS encoding engine. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "mips" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. 
*/ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. 
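A short worked example of the biased-position scheme used throughout the engine (values invented):

    int pos = DASM_SEC2POS(2) + 5;  /* section 2, index 5 -> 0x02000005 */
    /* DASM_POS2SEC(pos) == 2 and DASM_POS2IDX(pos) == 5.  Because rbuf
       is stored pre-biased (rbuf = buf - DASM_SEC2POS(sec)), rbuf[pos]
       addresses buf[5] directly, with no masking on the hot path. */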
*/ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + break; + case DASM_IMM: case DASM_IMMS: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); + if (ins & 2048) + n = (n + (int)(size_t)base) & 0x0fffffff; + else + n = n - (int)((char *)cp - base); + patchrel: { + unsigned int e = 16 + ((ins >> 12) & 15); + CK((n & 3) == 0 && + ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); + cp[-1] |= ((n>>2) & ((1<<e)-1)); + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips.lua b/ext/opcache/jit/ir/dynasm/dasm_mips.lua new file mode 100644 index 0000000000000..591470157c4ee --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_mips.lua @@ -0,0 +1,1181 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS32/MIPS64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +local mips64 = mips64 +local mipsr6 = _map_def.MIPSR6 + +-- Module information: +local _info = { + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks.
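Before the Lua module continues: for orientation, the C engine completed above is driven in three passes from generated code. A minimal sketch of the usual embedder sequence, assuming the default `Dst_DECL`/`Dst_REF` mapping to a `dasm_State **`, a `lbl_` globals prefix, and a hypothetical executable-memory allocator (none of these names are fixed by this header):

    dasm_State *state;
    void *labels[lbl__MAX];        /* lbl__MAX from a ".globals lbl_" enum */
    dasm_init(&state, 1);          /* one section in this sketch */
    dasm_setupglobal(&state, labels, lbl__MAX);
    dasm_setup(&state, actions);   /* actions[] emitted by ".actionlist" */
    /* ... generated dasm_put(&state, ...) calls run here (pass 1) ... */
    size_t sz;
    dasm_link(&state, &sz);        /* pass 2: resolve labels, size code */
    void *code = alloc_executable(sz);  /* hypothetical allocator */
    dasm_encode(&state, code);     /* pass 3: write instructions */
    dasm_free(&state);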
+local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMS", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(0xff000000 + w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n >= 0xff000000 then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. 
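The waction()/wflush() pair above packs each action into one 32-bit word, `0xff000000 + action*0x10000 + val`, and turns each buffered run into a single generated C statement. A small sketch of both ends (numbers invented):

    /* The Lua side buffers 0xff050011 for action REL_LG (5) with val 17;
       the C decoder recovers it as (ins >> 16) - 0xff00 == 5.
       A flush with two runtime arguments then emits one line such as: */
    dasm_put(Dst, 137, fp_offset, target_addr);
    /* 137 is the start offset into the static action list; the rest are
       the values collected in actargs. */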
+local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { sp="r29", ra="r31" } -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "r29" then return "sp" + elseif s == "r31" then return "ra" end + return s +end + +------------------------------------------------------------------------------ + +-- Template strings for MIPS instructions. +local map_op = { + -- First-level opcodes. + j_1 = "08000000J", + jal_1 = "0c000000J", + b_1 = "10000000B", + beqz_2 = "10000000SB", + beq_3 = "10000000STB", + bnez_2 = "14000000SB", + bne_3 = "14000000STB", + blez_2 = "18000000SB", + bgtz_2 = "1c000000SB", + li_2 = "24000000TI", + addiu_3 = "24000000TSI", + slti_3 = "28000000TSI", + sltiu_3 = "2c000000TSI", + andi_3 = "30000000TSU", + lu_2 = "34000000TU", + ori_3 = "34000000TSU", + xori_3 = "38000000TSU", + lui_2 = "3c000000TU", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", + lb_2 = "80000000TO", + lh_2 = "84000000TO", + lw_2 = "8c000000TO", + lbu_2 = "90000000TO", + lhu_2 = "94000000TO", + lwu_2 = mips64 and "9c000000TO", + sb_2 = "a0000000TO", + sh_2 = "a4000000TO", + sw_2 = "ac000000TO", + lwc1_2 = "c4000000HO", + ldc1_2 = "d4000000HO", + ld_2 = mips64 and "dc000000TO", + swc1_2 = "e4000000HO", + sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", + + -- Opcode SPECIAL. 
+ nop_0 = "00000000", + sll_3 = "00000000DTA", + sextw_2 = "00000000DT", + srl_3 = "00000002DTA", + rotr_3 = "00200002DTA", + sra_3 = "00000003DTA", + sllv_3 = "00000004DTS", + srlv_3 = "00000006DTS", + rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", + srav_3 = "00000007DTS", + jalr_1 = "0000f809S", + jalr_2 = "00000009DS", + syscall_0 = "0000000c", + syscall_1 = "0000000cY", + break_0 = "0000000d", + break_1 = "0000000dY", + sync_0 = "0000000f", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", + add_3 = "00000020DST", + move_2 = mips64 and "00000025DS" or "00000021DS", + addu_3 = "00000021DST", + sub_3 = "00000022DST", + negu_2 = mips64 and "0000002fDT" or "00000023DT", + subu_3 = "00000023DST", + and_3 = "00000024DST", + or_3 = "00000025DST", + xor_3 = "00000026DST", + not_2 = "00000027DS", + nor_3 = "00000027DST", + slt_3 = "0000002aDST", + sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", + tge_2 = "00000030ST", + tge_3 = "00000030STZ", + tgeu_2 = "00000031ST", + tgeu_3 = "00000031STZ", + tlt_2 = "00000032ST", + tlt_3 = "00000032STZ", + tltu_2 = "00000033ST", + tltu_3 = "00000033STZ", + teq_2 = "00000034ST", + teq_3 = "00000034STZ", + tne_2 = "00000036ST", + tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and "0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", + + -- Opcode REGIMM. + bltz_2 = "04000000SB", + bgez_2 = "04010000SB", + bltzl_2 = "04020000SB", + bgezl_2 = "04030000SB", + bal_1 = "04110000B", + synci_1 = "041f0000O", + + -- Opcode SPECIAL3. + ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", + ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 + wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", + seb_2 = "7c000420DT", + seh_2 = "7c000620DT", + rdhwr_2 = "7c00003bTD", + + -- Opcode COP0. + mfc0_2 = "40000000TD", + mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", + mtc0_2 = "40800000TD", + mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", + rdpgpr_2 = "41400000DT", + di_0 = "41606000", + di_1 = "41606000T", + ei_0 = "41606020", + ei_1 = "41606020T", + wrpgpr_2 = "41c00000DT", + tlbr_0 = "42000001", + tlbwi_0 = "42000002", + tlbwr_0 = "42000006", + tlbp_0 = "42000008", + eret_0 = "42000018", + deret_0 = "4200001f", + wait_0 = "42000020", + + -- Opcode COP1. 
+ mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", + cfc1_2 = "44400000TG", + mfhc1_2 = "44600000TG", + mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", + ctc1_2 = "44c00000TG", + mthc1_2 = "44e00000TG", + + ["add.s_3"] = "46000000FGH", + ["sub.s_3"] = "46000001FGH", + ["mul.s_3"] = "46000002FGH", + ["div.s_3"] = "46000003FGH", + ["sqrt.s_2"] = "46000004FG", + ["abs.s_2"] = "46000005FG", + ["mov.s_2"] = "46000006FG", + ["neg.s_2"] = "46000007FG", + ["round.l.s_2"] = "46000008FG", + ["trunc.l.s_2"] = "46000009FG", + ["ceil.l.s_2"] = "4600000aFG", + ["floor.l.s_2"] = "4600000bFG", + ["round.w.s_2"] = "4600000cFG", + ["trunc.w.s_2"] = "4600000dFG", + ["ceil.w.s_2"] = "4600000eFG", + ["floor.w.s_2"] = "4600000fFG", + ["recip.s_2"] = "46000015FG", + ["rsqrt.s_2"] = "46000016FG", + ["cvt.d.s_2"] = "46000021FG", + ["cvt.w.s_2"] = "46000024FG", + ["cvt.l.s_2"] = "46000025FG", + ["add.d_3"] = "46200000FGH", + ["sub.d_3"] = "46200001FGH", + ["mul.d_3"] = "46200002FGH", + ["div.d_3"] = "46200003FGH", + ["sqrt.d_2"] = "46200004FG", + ["abs.d_2"] = "46200005FG", + ["mov.d_2"] = "46200006FG", + ["neg.d_2"] = "46200007FG", + ["round.l.d_2"] = "46200008FG", + ["trunc.l.d_2"] = "46200009FG", + ["ceil.l.d_2"] = "4620000aFG", + ["floor.l.d_2"] = "4620000bFG", + ["round.w.d_2"] = "4620000cFG", + ["trunc.w.d_2"] = "4620000dFG", + ["ceil.w.d_2"] = "4620000eFG", + ["floor.w.d_2"] = "4620000fFG", + ["recip.d_2"] = "46200015FG", + ["rsqrt.d_2"] = "46200016FG", + ["cvt.s.d_2"] = "46200020FG", + ["cvt.w.d_2"] = "46200024FG", + ["cvt.l.d_2"] = "46200025FG", + ["cvt.s.w_2"] = "46800020FG", + ["cvt.d.w_2"] = "46800021FG", + ["cvt.s.l_2"] = "46a00020FG", + ["cvt.d.l_2"] = "46a00021FG", +} + +if mipsr6 then -- Instructions added with MIPSR6. + + for k,v in pairs({ + + -- Add immediate to upper bits. + aui_3 = "3c000000TSI", + daui_3 = mips64 and "74000000TSI", + dahi_2 = mips64 and "04060000SI", + dati_2 = mips64 and "041e0000SI", + + -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. + + -- Compact branches. + blezalc_2 = "18000000TB", -- rt != 0. + bgezalc_2 = "18000000T=SB", -- rt != 0. + bgtzalc_2 = "1c000000TB", -- rt != 0. + bltzalc_2 = "1c000000T=SB", -- rt != 0. + + blezc_2 = "58000000TB", -- rt != 0. + bgezc_2 = "58000000T=SB", -- rt != 0. + bgec_3 = "58000000STB", -- rs != rt. + blec_3 = "58000000TSB", -- rt != rs. + + bgtzc_2 = "5c000000TB", -- rt != 0. + bltzc_2 = "5c000000T=SB", -- rt != 0. + bltc_3 = "5c000000STB", -- rs != rt. + bgtc_3 = "5c000000TSB", -- rt != rs. + + bgeuc_3 = "18000000STB", -- rs != rt. + bleuc_3 = "18000000TSB", -- rt != rs. + bltuc_3 = "1c000000STB", -- rs != rt. + bgtuc_3 = "1c000000TSB", -- rt != rs. + + beqzalc_2 = "20000000TB", -- rt != 0. + bnezalc_2 = "60000000TB", -- rt != 0. + beqc_3 = "20000000STB", -- rs < rt. + bnec_3 = "60000000STB", -- rs < rt. + bovc_3 = "20000000STB", -- rs >= rt. + bnvc_3 = "60000000STB", -- rs >= rt. + + beqzc_2 = "d8000000SK", -- rs != 0. + bnezc_2 = "f8000000SK", -- rs != 0. + jic_2 = "d8000000TI", + jialc_2 = "f8000000TI", + bc_1 = "c8000000L", + balc_1 = "e8000000L", + + -- Opcode SPECIAL. 
+ jr_1 = "00000009S", + sdbbp_0 = "0000000e", + sdbbp_1 = "0000000eY", + lsa_4 = "00000005DSTA", + dlsa_4 = mips64 and "00000015DSTA", + seleqz_3 = "00000035DST", + selnez_3 = "00000037DST", + clz_2 = "00000050DS", + clo_2 = "00000051DS", + dclz_2 = mips64 and "00000052DS", + dclo_2 = mips64 and "00000053DS", + mul_3 = "00000098DST", + muh_3 = "000000d8DST", + mulu_3 = "00000099DST", + muhu_3 = "000000d9DST", + div_3 = "0000009aDST", + mod_3 = "000000daDST", + divu_3 = "0000009bDST", + modu_3 = "000000dbDST", + dmul_3 = mips64 and "0000009cDST", + dmuh_3 = mips64 and "000000dcDST", + dmulu_3 = mips64 and "0000009dDST", + dmuhu_3 = mips64 and "000000ddDST", + ddiv_3 = mips64 and "0000009eDST", + dmod_3 = mips64 and "000000deDST", + ddivu_3 = mips64 and "0000009fDST", + dmodu_3 = mips64 and "000000dfDST", + + -- Opcode SPECIAL3. + align_4 = "7c000220DSTA", + dalign_4 = mips64 and "7c000224DSTA", + bitswap_2 = "7c000020DT", + dbitswap_2 = mips64 and "7c000024DT", + + -- Opcode COP1. + bc1eqz_2 = "45200000HB", + bc1nez_2 = "45a00000HB", + + ["sel.s_3"] = "46000010FGH", + ["seleqz.s_3"] = "46000014FGH", + ["selnez.s_3"] = "46000017FGH", + ["maddf.s_3"] = "46000018FGH", + ["msubf.s_3"] = "46000019FGH", + ["rint.s_2"] = "4600001aFG", + ["class.s_2"] = "4600001bFG", + ["min.s_3"] = "4600001cFGH", + ["mina.s_3"] = "4600001dFGH", + ["max.s_3"] = "4600001eFGH", + ["maxa.s_3"] = "4600001fFGH", + ["cmp.af.s_3"] = "46800000FGH", + ["cmp.un.s_3"] = "46800001FGH", + ["cmp.or.s_3"] = "46800011FGH", + ["cmp.eq.s_3"] = "46800002FGH", + ["cmp.une.s_3"] = "46800012FGH", + ["cmp.ueq.s_3"] = "46800003FGH", + ["cmp.ne.s_3"] = "46800013FGH", + ["cmp.lt.s_3"] = "46800004FGH", + ["cmp.ult.s_3"] = "46800005FGH", + ["cmp.le.s_3"] = "46800006FGH", + ["cmp.ule.s_3"] = "46800007FGH", + ["cmp.saf.s_3"] = "46800008FGH", + ["cmp.sun.s_3"] = "46800009FGH", + ["cmp.sor.s_3"] = "46800019FGH", + ["cmp.seq.s_3"] = "4680000aFGH", + ["cmp.sune.s_3"] = "4680001aFGH", + ["cmp.sueq.s_3"] = "4680000bFGH", + ["cmp.sne.s_3"] = "4680001bFGH", + ["cmp.slt.s_3"] = "4680000cFGH", + ["cmp.sult.s_3"] = "4680000dFGH", + ["cmp.sle.s_3"] = "4680000eFGH", + ["cmp.sule.s_3"] = "4680000fFGH", + + ["sel.d_3"] = "46200010FGH", + ["seleqz.d_3"] = "46200014FGH", + ["selnez.d_3"] = "46200017FGH", + ["maddf.d_3"] = "46200018FGH", + ["msubf.d_3"] = "46200019FGH", + ["rint.d_2"] = "4620001aFG", + ["class.d_2"] = "4620001bFG", + ["min.d_3"] = "4620001cFGH", + ["mina.d_3"] = "4620001dFGH", + ["max.d_3"] = "4620001eFGH", + ["maxa.d_3"] = "4620001fFGH", + ["cmp.af.d_3"] = "46a00000FGH", + ["cmp.un.d_3"] = "46a00001FGH", + ["cmp.or.d_3"] = "46a00011FGH", + ["cmp.eq.d_3"] = "46a00002FGH", + ["cmp.une.d_3"] = "46a00012FGH", + ["cmp.ueq.d_3"] = "46a00003FGH", + ["cmp.ne.d_3"] = "46a00013FGH", + ["cmp.lt.d_3"] = "46a00004FGH", + ["cmp.ult.d_3"] = "46a00005FGH", + ["cmp.le.d_3"] = "46a00006FGH", + ["cmp.ule.d_3"] = "46a00007FGH", + ["cmp.saf.d_3"] = "46a00008FGH", + ["cmp.sun.d_3"] = "46a00009FGH", + ["cmp.sor.d_3"] = "46a00019FGH", + ["cmp.seq.d_3"] = "46a0000aFGH", + ["cmp.sune.d_3"] = "46a0001aFGH", + ["cmp.sueq.d_3"] = "46a0000bFGH", + ["cmp.sne.d_3"] = "46a0001bFGH", + ["cmp.slt.d_3"] = "46a0000cFGH", + ["cmp.sult.d_3"] = "46a0000dFGH", + ["cmp.sle.d_3"] = "46a0000eFGH", + ["cmp.sule.d_3"] = "46a0000fFGH", + + }) do map_op[k] = v end + +else -- Instructions removed by MIPSR6. + + for k,v in pairs({ + -- Traps, don't use. + addi_3 = "20000000TSI", + daddi_3 = mips64 and "60000000TSI", + + -- Branch on likely, don't use. 
+ beqzl_2 = "50000000SB", + beql_3 = "50000000STB", + bnezl_2 = "54000000SB", + bnel_3 = "54000000STB", + blezl_2 = "58000000SB", + bgtzl_2 = "5c000000SB", + + lwl_2 = "88000000TO", + lwr_2 = "98000000TO", + swl_2 = "a8000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b1000000TO", + swr_2 = "b8000000TO", + cache_2 = "bc000000NO", + ll_2 = "c0000000TO", + pref_2 = "cc000000NO", + sc_2 = "e0000000TO", + scd_2 = mips64 and "f0000000TO", + + -- Opcode SPECIAL. + movf_2 = "00000001DS", + movf_3 = "00000001DSC", + movt_2 = "00010001DS", + movt_3 = "00010001DSC", + jr_1 = "00000008S", + movz_3 = "0000000aDST", + movn_3 = "0000000bDST", + mfhi_1 = "00000010D", + mthi_1 = "00000011S", + mflo_1 = "00000012D", + mtlo_1 = "00000013S", + mult_2 = "00000018ST", + multu_2 = "00000019ST", + div_3 = "0000001aST", + divu_3 = "0000001bST", + ddiv_3 = mips64 and "0000001eST", + ddivu_3 = mips64 and "0000001fST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + + -- Opcode REGIMM. + tgei_2 = "04080000SI", + tgeiu_2 = "04090000SI", + tlti_2 = "040a0000SI", + tltiu_2 = "040b0000SI", + teqi_2 = "040c0000SI", + tnei_2 = "040e0000SI", + bltzal_2 = "04100000SB", + bgezal_2 = "04110000SB", + bltzall_2 = "04120000SB", + bgezall_2 = "04130000SB", + + -- Opcode SPECIAL2. + madd_2 = "70000000ST", + maddu_2 = "70000001ST", + mul_3 = "70000002DST", + msub_2 = "70000004ST", + msubu_2 = "70000005ST", + clz_2 = "70000020D=TS", + clo_2 = "70000021D=TS", + dclz_2 = mips64 and "70000024D=TS", + dclo_2 = mips64 and "70000025D=TS", + sdbbp_0 = "7000003f", + sdbbp_1 = "7000003fY", + + -- Opcode COP1. + bc1f_1 = "45000000B", + bc1f_2 = "45000000CB", + bc1t_1 = "45010000B", + bc1t_2 = "45010000CB", + bc1fl_1 = "45020000B", + bc1fl_2 = "45020000CB", + bc1tl_1 = "45030000B", + bc1tl_2 = "45030000CB", + + ["movf.s_2"] = "46000011FG", + ["movf.s_3"] = "46000011FGC", + ["movt.s_2"] = "46010011FG", + ["movt.s_3"] = "46010011FGC", + ["movz.s_3"] = "46000012FGT", + ["movn.s_3"] = "46000013FGT", + ["cvt.ps.s_3"] = "46000026FGH", + ["c.f.s_2"] = "46000030GH", + ["c.f.s_3"] = "46000030VGH", + ["c.un.s_2"] = "46000031GH", + ["c.un.s_3"] = "46000031VGH", + ["c.eq.s_2"] = "46000032GH", + ["c.eq.s_3"] = "46000032VGH", + ["c.ueq.s_2"] = "46000033GH", + ["c.ueq.s_3"] = "46000033VGH", + ["c.olt.s_2"] = "46000034GH", + ["c.olt.s_3"] = "46000034VGH", + ["c.ult.s_2"] = "46000035GH", + ["c.ult.s_3"] = "46000035VGH", + ["c.ole.s_2"] = "46000036GH", + ["c.ole.s_3"] = "46000036VGH", + ["c.ule.s_2"] = "46000037GH", + ["c.ule.s_3"] = "46000037VGH", + ["c.sf.s_2"] = "46000038GH", + ["c.sf.s_3"] = "46000038VGH", + ["c.ngle.s_2"] = "46000039GH", + ["c.ngle.s_3"] = "46000039VGH", + ["c.seq.s_2"] = "4600003aGH", + ["c.seq.s_3"] = "4600003aVGH", + ["c.ngl.s_2"] = "4600003bGH", + ["c.ngl.s_3"] = "4600003bVGH", + ["c.lt.s_2"] = "4600003cGH", + ["c.lt.s_3"] = "4600003cVGH", + ["c.nge.s_2"] = "4600003dGH", + ["c.nge.s_3"] = "4600003dVGH", + ["c.le.s_2"] = "4600003eGH", + ["c.le.s_3"] = "4600003eVGH", + ["c.ngt.s_2"] = "4600003fGH", + ["c.ngt.s_3"] = "4600003fVGH", + ["movf.d_2"] = "46200011FG", + ["movf.d_3"] = "46200011FGC", + ["movt.d_2"] = "46210011FG", + ["movt.d_3"] = "46210011FGC", + ["movz.d_3"] = "46200012FGT", + ["movn.d_3"] = "46200013FGT", + ["c.f.d_2"] = "46200030GH", + ["c.f.d_3"] = "46200030VGH", + ["c.un.d_2"] = "46200031GH", + ["c.un.d_3"] = "46200031VGH", + ["c.eq.d_2"] = "46200032GH", + ["c.eq.d_3"] = "46200032VGH", + ["c.ueq.d_2"] = "46200033GH", + ["c.ueq.d_3"] = "46200033VGH", + ["c.olt.d_2"] = 
"46200034GH", + ["c.olt.d_3"] = "46200034VGH", + ["c.ult.d_2"] = "46200035GH", + ["c.ult.d_3"] = "46200035VGH", + ["c.ole.d_2"] = "46200036GH", + ["c.ole.d_3"] = "46200036VGH", + ["c.ule.d_2"] = "46200037GH", + ["c.ule.d_3"] = "46200037VGH", + ["c.sf.d_2"] = "46200038GH", + ["c.sf.d_3"] = "46200038VGH", + ["c.ngle.d_2"] = "46200039GH", + ["c.ngle.d_3"] = "46200039VGH", + ["c.seq.d_2"] = "4620003aGH", + ["c.seq.d_3"] = "4620003aVGH", + ["c.ngl.d_2"] = "4620003bGH", + ["c.ngl.d_3"] = "4620003bVGH", + ["c.lt.d_2"] = "4620003cGH", + ["c.lt.d_3"] = "4620003cVGH", + ["c.nge.d_2"] = "4620003dGH", + ["c.nge.d_3"] = "4620003dVGH", + ["c.le.d_2"] = "4620003eGH", + ["c.le.d_3"] = "4620003eVGH", + ["c.ngt.d_2"] = "4620003fGH", + ["c.ngt.d_3"] = "4620003fVGH", + ["add.ps_3"] = "46c00000FGH", + ["sub.ps_3"] = "46c00001FGH", + ["mul.ps_3"] = "46c00002FGH", + ["abs.ps_2"] = "46c00005FG", + ["mov.ps_2"] = "46c00006FG", + ["neg.ps_2"] = "46c00007FG", + ["movf.ps_2"] = "46c00011FG", + ["movf.ps_3"] = "46c00011FGC", + ["movt.ps_2"] = "46c10011FG", + ["movt.ps_3"] = "46c10011FGC", + ["movz.ps_3"] = "46c00012FGT", + ["movn.ps_3"] = "46c00013FGT", + ["cvt.s.pu_2"] = "46c00020FG", + ["cvt.s.pl_2"] = "46c00028FG", + ["pll.ps_3"] = "46c0002cFGH", + ["plu.ps_3"] = "46c0002dFGH", + ["pul.ps_3"] = "46c0002eFGH", + ["puu.ps_3"] = "46c0002fFGH", + ["c.f.ps_2"] = "46c00030GH", + ["c.f.ps_3"] = "46c00030VGH", + ["c.un.ps_2"] = "46c00031GH", + ["c.un.ps_3"] = "46c00031VGH", + ["c.eq.ps_2"] = "46c00032GH", + ["c.eq.ps_3"] = "46c00032VGH", + ["c.ueq.ps_2"] = "46c00033GH", + ["c.ueq.ps_3"] = "46c00033VGH", + ["c.olt.ps_2"] = "46c00034GH", + ["c.olt.ps_3"] = "46c00034VGH", + ["c.ult.ps_2"] = "46c00035GH", + ["c.ult.ps_3"] = "46c00035VGH", + ["c.ole.ps_2"] = "46c00036GH", + ["c.ole.ps_3"] = "46c00036VGH", + ["c.ule.ps_2"] = "46c00037GH", + ["c.ule.ps_3"] = "46c00037VGH", + ["c.sf.ps_2"] = "46c00038GH", + ["c.sf.ps_3"] = "46c00038VGH", + ["c.ngle.ps_2"] = "46c00039GH", + ["c.ngle.ps_3"] = "46c00039VGH", + ["c.seq.ps_2"] = "46c0003aGH", + ["c.seq.ps_3"] = "46c0003aVGH", + ["c.ngl.ps_2"] = "46c0003bGH", + ["c.ngl.ps_3"] = "46c0003bVGH", + ["c.lt.ps_2"] = "46c0003cGH", + ["c.lt.ps_3"] = "46c0003cVGH", + ["c.nge.ps_2"] = "46c0003dGH", + ["c.nge.ps_3"] = "46c0003dVGH", + ["c.le.ps_2"] = "46c0003eGH", + ["c.le.ps_3"] = "46c0003eVGH", + ["c.ngt.ps_2"] = "46c0003fGH", + ["c.ngt.ps_3"] = "46c0003fVGH", + + -- Opcode COP1X. 
+ lwxc1_2 = "4c000000FX", + ldxc1_2 = "4c000001FX", + luxc1_2 = "4c000005FX", + swxc1_2 = "4c000008FX", + sdxc1_2 = "4c000009FX", + suxc1_2 = "4c00000dFX", + prefx_2 = "4c00000fMX", + ["alnv.ps_4"] = "4c00001eFGHS", + ["madd.s_4"] = "4c000020FRGH", + ["madd.d_4"] = "4c000021FRGH", + ["madd.ps_4"] = "4c000026FRGH", + ["msub.s_4"] = "4c000028FRGH", + ["msub.d_4"] = "4c000029FRGH", + ["msub.ps_4"] = "4c00002eFRGH", + ["nmadd.s_4"] = "4c000030FRGH", + ["nmadd.d_4"] = "4c000031FRGH", + ["nmadd.ps_4"] = "4c000036FRGH", + ["nmsub.s_4"] = "4c000038FRGH", + ["nmsub.d_4"] = "4c000039FRGH", + ["nmsub.ps_4"] = "4c00003eFRGH", + + }) do map_op[k] = v end + +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_imm(imm, bits, shift, scale, signed, action) + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) + return 0 + end +end + +local function parse_disp(disp) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = shl(parse_gpr(reg), 21) + local extname = match(imm, "^extern%s+(%S+)$") + if extname then + waction("REL_EXT", map_extern[extname], nil, 1) + return r + else + return r + parse_imm(imm, 16, 0, 0, true) + end + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 21) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_index(idx) + local rt, rs = match(idx, "^(.*)%(([%w_:]+)%)$") + if rt then + rt = parse_gpr(rt) + rs = parse_gpr(rs) + return shl(rt, 16) + shl(rs, 21) + end + werror("bad index `"..idx.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions (ins/ext). + if secpos+2 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "D" then + op = op + shl(parse_gpr(params[n]), 11); n = n + 1 + elseif p == "T" then + op = op + shl(parse_gpr(params[n]), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(params[n]), 21); n = n + 1 + elseif p == "F" then + op = op + shl(parse_fpr(params[n]), 6); n = n + 1 + elseif p == "G" then + op = op + shl(parse_fpr(params[n]), 11); n = n + 1 + elseif p == "H" then + op = op + shl(parse_fpr(params[n]), 16); n = n + 1 + elseif p == "R" then + op = op + shl(parse_fpr(params[n]), 21); n = n + 1 + elseif p == "I" then + op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 + elseif p == "U" then + op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 + elseif p == "O" then + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "X" then + op = op + parse_index(params[n]); n = n + 1 + elseif p == "B" or p == "J" or p == "K" or p == "L" then + local mode, m, s = parse_label(params[n], false) + if p == "J" then m = m + 0xa800 + elseif p == "K" then m = m + 0x5000 + elseif p == "L" then m = m + 0xa000 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "A" then + op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) + elseif p == "M" then + op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 + elseif p == "N" then + op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 + elseif p == "C" then + op = op + parse_imm(params[n], 3, 18, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(params[n], 3, 8, 0, false); n = n + 1 + elseif p == "W" then + op = op + parse_imm(params[n], 3, 0, 0, false); n = n + 1 + elseif p == "Y" then + op = op + parse_imm(params[n], 20, 6, 0, false); n = n + 1 + elseif p == "Z" then + op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 + elseif p == "=" then + n = n - 1 -- Re-use previous parameter for next template char. + else + assert(false) + end + end + wputpos(pos, op) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. 
+ wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. 
+end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dasm_mips64.lua b/ext/opcache/jit/ir/dynasm/dasm_mips64.lua new file mode 100644 index 0000000000000..8ab5d33a2084f --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_mips64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +mips64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_mips") diff --git a/ext/opcache/jit/ir/dynasm/dasm_ppc.h b/ext/opcache/jit/ir/dynasm/dasm_ppc.h new file mode 100644 index 0000000000000..35264f2eb93b9 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_ppc.h @@ -0,0 +1,423 @@ +/* +** DynASM PPC/PPC64 encoding engine. +** Copyright (C) 2005-2021 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "ppc" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index).
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMSH: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
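The pass-1 relocation handling above keeps one chain per unresolved label, threaded through the section buffer itself. A worked example with invented positions:

    /* "b ->foo" at pos 7:  b[7]  = 0,  lglabels[foo] = 7   (chain head)
       "b ->foo" at pos 12: b[12] = 7,  lglabels[foo] = 12
       "->foo:"  at pos 20: walk 12 -> 7, store 20 into each entry,
                            then lglabels[foo] = -20 (label now defined). */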
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMSH: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4; + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + patchrel: + CK((n & 3) == 0 && + (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> + ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); + cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMMSH: + cp[-1] |= (ins & 1) ? 
((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_ppc.lua b/ext/opcache/jit/ir/dynasm/dasm_ppc.lua new file mode 100644 index 0000000000000..ee2afb2ecfadf --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_ppc.lua @@ -0,0 +1,1919 @@ +------------------------------------------------------------------------------ +-- DynASM PPC/PPC64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +-- +-- Support for various extensions contributed by Caio Souza Oliveira. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "ppc", + description = "DynASM PPC module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMSH" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. 
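+-- An action word packs the action number into its high 16 bits and an +-- optional value into the low 16 bits; e.g. REL_PC (number 7 in the list +-- above) with value 0 is emitted by waction() further below as 0x00070000.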
+local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0xffffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. 
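+-- (Illustrative: "bl extern printf" allocates the next free number here; the +-- reference is resolved at encode time through the DASM_EXTERN callback.)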
+local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { sp = "r1" } -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "r1" then return "sp" end + return s +end + +local map_cond = { + lt = 0, gt = 1, eq = 2, so = 3, + ge = 4, le = 5, ne = 6, ns = 7, +} + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +-- Template strings for PPC instructions. +map_op = { + tdi_3 = "08000000ARI", + twi_3 = "0c000000ARI", + mulli_3 = "1c000000RRI", + subfic_3 = "20000000RRI", + cmplwi_3 = "28000000XRU", + cmplwi_2 = "28000000-RU", + cmpldi_3 = "28200000XRU", + cmpldi_2 = "28200000-RU", + cmpwi_3 = "2c000000XRI", + cmpwi_2 = "2c000000-RI", + cmpdi_3 = "2c200000XRI", + cmpdi_2 = "2c200000-RI", + addic_3 = "30000000RRI", + ["addic._3"] = "34000000RRI", + addi_3 = "38000000RR0I", + li_2 = "38000000RI", + la_2 = "38000000RD", + addis_3 = "3c000000RR0I", + lis_2 = "3c000000RI", + lus_2 = "3c000000RU", + bc_3 = "40000000AAK", + bcl_3 = "40000001AAK", + bdnz_1 = "42000000K", + bdz_1 = "42400000K", + sc_0 = "44000000", + b_1 = "48000000J", + bl_1 = "48000001J", + rlwimi_5 = "50000000RR~AAA.", + rlwinm_5 = "54000000RR~AAA.", + rlwnm_5 = "5c000000RR~RAA.", + ori_3 = "60000000RR~U", + nop_0 = "60000000", + oris_3 = "64000000RR~U", + xori_3 = "68000000RR~U", + xoris_3 = "6c000000RR~U", + ["andi._3"] = "70000000RR~U", + ["andis._3"] = "74000000RR~U", + lwz_2 = "80000000RD", + lwzu_2 = "84000000RD", + lbz_2 = "88000000RD", + lbzu_2 = "8c000000RD", + stw_2 = "90000000RD", + stwu_2 = "94000000RD", + stb_2 = "98000000RD", + stbu_2 = "9c000000RD", + lhz_2 = "a0000000RD", + lhzu_2 = "a4000000RD", + lha_2 = "a8000000RD", + lhau_2 = "ac000000RD", + sth_2 = "b0000000RD", + sthu_2 = "b4000000RD", + lmw_2 = "b8000000RD", + stmw_2 = "bc000000RD", + lfs_2 = "c0000000FD", + lfsu_2 = "c4000000FD", + lfd_2 = "c8000000FD", + lfdu_2 = "cc000000FD", + stfs_2 = "d0000000FD", + stfsu_2 = "d4000000FD", + stfd_2 = "d8000000FD", + stfdu_2 = "dc000000FD", + ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4. 
+ ldu_2 = "e8000001RD", + lwa_2 = "e8000002RD", + std_2 = "f8000000RD", + stdu_2 = "f8000001RD", + + subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), + subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), + subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), + ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), + + rotlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + rotrwi_3 = op_alias("rlwinm_5", function(p) + p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" + end), + rotlw_3 = op_alias("rlwnm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + slwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[4] = "0" + end), + srwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" + end), + clrlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "0"; p[5] = "31" + end), + clrrwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" + end), + + -- Primary opcode 4: + mulhhwu_3 = "10000010RRR.", + machhwu_3 = "10000018RRR.", + mulhhw_3 = "10000050RRR.", + nmachhw_3 = "1000005cRRR.", + machhwsu_3 = "10000098RRR.", + machhws_3 = "100000d8RRR.", + nmachhws_3 = "100000dcRRR.", + mulchwu_3 = "10000110RRR.", + macchwu_3 = "10000118RRR.", + mulchw_3 = "10000150RRR.", + macchw_3 = "10000158RRR.", + nmacchw_3 = "1000015cRRR.", + macchwsu_3 = "10000198RRR.", + macchws_3 = "100001d8RRR.", + nmacchws_3 = "100001dcRRR.", + mullhw_3 = "10000350RRR.", + maclhw_3 = "10000358RRR.", + nmaclhw_3 = "1000035cRRR.", + maclhwsu_3 = "10000398RRR.", + maclhws_3 = "100003d8RRR.", + nmaclhws_3 = "100003dcRRR.", + machhwuo_3 = "10000418RRR.", + nmachhwo_3 = "1000045cRRR.", + machhwsuo_3 = "10000498RRR.", + machhwso_3 = "100004d8RRR.", + nmachhwso_3 = "100004dcRRR.", + macchwuo_3 = "10000518RRR.", + macchwo_3 = "10000558RRR.", + nmacchwo_3 = "1000055cRRR.", + macchwsuo_3 = "10000598RRR.", + macchwso_3 = "100005d8RRR.", + nmacchwso_3 = "100005dcRRR.", + maclhwo_3 = "10000758RRR.", + nmaclhwo_3 = "1000075cRRR.", + maclhwsuo_3 = "10000798RRR.", + maclhwso_3 = "100007d8RRR.", + nmaclhwso_3 = "100007dcRRR.", + + vaddubm_3 = "10000000VVV", + vmaxub_3 = "10000002VVV", + vrlb_3 = "10000004VVV", + vcmpequb_3 = "10000006VVV", + vmuloub_3 = "10000008VVV", + vaddfp_3 = "1000000aVVV", + vmrghb_3 = "1000000cVVV", + vpkuhum_3 = "1000000eVVV", + vmhaddshs_4 = "10000020VVVV", + vmhraddshs_4 = "10000021VVVV", + vmladduhm_4 = "10000022VVVV", + vmsumubm_4 = "10000024VVVV", + vmsummbm_4 = "10000025VVVV", + vmsumuhm_4 = "10000026VVVV", + vmsumuhs_4 = "10000027VVVV", + vmsumshm_4 = "10000028VVVV", + vmsumshs_4 = "10000029VVVV", + vsel_4 = "1000002aVVVV", + vperm_4 = "1000002bVVVV", + vsldoi_4 = "1000002cVVVP", + vpermxor_4 = "1000002dVVVV", + vmaddfp_4 = "1000002eVVVV~", + vnmsubfp_4 = "1000002fVVVV~", + vaddeuqm_4 = "1000003cVVVV", + vaddecuq_4 = "1000003dVVVV", + vsubeuqm_4 = "1000003eVVVV", + vsubecuq_4 = "1000003fVVVV", + vadduhm_3 = "10000040VVV", + vmaxuh_3 = "10000042VVV", + vrlh_3 = "10000044VVV", + vcmpequh_3 = "10000046VVV", + vmulouh_3 = "10000048VVV", + vsubfp_3 = "1000004aVVV", + vmrghh_3 = "1000004cVVV", + vpkuwum_3 = "1000004eVVV", + vadduwm_3 = "10000080VVV", + vmaxuw_3 = "10000082VVV", + vrlw_3 = "10000084VVV", + vcmpequw_3 = "10000086VVV", + vmulouw_3 = "10000088VVV", + vmuluwm_3 = "10000089VVV", + vmrghw_3 = "1000008cVVV", + vpkuhus_3 = "1000008eVVV", + vaddudm_3 = "100000c0VVV", + vmaxud_3 = "100000c2VVV", + vrld_3 = 
"100000c4VVV", + vcmpeqfp_3 = "100000c6VVV", + vcmpequd_3 = "100000c7VVV", + vpkuwus_3 = "100000ceVVV", + vadduqm_3 = "10000100VVV", + vmaxsb_3 = "10000102VVV", + vslb_3 = "10000104VVV", + vmulosb_3 = "10000108VVV", + vrefp_2 = "1000010aV-V", + vmrglb_3 = "1000010cVVV", + vpkshus_3 = "1000010eVVV", + vaddcuq_3 = "10000140VVV", + vmaxsh_3 = "10000142VVV", + vslh_3 = "10000144VVV", + vmulosh_3 = "10000148VVV", + vrsqrtefp_2 = "1000014aV-V", + vmrglh_3 = "1000014cVVV", + vpkswus_3 = "1000014eVVV", + vaddcuw_3 = "10000180VVV", + vmaxsw_3 = "10000182VVV", + vslw_3 = "10000184VVV", + vmulosw_3 = "10000188VVV", + vexptefp_2 = "1000018aV-V", + vmrglw_3 = "1000018cVVV", + vpkshss_3 = "1000018eVVV", + vmaxsd_3 = "100001c2VVV", + vsl_3 = "100001c4VVV", + vcmpgefp_3 = "100001c6VVV", + vlogefp_2 = "100001caV-V", + vpkswss_3 = "100001ceVVV", + vadduhs_3 = "10000240VVV", + vminuh_3 = "10000242VVV", + vsrh_3 = "10000244VVV", + vcmpgtuh_3 = "10000246VVV", + vmuleuh_3 = "10000248VVV", + vrfiz_2 = "1000024aV-V", + vsplth_3 = "1000024cVV3", + vupkhsh_2 = "1000024eV-V", + vminuw_3 = "10000282VVV", + vminud_3 = "100002c2VVV", + vcmpgtud_3 = "100002c7VVV", + vrfim_2 = "100002caV-V", + vcmpgtsb_3 = "10000306VVV", + vcfux_3 = "1000030aVVA~", + vaddshs_3 = "10000340VVV", + vminsh_3 = "10000342VVV", + vsrah_3 = "10000344VVV", + vcmpgtsh_3 = "10000346VVV", + vmulesh_3 = "10000348VVV", + vcfsx_3 = "1000034aVVA~", + vspltish_2 = "1000034cVS", + vupkhpx_2 = "1000034eV-V", + vaddsws_3 = "10000380VVV", + vminsw_3 = "10000382VVV", + vsraw_3 = "10000384VVV", + vcmpgtsw_3 = "10000386VVV", + vmulesw_3 = "10000388VVV", + vctuxs_3 = "1000038aVVA~", + vspltisw_2 = "1000038cVS", + vminsd_3 = "100003c2VVV", + vsrad_3 = "100003c4VVV", + vcmpbfp_3 = "100003c6VVV", + vcmpgtsd_3 = "100003c7VVV", + vctsxs_3 = "100003caVVA~", + vupklpx_2 = "100003ceV-V", + vsububm_3 = "10000400VVV", + ["bcdadd._4"] = "10000401VVVy.", + vavgub_3 = "10000402VVV", + vand_3 = "10000404VVV", + ["vcmpequb._3"] = "10000406VVV", + vmaxfp_3 = "1000040aVVV", + vsubuhm_3 = "10000440VVV", + ["bcdsub._4"] = "10000441VVVy.", + vavguh_3 = "10000442VVV", + vandc_3 = "10000444VVV", + ["vcmpequh._3"] = "10000446VVV", + vminfp_3 = "1000044aVVV", + vpkudum_3 = "1000044eVVV", + vsubuwm_3 = "10000480VVV", + vavguw_3 = "10000482VVV", + vor_3 = "10000484VVV", + ["vcmpequw._3"] = "10000486VVV", + vpmsumw_3 = "10000488VVV", + ["vcmpeqfp._3"] = "100004c6VVV", + ["vcmpequd._3"] = "100004c7VVV", + vpkudus_3 = "100004ceVVV", + vavgsb_3 = "10000502VVV", + vavgsh_3 = "10000542VVV", + vorc_3 = "10000544VVV", + vbpermq_3 = "1000054cVVV", + vpksdus_3 = "1000054eVVV", + vavgsw_3 = "10000582VVV", + vsld_3 = "100005c4VVV", + ["vcmpgefp._3"] = "100005c6VVV", + vpksdss_3 = "100005ceVVV", + vsububs_3 = "10000600VVV", + mfvscr_1 = "10000604V--", + vsum4ubs_3 = "10000608VVV", + vsubuhs_3 = "10000640VVV", + mtvscr_1 = "10000644--V", + ["vcmpgtuh._3"] = "10000646VVV", + vsum4shs_3 = "10000648VVV", + vupkhsw_2 = "1000064eV-V", + vsubuws_3 = "10000680VVV", + vshasigmaw_4 = "10000682VVYp", + veqv_3 = "10000684VVV", + vsum2sws_3 = "10000688VVV", + vmrgow_3 = "1000068cVVV", + vshasigmad_4 = "100006c2VVYp", + vsrd_3 = "100006c4VVV", + ["vcmpgtud._3"] = "100006c7VVV", + vupklsw_2 = "100006ceV-V", + vupkslw_2 = "100006ceV-V", + vsubsbs_3 = "10000700VVV", + vclzb_2 = "10000702V-V", + vpopcntb_2 = "10000703V-V", + ["vcmpgtsb._3"] = "10000706VVV", + vsum4sbs_3 = "10000708VVV", + vsubshs_3 = "10000740VVV", + vclzh_2 = "10000742V-V", + vpopcnth_2 = "10000743V-V", + ["vcmpgtsh._3"] = "10000746VVV", + vsubsws_3 
= "10000780VVV", + vclzw_2 = "10000782V-V", + vpopcntw_2 = "10000783V-V", + ["vcmpgtsw._3"] = "10000786VVV", + vsumsws_3 = "10000788VVV", + vmrgew_3 = "1000078cVVV", + vclzd_2 = "100007c2V-V", + vpopcntd_2 = "100007c3V-V", + ["vcmpbfp._3"] = "100007c6VVV", + ["vcmpgtsd._3"] = "100007c7VVV", + + -- Primary opcode 19: + mcrf_2 = "4c000000XX", + isync_0 = "4c00012c", + crnor_3 = "4c000042CCC", + crnot_2 = "4c000042CC=", + crandc_3 = "4c000102CCC", + crxor_3 = "4c000182CCC", + crclr_1 = "4c000182C==", + crnand_3 = "4c0001c2CCC", + crand_3 = "4c000202CCC", + creqv_3 = "4c000242CCC", + crset_1 = "4c000242C==", + crorc_3 = "4c000342CCC", + cror_3 = "4c000382CCC", + crmove_2 = "4c000382CC=", + bclr_2 = "4c000020AA", + bclrl_2 = "4c000021AA", + bcctr_2 = "4c000420AA", + bcctrl_2 = "4c000421AA", + bctar_2 = "4c000460AA", + bctarl_2 = "4c000461AA", + blr_0 = "4e800020", + blrl_0 = "4e800021", + bctr_0 = "4e800420", + bctrl_0 = "4e800421", + + -- Primary opcode 31: + cmpw_3 = "7c000000XRR", + cmpw_2 = "7c000000-RR", + cmpd_3 = "7c200000XRR", + cmpd_2 = "7c200000-RR", + tw_3 = "7c000008ARR", + lvsl_3 = "7c00000cVRR", + subfc_3 = "7c000010RRR.", + subc_3 = "7c000010RRR~.", + mulhdu_3 = "7c000012RRR.", + addc_3 = "7c000014RRR.", + mulhwu_3 = "7c000016RRR.", + isel_4 = "7c00001eRRRC", + isellt_3 = "7c00001eRRR", + iselgt_3 = "7c00005eRRR", + iseleq_3 = "7c00009eRRR", + mfcr_1 = "7c000026R", + mfocrf_2 = "7c100026RG", + mtcrf_2 = "7c000120GR", + mtocrf_2 = "7c100120GR", + lwarx_3 = "7c000028RR0R", + ldx_3 = "7c00002aRR0R", + lwzx_3 = "7c00002eRR0R", + slw_3 = "7c000030RR~R.", + cntlzw_2 = "7c000034RR~", + sld_3 = "7c000036RR~R.", + and_3 = "7c000038RR~R.", + cmplw_3 = "7c000040XRR", + cmplw_2 = "7c000040-RR", + cmpld_3 = "7c200040XRR", + cmpld_2 = "7c200040-RR", + lvsr_3 = "7c00004cVRR", + subf_3 = "7c000050RRR.", + sub_3 = "7c000050RRR~.", + lbarx_3 = "7c000068RR0R", + ldux_3 = "7c00006aRR0R", + dcbst_2 = "7c00006c-RR", + lwzux_3 = "7c00006eRR0R", + cntlzd_2 = "7c000074RR~", + andc_3 = "7c000078RR~R.", + td_3 = "7c000088ARR", + lvewx_3 = "7c00008eVRR", + mulhd_3 = "7c000092RRR.", + addg6s_3 = "7c000094RRR", + mulhw_3 = "7c000096RRR.", + dlmzb_3 = "7c00009cRR~R.", + ldarx_3 = "7c0000a8RR0R", + dcbf_2 = "7c0000ac-RR", + lbzx_3 = "7c0000aeRR0R", + lvx_3 = "7c0000ceVRR", + neg_2 = "7c0000d0RR.", + lharx_3 = "7c0000e8RR0R", + lbzux_3 = "7c0000eeRR0R", + popcntb_2 = "7c0000f4RR~", + not_2 = "7c0000f8RR~%.", + nor_3 = "7c0000f8RR~R.", + stvebx_3 = "7c00010eVRR", + subfe_3 = "7c000110RRR.", + sube_3 = "7c000110RRR~.", + adde_3 = "7c000114RRR.", + stdx_3 = "7c00012aRR0R", + ["stwcx._3"] = "7c00012dRR0R.", + stwx_3 = "7c00012eRR0R", + prtyw_2 = "7c000134RR~", + stvehx_3 = "7c00014eVRR", + stdux_3 = "7c00016aRR0R", + ["stqcx._3"] = "7c00016dR:R0R.", + stwux_3 = "7c00016eRR0R", + prtyd_2 = "7c000174RR~", + stvewx_3 = "7c00018eVRR", + subfze_2 = "7c000190RR.", + addze_2 = "7c000194RR.", + ["stdcx._3"] = "7c0001adRR0R.", + stbx_3 = "7c0001aeRR0R", + stvx_3 = "7c0001ceVRR", + subfme_2 = "7c0001d0RR.", + mulld_3 = "7c0001d2RRR.", + addme_2 = "7c0001d4RR.", + mullw_3 = "7c0001d6RRR.", + dcbtst_2 = "7c0001ec-RR", + stbux_3 = "7c0001eeRR0R", + bpermd_3 = "7c0001f8RR~R", + lvepxl_3 = "7c00020eVRR", + add_3 = "7c000214RRR.", + lqarx_3 = "7c000228R:R0R", + dcbt_2 = "7c00022c-RR", + lhzx_3 = "7c00022eRR0R", + cdtbcd_2 = "7c000234RR~", + eqv_3 = "7c000238RR~R.", + lvepx_3 = "7c00024eVRR", + eciwx_3 = "7c00026cRR0R", + lhzux_3 = "7c00026eRR0R", + cbcdtd_2 = "7c000274RR~", + xor_3 = "7c000278RR~R.", + mfspefscr_1 = "7c0082a6R", + 
mfxer_1 = "7c0102a6R", + mflr_1 = "7c0802a6R", + mfctr_1 = "7c0902a6R", + lwax_3 = "7c0002aaRR0R", + lhax_3 = "7c0002aeRR0R", + mftb_1 = "7c0c42e6R", + mftbu_1 = "7c0d42e6R", + lvxl_3 = "7c0002ceVRR", + lwaux_3 = "7c0002eaRR0R", + lhaux_3 = "7c0002eeRR0R", + popcntw_2 = "7c0002f4RR~", + divdeu_3 = "7c000312RRR.", + divweu_3 = "7c000316RRR.", + sthx_3 = "7c00032eRR0R", + orc_3 = "7c000338RR~R.", + ecowx_3 = "7c00036cRR0R", + sthux_3 = "7c00036eRR0R", + or_3 = "7c000378RR~R.", + mr_2 = "7c000378RR~%.", + divdu_3 = "7c000392RRR.", + divwu_3 = "7c000396RRR.", + mtspefscr_1 = "7c0083a6R", + mtxer_1 = "7c0103a6R", + mtlr_1 = "7c0803a6R", + mtctr_1 = "7c0903a6R", + dcbi_2 = "7c0003ac-RR", + nand_3 = "7c0003b8RR~R.", + dsn_2 = "7c0003c6-RR", + stvxl_3 = "7c0003ceVRR", + divd_3 = "7c0003d2RRR.", + divw_3 = "7c0003d6RRR.", + popcntd_2 = "7c0003f4RR~", + cmpb_3 = "7c0003f8RR~R.", + mcrxr_1 = "7c000400X", + lbdx_3 = "7c000406RRR", + subfco_3 = "7c000410RRR.", + subco_3 = "7c000410RRR~.", + addco_3 = "7c000414RRR.", + ldbrx_3 = "7c000428RR0R", + lswx_3 = "7c00042aRR0R", + lwbrx_3 = "7c00042cRR0R", + lfsx_3 = "7c00042eFR0R", + srw_3 = "7c000430RR~R.", + srd_3 = "7c000436RR~R.", + lhdx_3 = "7c000446RRR", + subfo_3 = "7c000450RRR.", + subo_3 = "7c000450RRR~.", + lfsux_3 = "7c00046eFR0R", + lwdx_3 = "7c000486RRR", + lswi_3 = "7c0004aaRR0A", + sync_0 = "7c0004ac", + lwsync_0 = "7c2004ac", + ptesync_0 = "7c4004ac", + lfdx_3 = "7c0004aeFR0R", + lddx_3 = "7c0004c6RRR", + nego_2 = "7c0004d0RR.", + lfdux_3 = "7c0004eeFR0R", + stbdx_3 = "7c000506RRR", + subfeo_3 = "7c000510RRR.", + subeo_3 = "7c000510RRR~.", + addeo_3 = "7c000514RRR.", + stdbrx_3 = "7c000528RR0R", + stswx_3 = "7c00052aRR0R", + stwbrx_3 = "7c00052cRR0R", + stfsx_3 = "7c00052eFR0R", + sthdx_3 = "7c000546RRR", + ["stbcx._3"] = "7c00056dRRR", + stfsux_3 = "7c00056eFR0R", + stwdx_3 = "7c000586RRR", + subfzeo_2 = "7c000590RR.", + addzeo_2 = "7c000594RR.", + stswi_3 = "7c0005aaRR0A", + ["sthcx._3"] = "7c0005adRRR", + stfdx_3 = "7c0005aeFR0R", + stddx_3 = "7c0005c6RRR", + subfmeo_2 = "7c0005d0RR.", + mulldo_3 = "7c0005d2RRR.", + addmeo_2 = "7c0005d4RR.", + mullwo_3 = "7c0005d6RRR.", + dcba_2 = "7c0005ec-RR", + stfdux_3 = "7c0005eeFR0R", + stvepxl_3 = "7c00060eVRR", + addo_3 = "7c000614RRR.", + lhbrx_3 = "7c00062cRR0R", + lfdpx_3 = "7c00062eF:RR", + sraw_3 = "7c000630RR~R.", + srad_3 = "7c000634RR~R.", + lfddx_3 = "7c000646FRR", + stvepx_3 = "7c00064eVRR", + srawi_3 = "7c000670RR~A.", + sradi_3 = "7c000674RR~H.", + eieio_0 = "7c0006ac", + lfiwax_3 = "7c0006aeFR0R", + divdeuo_3 = "7c000712RRR.", + divweuo_3 = "7c000716RRR.", + sthbrx_3 = "7c00072cRR0R", + stfdpx_3 = "7c00072eF:RR", + extsh_2 = "7c000734RR~.", + stfddx_3 = "7c000746FRR", + divdeo_3 = "7c000752RRR.", + divweo_3 = "7c000756RRR.", + extsb_2 = "7c000774RR~.", + divduo_3 = "7c000792RRR.", + divwou_3 = "7c000796RRR.", + icbi_2 = "7c0007ac-RR", + stfiwx_3 = "7c0007aeFR0R", + extsw_2 = "7c0007b4RR~.", + divdo_3 = "7c0007d2RRR.", + divwo_3 = "7c0007d6RRR.", + dcbz_2 = "7c0007ec-RR", + + ["tbegin._1"] = "7c00051d1", + ["tbegin._0"] = "7c00051d", + ["tend._1"] = "7c00055dY", + ["tend._0"] = "7c00055d", + ["tendall._0"] = "7e00055d", + tcheck_1 = "7c00059cX", + ["tsr._1"] = "7c0005dd1", + ["tsuspend._0"] = "7c0005dd", + ["tresume._0"] = "7c2005dd", + ["tabortwc._3"] = "7c00061dARR", + ["tabortdc._3"] = "7c00065dARR", + ["tabortwci._3"] = "7c00069dARS", + ["tabortdci._3"] = "7c0006ddARS", + ["tabort._1"] = "7c00071d-R-", + ["treclaim._1"] = "7c00075d-R", + ["trechkpt._0"] = "7c0007dd", + + lxsiwzx_3 = 
"7c000018QRR", + lxsiwax_3 = "7c000098QRR", + mfvsrd_2 = "7c000066-Rq", + mfvsrwz_2 = "7c0000e6-Rq", + stxsiwx_3 = "7c000118QRR", + mtvsrd_2 = "7c000166QR", + mtvsrwa_2 = "7c0001a6QR", + lxvdsx_3 = "7c000298QRR", + lxsspx_3 = "7c000418QRR", + lxsdx_3 = "7c000498QRR", + stxsspx_3 = "7c000518QRR", + stxsdx_3 = "7c000598QRR", + lxvw4x_3 = "7c000618QRR", + lxvd2x_3 = "7c000698QRR", + stxvw4x_3 = "7c000718QRR", + stxvd2x_3 = "7c000798QRR", + + -- Primary opcode 30: + rldicl_4 = "78000000RR~HM.", + rldicr_4 = "78000004RR~HM.", + rldic_4 = "78000008RR~HM.", + rldimi_4 = "7800000cRR~HM.", + rldcl_4 = "78000010RR~RM.", + rldcr_4 = "78000012RR~RM.", + + rotldi_3 = op_alias("rldicl_4", function(p) + p[4] = "0" + end), + rotrdi_3 = op_alias("rldicl_4", function(p) + p[3] = "64-("..p[3]..")"; p[4] = "0" + end), + rotld_3 = op_alias("rldcl_4", function(p) + p[4] = "0" + end), + sldi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")" + end), + srdi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), + clrldi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "0" + end), + clrrdi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")"; p[3] = "0" + end), + + -- Primary opcode 56: + lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 57: + lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 59: + fdivs_3 = "ec000024FFF.", + fsubs_3 = "ec000028FFF.", + fadds_3 = "ec00002aFFF.", + fsqrts_2 = "ec00002cF-F.", + fres_2 = "ec000030F-F.", + fmuls_3 = "ec000032FF-F.", + frsqrtes_2 = "ec000034F-F.", + fmsubs_4 = "ec000038FFFF~.", + fmadds_4 = "ec00003aFFFF~.", + fnmsubs_4 = "ec00003cFFFF~.", + fnmadds_4 = "ec00003eFFFF~.", + fcfids_2 = "ec00069cF-F.", + fcfidus_2 = "ec00079cF-F.", + + dadd_3 = "ec000004FFF.", + dqua_4 = "ec000006FFFZ.", + dmul_3 = "ec000044FFF.", + drrnd_4 = "ec000046FFFZ.", + dscli_3 = "ec000084FF6.", + dquai_4 = "ec000086SF~FZ.", + dscri_3 = "ec0000c4FF6.", + drintx_4 = "ec0000c61F~FZ.", + dcmpo_3 = "ec000104XFF", + dtstex_3 = "ec000144XFF", + dtstdc_3 = "ec000184XF6", + dtstdg_3 = "ec0001c4XF6", + drintn_4 = "ec0001c61F~FZ.", + dctdp_2 = "ec000204F-F.", + dctfix_2 = "ec000244F-F.", + ddedpd_3 = "ec000284ZF~F.", + dxex_2 = "ec0002c4F-F.", + dsub_3 = "ec000404FFF.", + ddiv_3 = "ec000444FFF.", + dcmpu_3 = "ec000504XFF", + dtstsf_3 = "ec000544XFF", + drsp_2 = "ec000604F-F.", + dcffix_2 = "ec000644F-F.", + denbcd_3 = "ec000684YF~F.", + diex_3 = "ec0006c4FFF.", + + -- Primary opcode 60: + xsaddsp_3 = "f0000000QQQ", + xsmaddasp_3 = "f0000008QQQ", + xxsldwi_4 = "f0000010QQQz", + xsrsqrtesp_2 = "f0000028Q-Q", + xssqrtsp_2 = "f000002cQ-Q", + xxsel_4 = "f0000030QQQQ", + xssubsp_3 = "f0000040QQQ", + xsmaddmsp_3 = "f0000048QQQ", + xxpermdi_4 = "f0000050QQQz", + xsresp_2 = "f0000068Q-Q", + xsmulsp_3 = "f0000080QQQ", + xsmsubasp_3 = "f0000088QQQ", + xxmrghw_3 = "f0000090QQQ", + xsdivsp_3 = "f00000c0QQQ", + xsmsubmsp_3 = "f00000c8QQQ", + xsadddp_3 = "f0000100QQQ", + xsmaddadp_3 = "f0000108QQQ", + xscmpudp_3 = "f0000118XQQ", + xscvdpuxws_2 = "f0000120Q-Q", + xsrdpi_2 = "f0000124Q-Q", + xsrsqrtedp_2 = "f0000128Q-Q", + xssqrtdp_2 = "f000012cQ-Q", + xssubdp_3 = "f0000140QQQ", + xsmaddmdp_3 = "f0000148QQQ", + xscmpodp_3 = "f0000158XQQ", + xscvdpsxws_2 = "f0000160Q-Q", + xsrdpiz_2 = "f0000164Q-Q", + xsredp_2 = "f0000168Q-Q", + xsmuldp_3 = "f0000180QQQ", + xsmsubadp_3 = "f0000188QQQ", + xxmrglw_3 = "f0000190QQQ", + xsrdpip_2 = "f00001a4Q-Q", + xstsqrtdp_2 = "f00001a8X-Q", + 
xsrdpic_2 = "f00001acQ-Q", + xsdivdp_3 = "f00001c0QQQ", + xsmsubmdp_3 = "f00001c8QQQ", + xsrdpim_2 = "f00001e4Q-Q", + xstdivdp_3 = "f00001e8XQQ", + xvaddsp_3 = "f0000200QQQ", + xvmaddasp_3 = "f0000208QQQ", + xvcmpeqsp_3 = "f0000218QQQ", + xvcvspuxws_2 = "f0000220Q-Q", + xvrspi_2 = "f0000224Q-Q", + xvrsqrtesp_2 = "f0000228Q-Q", + xvsqrtsp_2 = "f000022cQ-Q", + xvsubsp_3 = "f0000240QQQ", + xvmaddmsp_3 = "f0000248QQQ", + xvcmpgtsp_3 = "f0000258QQQ", + xvcvspsxws_2 = "f0000260Q-Q", + xvrspiz_2 = "f0000264Q-Q", + xvresp_2 = "f0000268Q-Q", + xvmulsp_3 = "f0000280QQQ", + xvmsubasp_3 = "f0000288QQQ", + xxspltw_3 = "f0000290QQg~", + xvcmpgesp_3 = "f0000298QQQ", + xvcvuxwsp_2 = "f00002a0Q-Q", + xvrspip_2 = "f00002a4Q-Q", + xvtsqrtsp_2 = "f00002a8X-Q", + xvrspic_2 = "f00002acQ-Q", + xvdivsp_3 = "f00002c0QQQ", + xvmsubmsp_3 = "f00002c8QQQ", + xvcvsxwsp_2 = "f00002e0Q-Q", + xvrspim_2 = "f00002e4Q-Q", + xvtdivsp_3 = "f00002e8XQQ", + xvadddp_3 = "f0000300QQQ", + xvmaddadp_3 = "f0000308QQQ", + xvcmpeqdp_3 = "f0000318QQQ", + xvcvdpuxws_2 = "f0000320Q-Q", + xvrdpi_2 = "f0000324Q-Q", + xvrsqrtedp_2 = "f0000328Q-Q", + xvsqrtdp_2 = "f000032cQ-Q", + xvsubdp_3 = "f0000340QQQ", + xvmaddmdp_3 = "f0000348QQQ", + xvcmpgtdp_3 = "f0000358QQQ", + xvcvdpsxws_2 = "f0000360Q-Q", + xvrdpiz_2 = "f0000364Q-Q", + xvredp_2 = "f0000368Q-Q", + xvmuldp_3 = "f0000380QQQ", + xvmsubadp_3 = "f0000388QQQ", + xvcmpgedp_3 = "f0000398QQQ", + xvcvuxwdp_2 = "f00003a0Q-Q", + xvrdpip_2 = "f00003a4Q-Q", + xvtsqrtdp_2 = "f00003a8X-Q", + xvrdpic_2 = "f00003acQ-Q", + xvdivdp_3 = "f00003c0QQQ", + xvmsubmdp_3 = "f00003c8QQQ", + xvcvsxwdp_2 = "f00003e0Q-Q", + xvrdpim_2 = "f00003e4Q-Q", + xvtdivdp_3 = "f00003e8XQQ", + xsnmaddasp_3 = "f0000408QQQ", + xxland_3 = "f0000410QQQ", + xscvdpsp_2 = "f0000424Q-Q", + xscvdpspn_2 = "f000042cQ-Q", + xsnmaddmsp_3 = "f0000448QQQ", + xxlandc_3 = "f0000450QQQ", + xsrsp_2 = "f0000464Q-Q", + xsnmsubasp_3 = "f0000488QQQ", + xxlor_3 = "f0000490QQQ", + xscvuxdsp_2 = "f00004a0Q-Q", + xsnmsubmsp_3 = "f00004c8QQQ", + xxlxor_3 = "f00004d0QQQ", + xscvsxdsp_2 = "f00004e0Q-Q", + xsmaxdp_3 = "f0000500QQQ", + xsnmaddadp_3 = "f0000508QQQ", + xxlnor_3 = "f0000510QQQ", + xscvdpuxds_2 = "f0000520Q-Q", + xscvspdp_2 = "f0000524Q-Q", + xscvspdpn_2 = "f000052cQ-Q", + xsmindp_3 = "f0000540QQQ", + xsnmaddmdp_3 = "f0000548QQQ", + xxlorc_3 = "f0000550QQQ", + xscvdpsxds_2 = "f0000560Q-Q", + xsabsdp_2 = "f0000564Q-Q", + xscpsgndp_3 = "f0000580QQQ", + xsnmsubadp_3 = "f0000588QQQ", + xxlnand_3 = "f0000590QQQ", + xscvuxddp_2 = "f00005a0Q-Q", + xsnabsdp_2 = "f00005a4Q-Q", + xsnmsubmdp_3 = "f00005c8QQQ", + xxleqv_3 = "f00005d0QQQ", + xscvsxddp_2 = "f00005e0Q-Q", + xsnegdp_2 = "f00005e4Q-Q", + xvmaxsp_3 = "f0000600QQQ", + xvnmaddasp_3 = "f0000608QQQ", + ["xvcmpeqsp._3"] = "f0000618QQQ", + xvcvspuxds_2 = "f0000620Q-Q", + xvcvdpsp_2 = "f0000624Q-Q", + xvminsp_3 = "f0000640QQQ", + xvnmaddmsp_3 = "f0000648QQQ", + ["xvcmpgtsp._3"] = "f0000658QQQ", + xvcvspsxds_2 = "f0000660Q-Q", + xvabssp_2 = "f0000664Q-Q", + xvcpsgnsp_3 = "f0000680QQQ", + xvnmsubasp_3 = "f0000688QQQ", + ["xvcmpgesp._3"] = "f0000698QQQ", + xvcvuxdsp_2 = "f00006a0Q-Q", + xvnabssp_2 = "f00006a4Q-Q", + xvnmsubmsp_3 = "f00006c8QQQ", + xvcvsxdsp_2 = "f00006e0Q-Q", + xvnegsp_2 = "f00006e4Q-Q", + xvmaxdp_3 = "f0000700QQQ", + xvnmaddadp_3 = "f0000708QQQ", + ["xvcmpeqdp._3"] = "f0000718QQQ", + xvcvdpuxds_2 = "f0000720Q-Q", + xvcvspdp_2 = "f0000724Q-Q", + xvmindp_3 = "f0000740QQQ", + xvnmaddmdp_3 = "f0000748QQQ", + ["xvcmpgtdp._3"] = "f0000758QQQ", + xvcvdpsxds_2 = "f0000760Q-Q", + xvabsdp_2 = 
"f0000764Q-Q", + xvcpsgndp_3 = "f0000780QQQ", + xvnmsubadp_3 = "f0000788QQQ", + ["xvcmpgedp._3"] = "f0000798QQQ", + xvcvuxddp_2 = "f00007a0Q-Q", + xvnabsdp_2 = "f00007a4Q-Q", + xvnmsubmdp_3 = "f00007c8QQQ", + xvcvsxddp_2 = "f00007e0Q-Q", + xvnegdp_2 = "f00007e4Q-Q", + + -- Primary opcode 61: + stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 62: + stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 63: + fdiv_3 = "fc000024FFF.", + fsub_3 = "fc000028FFF.", + fadd_3 = "fc00002aFFF.", + fsqrt_2 = "fc00002cF-F.", + fsel_4 = "fc00002eFFFF~.", + fre_2 = "fc000030F-F.", + fmul_3 = "fc000032FF-F.", + frsqrte_2 = "fc000034F-F.", + fmsub_4 = "fc000038FFFF~.", + fmadd_4 = "fc00003aFFFF~.", + fnmsub_4 = "fc00003cFFFF~.", + fnmadd_4 = "fc00003eFFFF~.", + fcmpu_3 = "fc000000XFF", + fcpsgn_3 = "fc000010FFF.", + fcmpo_3 = "fc000040XFF", + mtfsb1_1 = "fc00004cA", + fneg_2 = "fc000050F-F.", + mcrfs_2 = "fc000080XX", + mtfsb0_1 = "fc00008cA", + fmr_2 = "fc000090F-F.", + frsp_2 = "fc000018F-F.", + fctiw_2 = "fc00001cF-F.", + fctiwz_2 = "fc00001eF-F.", + ftdiv_2 = "fc000100X-F.", + fctiwu_2 = "fc00011cF-F.", + fctiwuz_2 = "fc00011eF-F.", + mtfsfi_2 = "fc00010cAA", -- NYI: upshift. + fnabs_2 = "fc000110F-F.", + ftsqrt_2 = "fc000140X-F.", + fabs_2 = "fc000210F-F.", + frin_2 = "fc000310F-F.", + friz_2 = "fc000350F-F.", + frip_2 = "fc000390F-F.", + frim_2 = "fc0003d0F-F.", + mffs_1 = "fc00048eF.", + -- NYI: mtfsf, mtfsb0, mtfsb1. + fctid_2 = "fc00065cF-F.", + fctidz_2 = "fc00065eF-F.", + fmrgow_3 = "fc00068cFFF", + fcfid_2 = "fc00069cF-F.", + fctidu_2 = "fc00075cF-F.", + fctiduz_2 = "fc00075eF-F.", + fmrgew_3 = "fc00078cFFF", + fcfidu_2 = "fc00079cF-F.", + + daddq_3 = "fc000004F:F:F:.", + dquaq_4 = "fc000006F:F:F:Z.", + dmulq_3 = "fc000044F:F:F:.", + drrndq_4 = "fc000046F:F:F:Z.", + dscliq_3 = "fc000084F:F:6.", + dquaiq_4 = "fc000086SF:~F:Z.", + dscriq_3 = "fc0000c4F:F:6.", + drintxq_4 = "fc0000c61F:~F:Z.", + dcmpoq_3 = "fc000104XF:F:", + dtstexq_3 = "fc000144XF:F:", + dtstdcq_3 = "fc000184XF:6", + dtstdgq_3 = "fc0001c4XF:6", + drintnq_4 = "fc0001c61F:~F:Z.", + dctqpq_2 = "fc000204F:-F:.", + dctfixq_2 = "fc000244F:-F:.", + ddedpdq_3 = "fc000284ZF:~F:.", + dxexq_2 = "fc0002c4F:-F:.", + dsubq_3 = "fc000404F:F:F:.", + ddivq_3 = "fc000444F:F:F:.", + dcmpuq_3 = "fc000504XF:F:", + dtstsfq_3 = "fc000544XF:F:", + drdpq_2 = "fc000604F:-F:.", + dcffixq_2 = "fc000644F:-F:.", + denbcdq_3 = "fc000684YF:~F:.", + diexq_3 = "fc0006c4F:FF:.", + + -- Primary opcode 4, SPE APU extension: + evaddw_3 = "10000200RRR", + evaddiw_3 = "10000202RAR~", + evsubw_3 = "10000204RRR~", + evsubiw_3 = "10000206RAR~", + evabs_2 = "10000208RR", + evneg_2 = "10000209RR", + evextsb_2 = "1000020aRR", + evextsh_2 = "1000020bRR", + evrndw_2 = "1000020cRR", + evcntlzw_2 = "1000020dRR", + evcntlsw_2 = "1000020eRR", + brinc_3 = "1000020fRRR", + evand_3 = "10000211RRR", + evandc_3 = "10000212RRR", + evxor_3 = "10000216RRR", + evor_3 = "10000217RRR", + evmr_2 = "10000217RR=", + evnor_3 = "10000218RRR", + evnot_2 = "10000218RR=", + eveqv_3 = "10000219RRR", + evorc_3 = "1000021bRRR", + evnand_3 = "1000021eRRR", + evsrwu_3 = "10000220RRR", + evsrws_3 = "10000221RRR", + evsrwiu_3 = "10000222RRA", + evsrwis_3 = "10000223RRA", + evslw_3 = "10000224RRR", + evslwi_3 = "10000226RRA", + evrlw_3 = "10000228RRR", + evsplati_2 = "10000229RS", + evrlwi_3 = "1000022aRRA", + evsplatfi_2 = "1000022bRS", + evmergehi_3 = "1000022cRRR", + evmergelo_3 = "1000022dRRR", + evcmpgtu_3 = "10000230XRR", + 
evcmpgtu_2 = "10000230-RR", + evcmpgts_3 = "10000231XRR", + evcmpgts_2 = "10000231-RR", + evcmpltu_3 = "10000232XRR", + evcmpltu_2 = "10000232-RR", + evcmplts_3 = "10000233XRR", + evcmplts_2 = "10000233-RR", + evcmpeq_3 = "10000234XRR", + evcmpeq_2 = "10000234-RR", + evsel_4 = "10000278RRRW", + evsel_3 = "10000278RRR", + evfsadd_3 = "10000280RRR", + evfssub_3 = "10000281RRR", + evfsabs_2 = "10000284RR", + evfsnabs_2 = "10000285RR", + evfsneg_2 = "10000286RR", + evfsmul_3 = "10000288RRR", + evfsdiv_3 = "10000289RRR", + evfscmpgt_3 = "1000028cXRR", + evfscmpgt_2 = "1000028c-RR", + evfscmplt_3 = "1000028dXRR", + evfscmplt_2 = "1000028d-RR", + evfscmpeq_3 = "1000028eXRR", + evfscmpeq_2 = "1000028e-RR", + evfscfui_2 = "10000290R-R", + evfscfsi_2 = "10000291R-R", + evfscfuf_2 = "10000292R-R", + evfscfsf_2 = "10000293R-R", + evfsctui_2 = "10000294R-R", + evfsctsi_2 = "10000295R-R", + evfsctuf_2 = "10000296R-R", + evfsctsf_2 = "10000297R-R", + evfsctuiz_2 = "10000298R-R", + evfsctsiz_2 = "1000029aR-R", + evfststgt_3 = "1000029cXRR", + evfststgt_2 = "1000029c-RR", + evfststlt_3 = "1000029dXRR", + evfststlt_2 = "1000029d-RR", + evfststeq_3 = "1000029eXRR", + evfststeq_2 = "1000029e-RR", + efsadd_3 = "100002c0RRR", + efssub_3 = "100002c1RRR", + efsabs_2 = "100002c4RR", + efsnabs_2 = "100002c5RR", + efsneg_2 = "100002c6RR", + efsmul_3 = "100002c8RRR", + efsdiv_3 = "100002c9RRR", + efscmpgt_3 = "100002ccXRR", + efscmpgt_2 = "100002cc-RR", + efscmplt_3 = "100002cdXRR", + efscmplt_2 = "100002cd-RR", + efscmpeq_3 = "100002ceXRR", + efscmpeq_2 = "100002ce-RR", + efscfd_2 = "100002cfR-R", + efscfui_2 = "100002d0R-R", + efscfsi_2 = "100002d1R-R", + efscfuf_2 = "100002d2R-R", + efscfsf_2 = "100002d3R-R", + efsctui_2 = "100002d4R-R", + efsctsi_2 = "100002d5R-R", + efsctuf_2 = "100002d6R-R", + efsctsf_2 = "100002d7R-R", + efsctuiz_2 = "100002d8R-R", + efsctsiz_2 = "100002daR-R", + efststgt_3 = "100002dcXRR", + efststgt_2 = "100002dc-RR", + efststlt_3 = "100002ddXRR", + efststlt_2 = "100002dd-RR", + efststeq_3 = "100002deXRR", + efststeq_2 = "100002de-RR", + efdadd_3 = "100002e0RRR", + efdsub_3 = "100002e1RRR", + efdcfuid_2 = "100002e2R-R", + efdcfsid_2 = "100002e3R-R", + efdabs_2 = "100002e4RR", + efdnabs_2 = "100002e5RR", + efdneg_2 = "100002e6RR", + efdmul_3 = "100002e8RRR", + efddiv_3 = "100002e9RRR", + efdctuidz_2 = "100002eaR-R", + efdctsidz_2 = "100002ebR-R", + efdcmpgt_3 = "100002ecXRR", + efdcmpgt_2 = "100002ec-RR", + efdcmplt_3 = "100002edXRR", + efdcmplt_2 = "100002ed-RR", + efdcmpeq_3 = "100002eeXRR", + efdcmpeq_2 = "100002ee-RR", + efdcfs_2 = "100002efR-R", + efdcfui_2 = "100002f0R-R", + efdcfsi_2 = "100002f1R-R", + efdcfuf_2 = "100002f2R-R", + efdcfsf_2 = "100002f3R-R", + efdctui_2 = "100002f4R-R", + efdctsi_2 = "100002f5R-R", + efdctuf_2 = "100002f6R-R", + efdctsf_2 = "100002f7R-R", + efdctuiz_2 = "100002f8R-R", + efdctsiz_2 = "100002faR-R", + efdtstgt_3 = "100002fcXRR", + efdtstgt_2 = "100002fc-RR", + efdtstlt_3 = "100002fdXRR", + efdtstlt_2 = "100002fd-RR", + efdtsteq_3 = "100002feXRR", + efdtsteq_2 = "100002fe-RR", + evlddx_3 = "10000300RR0R", + evldd_2 = "10000301R8", + evldwx_3 = "10000302RR0R", + evldw_2 = "10000303R8", + evldhx_3 = "10000304RR0R", + evldh_2 = "10000305R8", + evlwhex_3 = "10000310RR0R", + evlwhe_2 = "10000311R4", + evlwhoux_3 = "10000314RR0R", + evlwhou_2 = "10000315R4", + evlwhosx_3 = "10000316RR0R", + evlwhos_2 = "10000317R4", + evstddx_3 = "10000320RR0R", + evstdd_2 = "10000321R8", + evstdwx_3 = "10000322RR0R", + evstdw_2 = "10000323R8", + evstdhx_3 = "10000324RR0R", + 
evstdh_2 = "10000325R8", + evstwhex_3 = "10000330RR0R", + evstwhe_2 = "10000331R4", + evstwhox_3 = "10000334RR0R", + evstwho_2 = "10000335R4", + evstwwex_3 = "10000338RR0R", + evstwwe_2 = "10000339R4", + evstwwox_3 = "1000033cRR0R", + evstwwo_2 = "1000033dR4", + evmhessf_3 = "10000403RRR", + evmhossf_3 = "10000407RRR", + evmheumi_3 = "10000408RRR", + evmhesmi_3 = "10000409RRR", + evmhesmf_3 = "1000040bRRR", + evmhoumi_3 = "1000040cRRR", + evmhosmi_3 = "1000040dRRR", + evmhosmf_3 = "1000040fRRR", + evmhessfa_3 = "10000423RRR", + evmhossfa_3 = "10000427RRR", + evmheumia_3 = "10000428RRR", + evmhesmia_3 = "10000429RRR", + evmhesmfa_3 = "1000042bRRR", + evmhoumia_3 = "1000042cRRR", + evmhosmia_3 = "1000042dRRR", + evmhosmfa_3 = "1000042fRRR", + evmwhssf_3 = "10000447RRR", + evmwlumi_3 = "10000448RRR", + evmwhumi_3 = "1000044cRRR", + evmwhsmi_3 = "1000044dRRR", + evmwhsmf_3 = "1000044fRRR", + evmwssf_3 = "10000453RRR", + evmwumi_3 = "10000458RRR", + evmwsmi_3 = "10000459RRR", + evmwsmf_3 = "1000045bRRR", + evmwhssfa_3 = "10000467RRR", + evmwlumia_3 = "10000468RRR", + evmwhumia_3 = "1000046cRRR", + evmwhsmia_3 = "1000046dRRR", + evmwhsmfa_3 = "1000046fRRR", + evmwssfa_3 = "10000473RRR", + evmwumia_3 = "10000478RRR", + evmwsmia_3 = "10000479RRR", + evmwsmfa_3 = "1000047bRRR", + evmra_2 = "100004c4RR", + evdivws_3 = "100004c6RRR", + evdivwu_3 = "100004c7RRR", + evmwssfaa_3 = "10000553RRR", + evmwumiaa_3 = "10000558RRR", + evmwsmiaa_3 = "10000559RRR", + evmwsmfaa_3 = "1000055bRRR", + evmwssfan_3 = "100005d3RRR", + evmwumian_3 = "100005d8RRR", + evmwsmian_3 = "100005d9RRR", + evmwsmfan_3 = "100005dbRRR", + evmergehilo_3 = "1000022eRRR", + evmergelohi_3 = "1000022fRRR", + evlhhesplatx_3 = "10000308RR0R", + evlhhesplat_2 = "10000309R2", + evlhhousplatx_3 = "1000030cRR0R", + evlhhousplat_2 = "1000030dR2", + evlhhossplatx_3 = "1000030eRR0R", + evlhhossplat_2 = "1000030fR2", + evlwwsplatx_3 = "10000318RR0R", + evlwwsplat_2 = "10000319R4", + evlwhsplatx_3 = "1000031cRR0R", + evlwhsplat_2 = "1000031dR4", + evaddusiaaw_2 = "100004c0RR", + evaddssiaaw_2 = "100004c1RR", + evsubfusiaaw_2 = "100004c2RR", + evsubfssiaaw_2 = "100004c3RR", + evaddumiaaw_2 = "100004c8RR", + evaddsmiaaw_2 = "100004c9RR", + evsubfumiaaw_2 = "100004caRR", + evsubfsmiaaw_2 = "100004cbRR", + evmheusiaaw_3 = "10000500RRR", + evmhessiaaw_3 = "10000501RRR", + evmhessfaaw_3 = "10000503RRR", + evmhousiaaw_3 = "10000504RRR", + evmhossiaaw_3 = "10000505RRR", + evmhossfaaw_3 = "10000507RRR", + evmheumiaaw_3 = "10000508RRR", + evmhesmiaaw_3 = "10000509RRR", + evmhesmfaaw_3 = "1000050bRRR", + evmhoumiaaw_3 = "1000050cRRR", + evmhosmiaaw_3 = "1000050dRRR", + evmhosmfaaw_3 = "1000050fRRR", + evmhegumiaa_3 = "10000528RRR", + evmhegsmiaa_3 = "10000529RRR", + evmhegsmfaa_3 = "1000052bRRR", + evmhogumiaa_3 = "1000052cRRR", + evmhogsmiaa_3 = "1000052dRRR", + evmhogsmfaa_3 = "1000052fRRR", + evmwlusiaaw_3 = "10000540RRR", + evmwlssiaaw_3 = "10000541RRR", + evmwlumiaaw_3 = "10000548RRR", + evmwlsmiaaw_3 = "10000549RRR", + evmheusianw_3 = "10000580RRR", + evmhessianw_3 = "10000581RRR", + evmhessfanw_3 = "10000583RRR", + evmhousianw_3 = "10000584RRR", + evmhossianw_3 = "10000585RRR", + evmhossfanw_3 = "10000587RRR", + evmheumianw_3 = "10000588RRR", + evmhesmianw_3 = "10000589RRR", + evmhesmfanw_3 = "1000058bRRR", + evmhoumianw_3 = "1000058cRRR", + evmhosmianw_3 = "1000058dRRR", + evmhosmfanw_3 = "1000058fRRR", + evmhegumian_3 = "100005a8RRR", + evmhegsmian_3 = "100005a9RRR", + evmhegsmfan_3 = "100005abRRR", + evmhogumian_3 = "100005acRRR", + evmhogsmian_3 = 
"100005adRRR", + evmhogsmfan_3 = "100005afRRR", + evmwlusianw_3 = "100005c0RRR", + evmwlssianw_3 = "100005c1RRR", + evmwlumianw_3 = "100005c8RRR", + evmwlsmianw_3 = "100005c9RRR", + + -- NYI: Book E instructions. +} + +-- Add mnemonics for "." variants. +do + local t = {} + for k,v in pairs(map_op) do + if type(v) == "string" and sub(v, -1) == "." then + local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) + t[sub(k, 1, -3).."."..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +-- Add more branch mnemonics. +for cond,c in pairs(map_cond) do + local b1 = "b"..cond + local c1 = shl(band(c, 3), 16) + (c < 4 and 0x01000000 or 0) + -- bX[l] + map_op[b1.."_1"] = tohex(0x40800000 + c1).."K" + map_op[b1.."y_1"] = tohex(0x40a00000 + c1).."K" + map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K" + map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK" + map_op[b1.."y_2"] = tohex(0x40a00000 + c1).."-XK" + map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK" + -- bXlr[l] + map_op[b1.."lr_0"] = tohex(0x4c800020 + c1) + map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1) + map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1) + map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1) + -- bXctr[l] + map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X" + map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X" + map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X" + map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X" +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_vr(expr) + local r = match(expr, "^v([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_vs(expr) + local r = match(expr, "^vs([1-6]?[0-9])$") + if r then + r = tonumber(r) + if r <= 63 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_cr(expr) + local r = match(expr, "^cr([0-7])$") + if r then return tonumber(r) end + werror("bad condition register name `"..expr.."'") +end + +local function parse_cond(expr) + local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$") + if r then + r = tonumber(r) + local c = map_cond[cond] + if c and c < 4 then return r*4+c end + end + werror("bad condition bit name `"..expr.."'") +end + +local parse_ctx = {} + +local loadenv = setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. 
+local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok then return y end + end + return nil +end + +local function parse_imm(imm, bits, shift, scale, signed) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^vs([1-6]?[0-9])$") or + match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_shiftmask(imm, isshift) + local n = parse_number(imm) + if n then + if shr(n, 6) == 0 then + local lsb = band(n, 31) + local msb = n - lsb + return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^r([1-3]?[0-9])$") or + match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMMSH", isshift and 1 or 0, imm) + return 0; + end +end + +local function parse_disp(disp) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + return shl(r, 16) + parse_imm(imm, 16, 0, 0, true) + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 16) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_u5disp(disp, scale) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + return shl(r, 16) + parse_imm(imm, 5, 11, scale, false) + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if r == 0 then werror("cannot use r0 in displacement") end + if tp then + waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr)) + return shl(r, 16) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. 
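+-- A template is eight hex digits of base opcode followed by one format +-- character per operand. For example, add_3 = "7c000214RRR." (from the table +-- above) packs rd, ra and rb into consecutive 5-bit fields starting below +-- bit 26, and the trailing "." also registers the Rc-setting variant "add.".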
+op_template = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n, rs = 1, 26 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions (rlwinm). + if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "R" then + rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 + elseif p == "F" then + rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 + elseif p == "V" then + rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1 + elseif p == "Q" then + local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5 + local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3) + op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh) + elseif p == "q" then + local vs = parse_vs(params[n]); n = n + 1 + op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5) + elseif p == "A" then + rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 + elseif p == "S" then + rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1 + elseif p == "I" then + op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 + elseif p == "U" then + op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 + elseif p == "D" then + op = op + parse_disp(params[n]); n = n + 1 + elseif p == "2" then + op = op + parse_u5disp(params[n], 1); n = n + 1 + elseif p == "4" then + op = op + parse_u5disp(params[n], 2); n = n + 1 + elseif p == "8" then + op = op + parse_u5disp(params[n], 3); n = n + 1 + elseif p == "C" then + rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 + elseif p == "X" then + rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 + elseif p == "1" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1 + elseif p == "g" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1 + elseif p == "3" then + rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1 + elseif p == "P" then + rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "p" then + op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "6" then + rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1 + elseif p == "Y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1 + elseif p == "y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1 + elseif p == "Z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1 + elseif p == "z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1 + elseif p == "W" then + op = op + parse_cr(params[n]); n = n + 1 + elseif p == "G" then + op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1 + elseif p == "H" then + op = op + parse_shiftmask(params[n], true); n = n + 1 + elseif p == "M" then + op = op + parse_shiftmask(params[n], false); n = n + 1 + elseif p == "J" or p == "K" then + local mode, m, s = parse_label(params[n], false) + if p == "K" then m = m + 2048 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + elseif p == "0" then + if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end + elseif p == "=" or p == "%" then + local t = band(shr(op, p == "%" and rs+5 or rs), 31) + rs = rs - 5 + op = op + shl(t, rs) + elseif p == "~" then + local mm = shl(31, 
rs) + local lo = band(op, mm) + local hi = band(op, shl(mm, 5)) + op = op - lo - hi + shl(lo, 5) + shr(hi, 5) + elseif p == ":" then + if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end + elseif p == "-" then + rs = rs - 5 + elseif p == "." then + -- Ignored. + else + assert(false) + end + end + wputpos(pos, op) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. 
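+ -- (Illustrative: after ".type FOO, struct foo, r3", "#FOO" expands to + -- "sizeof(struct foo)" and "FOO->field" becomes a DtX() displacement.)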
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg or ""
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/ext/opcache/jit/ir/dynasm/dasm_proto.h b/ext/opcache/jit/ir/dynasm/dasm_proto.h
new file mode 100644
index 0000000000000..8914596adf5f4
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_proto.h
@@ -0,0 +1,83 @@
+/*
+** DynASM encoding engine prototypes.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#ifndef _DASM_PROTO_H
+#define _DASM_PROTO_H
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#define DASM_IDENT "DynASM 1.5.0"
+#define DASM_VERSION 10500 /* 1.5.0 */
+
+#ifndef Dst_DECL
+#define Dst_DECL dasm_State **Dst
+#endif
+
+#ifndef Dst_REF
+#define Dst_REF (*Dst)
+#endif
+
+#ifndef DASM_FDEF
+#define DASM_FDEF extern
+#endif
+
+#ifndef DASM_M_GROW
+#define DASM_M_GROW(ctx, t, p, sz, need) \
+  do { \
+    size_t _sz = (sz), _need = (need); \
+    if (_sz < _need) { \
+      if (_sz < 16) _sz = 16; \
+      while (_sz < _need) _sz += _sz; \
+      (p) = (t *)realloc((p), _sz); \
+      if ((p) == NULL) exit(1); \
+      (sz) = _sz; \
+    } \
+  } while(0)
+#endif
+
+#ifndef DASM_M_FREE
+#define DASM_M_FREE(ctx, p, sz) free(p)
+#endif
+
+/* Internal DynASM encoder state. */
+typedef struct dasm_State dasm_State;
+
+
+/* Initialize and free DynASM state. */
+DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
+DASM_FDEF void dasm_free(Dst_DECL);
+
+/* Setup global array. Must be called before dasm_setup(). */
+DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
+
+/* Setup encoder. */
+DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist);
+
+/* Feed encoder with actions. Calls are generated by pre-processor. */
+DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
+
+/* Link sections and return the resulting size. */
+DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
+
+/* Encode sections into buffer. */
+DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
+
+/* Get PC label offset. */
+DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
+#else
+#define dasm_checkstep(a, b) 0
+#endif
+
+
+#endif /* _DASM_PROTO_H */
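Taken together these prototypes are the whole embedding contract: init the state, register the globals array, set up the action list, feed actions, then link, encode, and free. A minimal driver sketch, assuming the usual generated artifacts (the action list and the globals enum normally come from the header that dynasm.lua emits; names like GLOB_entry and assemble() are hypothetical, and a real JIT would encode into executable memory rather than plain malloc'ed memory):

    #include <stdlib.h>
    #include "dasm_proto.h"

    /* Hypothetical stand-in for the enum the |.globals directive emits. */
    enum { GLOB_entry, GLOB__MAX };
    static void *labels[GLOB__MAX];

    static void *assemble(const unsigned char *actions, size_t *szp)
    {
      dasm_State *state = NULL;
      dasm_State **Dst = &state;            /* matches the default Dst_DECL */
      dasm_init(Dst, 1);                    /* one code section */
      dasm_setupglobal(Dst, labels, GLOB__MAX);
      dasm_setup(Dst, actions);
      /* ... dasm_put(Dst, ...) calls generated from the .dasc file ... */
      if (dasm_link(Dst, szp) != DASM_S_OK) { dasm_free(Dst); return NULL; }
      void *buf = malloc(*szp);             /* a real JIT maps this executable */
      if (buf && dasm_encode(Dst, buf) != DASM_S_OK) { free(buf); buf = NULL; }
      dasm_free(Dst);
      return buf;
    }

The link/encode split is what gives pass 2 a chance to shrink branches before pass 3 writes any bytes.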
diff --git a/ext/opcache/jit/ir/dynasm/dasm_x64.lua b/ext/opcache/jit/ir/dynasm/dasm_x64.lua
new file mode 100644
index 0000000000000..2c0a0e8681f17
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_x64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM x64 module.
+--
+-- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined x86/x64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+x64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_x86")
diff --git a/ext/opcache/jit/ir/dynasm/dasm_x86.h b/ext/opcache/jit/ir/dynasm/dasm_x86.h
new file mode 100644
index 0000000000000..a2b46cc951e06
--- /dev/null
+++ b/ext/opcache/jit/ir/dynasm/dasm_x86.h
@@ -0,0 +1,546 @@
+/*
+** DynASM x86 encoding engine.
+** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "x86"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. DASM_STOP must be 255. */
+enum {
+  DASM_DISP = 233,
+  DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
+  DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
+  DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
+  DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_VREG 0x15000000
+#define DASM_S_UNDEF_L 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned char *dasm_ActList;
+
+/* Per-section structure.
*/ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + +/* Perform potentially overflowing pointer operations in a way that avoids UB. */ +#define DASM_PTR_SUB(p1, off) ((void *) ((uintptr_t) (p1) - sizeof(*p1) * (uintptr_t) (off))) +#define DASM_PTR_ADD(p1, off) ((void *) ((uintptr_t) (p1) + sizeof(*p1) * (uintptr_t) (off))) + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = DASM_PTR_SUB(D->sections[i].buf, DASM_SEC2POS(i)); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs, mrm = -1; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + int action = *p++; + while (action < DASM_DISP) { + ofs++; + action = *p++; + } + if (action <= DASM_REL_A) { + int n = va_arg(ap, int); + b[pos++] = n; + switch (action) { + case DASM_DISP: + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } + /* fallthrough */ + case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ + case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { + case 3: n |= b[pos-3]; /* fallthrough */ + case 2: n |= b[pos-2]; /* fallthrough */ + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } + continue; + } + mrm = -1; + } else { + int *pl, n; + switch (action) { + case DASM_REL_LG: + case DASM_IMM_LG: + n = *p++; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl -= 246; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + ofs += 4; /* Maximum offset needed. */ + if (action == DASM_REL_LG || action == DASM_REL_PC) { + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + } else if (sizeof(ptrdiff_t) == 8) { + ofs += 4; + } + break; + case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_ALIGN: + ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_EXTERN: p += 2; ofs += 4; break; + case DASM_ESC: p++; ofs++; break; + case DASM_MARK: mrm = p[-2]; break; + case DASM_SECTION: + n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; + case DASM_STOP: goto stop; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + int op = 0; + while (1) { + int action = *p++; + while (action < DASM_DISP) { + op = action; + action = *p++; + } + switch (action) { + case DASM_REL_LG: p++; + /* fallthrough */ + case DASM_REL_PC: { + int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); + if (shrink) { /* Shrinkable branch opcode? */ + int lofs, lpos = b[pos]; + if (lpos < 0) goto noshrink; /* Ext global? */ + lofs = *DASM_POS2PTR(D, lpos); + if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ + int i; + for (i = secnum; i < DASM_POS2SEC(lpos); i++) + lofs += D->sections[i].ofs; + } else { + lofs -= ofs; /* Bkwd label: unfix offset. */ + } + lofs -= b[pos+1]; /* Short branch ok? */ + if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ + else { noshrink: shrink = 0; } /* No, cannot shrink op. */ + } + b[pos+1] = shrink; + pos += 2; + break; + } + /* fallthrough */ + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; + /* fallthrough */ + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ + case DASM_EXTERN: p += 2; break; + case DASM_ESC: op = *p++; break; + case DASM_MARK: break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. 
*/ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#define dasmb(x) *cp++ = (unsigned char)(x) +#ifndef DASM_ALIGNED_WRITES +typedef IR_SET_ALIGNED(1, unsigned short unaligned_short); +typedef IR_SET_ALIGNED(1, unsigned int unaligned_int); +typedef IR_SET_ALIGNED(1, unsigned long long unaligned_long_long); +#define dasmw(x) \ + do { *((unaligned_short *)cp) = (unsigned short)(x); cp+=2; } while (0) +#define dasmd(x) \ + do { *((unaligned_int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#define dasmq(x) \ + do { *((unaligned_long_long *)cp) = (unsigned long long)(x); cp+=8; } while (0) +#else +#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) +#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0) +#endif +static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x) +{ + if (sizeof(ptrdiff_t) == 8) + dasmq((unsigned long long)x); + else + dasmd((unsigned int)x); + return cp; +} +#define dasma(x) (cp = dasma_(cp, (x))) + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + unsigned char *base = (unsigned char *)buffer; + unsigned char *cp = base; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = DASM_PTR_ADD(sec->rbuf, sec->pos); + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + unsigned char *mark = NULL; + while (1) { + int n; + int action = *p++; + while (action < DASM_DISP) { + *cp++ = action; + action = *p++; + } + if (action >= DASM_ALIGN) { + switch (action) { + case DASM_ALIGN: + b++; + n = *p++; + while (((cp-base) & n)) *cp++ = 0x90; /* nop */ + continue; + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; continue; + case DASM_ESC: action = *p++; *cp++ = action; continue; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + n = *b++; + switch (action) { + case DASM_DISP: if (!mark) mark = cp; { + unsigned char *mm = mark; + if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; + if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if ((((unsigned)n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } + /* fallthrough */ + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if ((((unsigned)n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; + /* fallthrough */ + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if ((((unsigned)n+128)&-256) == 0) goto db; else mark = NULL; + /* fallthrough */ + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { + int t = *p++; + unsigned char *ex = cp - (t&7); + if ((n & 8) && t < 0xa0) { + if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + n &= 7; + } else if (n & 0x10) { + if (*ex & 0x80) { + *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; + } + while (++ex < cp) ex[-1] = *ex; + if (mark) mark--; + cp--; + n &= 7; + } + if (t >= 0xc0) n <<= 4; + else if (t >= 0x40) n <<= 3; + else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } + cp[-1] ^= n; + break; + } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; + /* fallthrough */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + 
case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } + n = *pb - ((int)(cp-base) + 4-shrink); + if (shrink == 0) goto wd; + if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; + goto wb; + } + case DASM_IMM_LG: + p++; + if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; } + /* fallthrough */ + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); + dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base)); + break; + } + case DASM_LABEL_LG: { + int idx = *p++; + if (idx >= 10) + D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); + break; + } + case DASM_LABEL_PC: case DASM_SETLABEL: break; + case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } + } + } + stop: (void)0; + } + } + + if (base + D->codesize != cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/ext/opcache/jit/ir/dynasm/dasm_x86.lua b/ext/opcache/jit/ir/dynasm/dasm_x86.lua new file mode 100644 index 0000000000000..d5eea69e485a7 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dasm_x86.lua @@ -0,0 +1,2390 @@ +------------------------------------------------------------------------------ +-- DynASM x86/x64 module. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +local x64 = x64 + +-- Module information: +local _info = { + arch = x64 and "x64" or "x86", + description = "DynASM x86/x64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub +local concat, sort, remove = table.concat, table.sort, table.remove +local bit = bit or require("bit") +local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! 
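+-- The invariant is checkable by hand: the first action code is
+-- actfirst = 256 - #action_names, and with the 23 names below that is
+-- 233, which is exactly DASM_DISP in the C enum above; counting
+-- forward from there, the last name STOP lands on the required 255.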
+local action_names = { + -- int arg, 1 buffer pos: + "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", + -- action arg (1 byte), int arg, 1 buffer pos (reg/num): + "VREG", "SPACE", + -- ptrdiff_t arg, 1 buffer pos (address): !x64 + "SETLABEL", "REL_A", + -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): + "REL_LG", "REL_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (link): + "IMM_LG", "IMM_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (offset): + "LABEL_LG", "LABEL_PC", + -- action arg (1 byte), 1 buffer pos (offset): + "ALIGN", + -- action args (2 bytes), no buffer pos. + "EXTERN", + -- action arg (1 byte), no buffer pos. + "ESC", + -- no action arg, no buffer pos. + "MARK", + -- action arg (1 byte), no buffer pos, terminal action: + "SECTION", + -- no args, no buffer pos, terminal action: + "STOP" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number (dynamically generated below). +local map_action = {} +-- First action number. Everything below does not need to be escaped. +local actfirst = 256-#action_names + +-- Action list buffer and string (only used to remove dupes). +local actlist = {} +local actstr = "" + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +-- VREG kind encodings, pre-shifted by 5 bits. +local map_vreg = { + ["modrm.rm.m"] = 0x00, + ["modrm.rm.r"] = 0x20, + ["opcode"] = 0x20, + ["sib.base"] = 0x20, + ["sib.index"] = 0x40, + ["modrm.reg"] = 0x80, + ["vex.v"] = 0xa0, + ["imm.hi"] = 0xc0, +} + +-- Current number of VREG actions contributing to REX/VEX shrinkage. +local vreg_shrink_count = 0 + +------------------------------------------------------------------------------ + +-- Compute action numbers for action names. +for n,name in ipairs(action_names) do + local num = actfirst + n - 1 + map_action[name] = num +end + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + local last = actlist[nn] or 255 + actlist[nn] = nil -- Remove last byte. + if nn == 0 then nn = 1 end + out:write("static const unsigned char ", name, "[", nn, "] = {\n") + local s = " " + for n,b in ipairs(actlist) do + s = s..b.."," + if #s >= 75 then + assert(out:write(s, "\n")) + s = " " + end + end + out:write(s, last, "\n};\n\n") -- Add last byte back. +end + +------------------------------------------------------------------------------ + +-- Add byte to action list. +local function wputxb(n) + assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, a, num) + wputxb(assert(map_action[action], "bad action name `"..action.."'")) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Optionally add a VREG action. 
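+-- The action's second byte packs the operand kind (pre-shifted by 5
+-- bits in map_vreg above) plus the opcode byte position psz, so e.g.
+-- wvreg("modrm.reg", vreg, 2) emits VREG followed by 0x82. Kinds below
+-- the shrink threshold sk bump vreg_shrink_count; the next non-deferred
+-- call folds that count in as count * 8 and resets it.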
+local function wvreg(kind, vreg, psz, sk, defer) + if not vreg then return end + waction("VREG", vreg) + local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") + if b < (sk or 0) then + vreg_shrink_count = vreg_shrink_count + 1 + end + if not defer then + b = b + vreg_shrink_count * 8 + vreg_shrink_count = 0 + end + wputxb(b + (psz or 0)) +end + +-- Add call to embedded DynASM C code. +local function wcall(func, args) + wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) +end + +-- Delete duplicate action list chunks. A tad slow, but so what. +local function dedupechunk(offset) + local al, as = actlist, actstr + local chunk = char(unpack(al, offset+1, #al)) + local orig = find(as, chunk, 1, true) + if orig then + actargs[1] = orig-1 -- Replace with original offset. + for i=offset+1,#al do al[i] = nil end -- Kill dupe. + else + actstr = as..chunk + end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + local offset = actargs[1] + if #actlist == offset then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + dedupechunk(offset) + wcall("put", actargs) -- Add call to dasm_put(). + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped byte. +local function wputb(n) + if n >= actfirst then waction("ESC") end -- Need to escape byte. + wputxb(n) +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 10 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end + local n = next_global + if n > 246 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=10,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=10,next_global-1 do + out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=10,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = -1 +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n < -256 then werror("too many extern labels") end + next_extern = n - 1 + t[name] = n + return n +end}) + +-- Dump extern labels. 
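+-- (Extern numbering mirrors the globals above: globals are assigned
+-- 10..246 on first use, externs -1..-256, so the first ->name reference
+-- becomes label 10 and the first "extern sym" becomes -1, resolved at
+-- encode time through the DASM_EXTERN() callback.)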
+local function dumpexterns(out, lvl) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("Extern labels:\n") + for i=1,-next_extern-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=1,-next_extern-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = {} -- Ext. register name -> int. name. +local map_reg_rev = {} -- Int. register name -> ext. name. +local map_reg_num = {} -- Int. register name -> register number. +local map_reg_opsize = {} -- Int. register name -> operand size. +local map_reg_valid_base = {} -- Int. register name -> valid base register? +local map_reg_valid_index = {} -- Int. register name -> valid index register? +local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex. +local reg_list = {} -- Canonical list of int. register names. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for _PTx macros). + +local addrsize = x64 and "q" or "d" -- Size for address operands. + +-- Helper functions to fill register maps. +local function mkrmap(sz, cl, names) + local cname = format("@%s", sz) + reg_list[#reg_list+1] = cname + map_archdef[cl] = cname + map_reg_rev[cname] = cl + map_reg_num[cname] = -1 + map_reg_opsize[cname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[cname] = true + map_reg_valid_index[cname] = true + end + if names then + for n,name in ipairs(names) do + local iname = format("@%s%x", sz, n-1) + reg_list[#reg_list+1] = iname + map_archdef[name] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = n-1 + map_reg_opsize[iname] = sz + if sz == "b" and n > 4 then map_reg_needrex[iname] = false end + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + for i=0,(x64 and sz ~= "f") and 15 or 7 do + local needrex = sz == "b" and i > 3 + local iname = format("@%s%x%s", sz, i, needrex and "R" or "") + if needrex then map_reg_needrex[iname] = true end + local name + if sz == "o" or sz == "y" then name = format("%s%d", cl, i) + elseif sz == "f" then name = format("st%d", i) + else name = format("r%d%s", i, sz == addrsize and "" or sz) end + map_archdef[name] = iname + if not map_reg_rev[iname] then + reg_list[#reg_list+1] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = i + map_reg_opsize[iname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + reg_list[#reg_list+1] = "" +end + +-- Integer registers (qword, dword, word and byte sized). +if x64 then + mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"}) +end +mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}) +mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) +mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) +map_reg_valid_index[map_archdef.esp] = false +if x64 then map_reg_valid_index[map_archdef.rsp] = false end +if x64 then map_reg_needrex[map_archdef.Rb] = true end +map_archdef["Ra"] = "@"..addrsize + +-- FP registers (internally tword sized, but use "f" as operand size). 
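+-- (mkrmap gives each register two names: the external one written in
+-- .dasc source and an internal "@<size><hexnum>" token, so "eax" maps
+-- to "@d0" and "r13d" to "@dd", while class names like Rd map to "@d"
+-- with register number -1, i.e. a variable register filled at runtime.)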
+mkrmap("f", "Rf") + +-- SSE registers (oword sized, but qword and dword accessible). +mkrmap("o", "xmm") + +-- AVX registers (yword sized, but oword, qword and dword accessible). +mkrmap("y", "ymm") + +-- Operand size prefixes to codes. +local map_opsize = { + byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", + tword = "t", aword = addrsize, +} + +-- Operand size code to number. +local map_opsizenum = { + b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, +} + +-- Operand size code to name. +local map_opsizename = { + b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", + t = "tword", f = "fpword", +} + +-- Valid index register scale factors. +local map_xsc = { + ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3, +} + +-- Condition codes. +local map_cc = { + o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7, + s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15, + c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7, + pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15, +} + + +-- Reverse defines for registers. +function _M.revdef(s) + return gsub(s, "@%w+", map_reg_rev) +end + +-- Dump register names and numbers +local function dumpregs(out) + out:write("Register names, sizes and internal numbers:\n") + for _,reg in ipairs(reg_list) do + if reg == "" then + out:write("\n") + else + local name = map_reg_rev[reg] + local num = map_reg_num[reg] + local opsize = map_opsizename[map_reg_opsize[reg]] + out:write(format(" %-5s %-8s %s\n", name, opsize, + num < 0 and "(variable)" or num)) + end + end +end + +------------------------------------------------------------------------------ + +-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC). +local function wputlabel(aprefix, imm, num) + if type(imm) == "number" then + if imm < 0 then + waction("EXTERN") + wputxb(aprefix == "IMM_" and 0 or 1) + imm = -imm-1 + else + waction(aprefix.."LG", nil, num); + end + wputxb(imm) + else + waction(aprefix.."PC", imm, num) + end +end + +-- Put signed byte or arg. +local function wputsbarg(n) + if type(n) == "number" then + if n < -128 or n > 127 then + werror("signed immediate byte out of range") + end + if n < 0 then n = n + 256 end + wputb(n) + else waction("IMM_S", n) end +end + +-- Put unsigned byte or arg. +local function wputbarg(n) + if type(n) == "number" then + if n < 0 or n > 255 then + werror("unsigned immediate byte out of range") + end + wputb(n) + else waction("IMM_B", n) end +end + +-- Put unsigned word or arg. +local function wputwarg(n) + if type(n) == "number" then + if shr(n, 16) ~= 0 then + werror("unsigned immediate word out of range") + end + wputb(band(n, 255)); wputb(shr(n, 8)); + else waction("IMM_W", n) end +end + +-- Put signed or unsigned dword or arg. +local function wputdarg(n) + local tn = type(n) + if tn == "number" then + wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + elseif tn == "table" then + wputlabel("IMM_", n[1], 1) + else + waction("IMM_D", n) + end +end + +-- Put signed or unsigned qword or arg. +local function wputqarg(n) + local tn = type(n) + if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. 
+ wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + local sign = n < 0 and 255 or 0 + wputb(sign); wputb(sign); wputb(sign); wputb(sign) + else + waction("IMM_D", format("(unsigned int)(%s)", n)) + waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) + end +end + +-- Put operand-size dependent number or arg (defaults to dword). +local function wputszarg(sz, n) + if not sz or sz == "d" or sz == "q" then wputdarg(n) + elseif sz == "w" then wputwarg(n) + elseif sz == "b" then wputbarg(n) + elseif sz == "s" then wputsbarg(n) + else werror("bad operand size") end +end + +-- Put multi-byte opcode with operand-size dependent modifications. +local function wputop(sz, op, rex, vex, vregr, vregxb) + local psz, sk = 0, nil + if vex then + local tail + if vex.m == 1 and band(rex, 11) == 0 then + if x64 and vregxb then + sk = map_vreg["modrm.reg"] + else + wputb(0xc5) + tail = shl(bxor(band(rex, 4), 4), 5) + psz = 3 + end + end + if not tail then + wputb(0xc4) + wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) + tail = shl(band(rex, 8), 4) + psz = 4 + end + local reg, vreg = 0, nil + if vex.v then + reg = vex.v.reg + if not reg then werror("bad vex operand") end + if reg < 0 then reg = 0; vreg = vex.v.vreg end + end + if sz == "y" or vex.l then tail = tail + 4 end + wputb(tail + shl(bxor(reg, 15), 3) + vex.p) + wvreg("vex.v", vreg) + rex = 0 + if op >= 256 then werror("bad vex opcode") end + else + if rex ~= 0 then + if not x64 then werror("bad operand size") end + elseif (vregr or vregxb) and x64 then + rex = 0x10 + sk = map_vreg["vex.v"] + end + end + local r + if sz == "w" then wputb(102) end + -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] + if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end + if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end + if op >= 65536 then + if rex ~= 0 then + local opc3 = band(op, 0xffff00) + if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then + wputb(64 + band(rex, 15)); rex = 0; psz = 2 + end + end + wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 + end + if op >= 256 then + local b = shr(op, 8) + if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end + wputb(b); op = band(op, 255); psz = psz + 1 + end + if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end + if sz == "b" then op = op - 1 end + wputb(op) + return psz, sk +end + +-- Put ModRM or SIB formatted byte. +local function wputmodrm(m, s, rm, vs, vrm) + assert(m < 4 and s < 16 and rm < 16, "bad modrm operands") + wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7)) +end + +-- Put ModRM/SIB plus optional displacement. +local function wputmrmsib(t, imark, s, vsreg, psz, sk) + local vreg, vxreg + local reg, xreg = t.reg, t.xreg + if reg and reg < 0 then reg = 0; vreg = t.vreg end + if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end + if s < 0 then s = 0 end + + -- Register mode. + if sub(t.mode, 1, 1) == "r" then + wputmodrm(3, s, reg) + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.r", vreg, psz+1, sk) + return + end + + local disp = t.disp + local tdisp = type(disp) + -- No base register? + if not reg then + local riprel = false + if xreg then + -- Indexed mode with index register only. 
+ -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) + wputmodrm(0, s, 4) + if imark == "I" then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) + wputmodrm(t.xsc, xreg, 5) + wvreg("sib.index", vxreg, psz+2, sk) + else + -- Pure 32 bit displacement. + if x64 and tdisp ~= "table" then + wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) + if imark == "I" then waction("MARK") end + wputmodrm(0, 4, 5) + else + riprel = x64 + wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) + if imark == "I" then waction("MARK") end + end + end + if riprel then -- Emit rip-relative displacement. + if match("UWSiI", imark) then + werror("NYI: rip-relative displacement followed by immediate") + end + -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. + wputlabel("REL_", disp[1], 2) + else + wputdarg(disp) + end + return + end + + local m + if tdisp == "number" then -- Check displacement size at assembly time. + if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) + if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] + elseif disp >= -128 and disp <= 127 then m = 1 + else m = 2 end + elseif tdisp == "table" then + m = 2 + end + + -- Index register present or esp as base register: need SIB encoding. + if xreg or band(reg, 7) == 4 then + wputmodrm(m or 2, s, 4) -- ModRM. + if m == nil or imark == "I" then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) + wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. + wvreg("sib.index", vxreg, psz+2, sk, vreg) + wvreg("sib.base", vreg, psz+2, sk) + else + wputmodrm(m or 2, s, reg) -- ModRM. + if (imark == "I" and (m == 1 or m == 2)) or + (m == nil and (vsreg or vreg)) then waction("MARK") end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.m", vreg, psz+1, sk) + end + + -- Put displacement. + if m == 1 then wputsbarg(disp) + elseif m == 2 then wputdarg(disp) + elseif m == nil then waction("DISP", disp) end +end + +------------------------------------------------------------------------------ + +-- Return human-readable operand mode string. +local function opmodestr(op, args) + local m = {} + for i=1,#args do + local a = args[i] + m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?") + end + return op.." "..concat(m, ",") +end + +-- Convert number to valid integer or nil. +local function toint(expr, isqword) + local n = tonumber(expr) + if n then + if n % 1 ~= 0 then + werror("not an integer number `"..expr.."'") + elseif isqword then + if n < -2147483648 or n > 2147483647 then + n = nil -- Handle it as an expression to avoid precision loss. + end + elseif n < -2147483648 or n > 4294967295 then + werror("bad integer number `"..expr.."'") + end + return n + end +end + +-- Parse immediate expression. +local function immexpr(expr) + -- &expr (pointer) + if sub(expr, 1, 1) == "&" then + return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2)) + end + + local prefix = sub(expr, 1, 2) + -- =>expr (pc label reference) + if prefix == "=>" then + return "iJ", sub(expr, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "iJ", map_global[sub(expr, 3)] + end + + -- [<>][1-9] (local label reference) + local dir, lnum = match(expr, "^([<>])([1-9])$") + if dir then -- Fwd: 247-255, Bkwd: 1-9. 
+ return "iJ", lnum + (dir == ">" and 246 or 0) + end + + local extname = match(expr, "^extern%s+(%S+)$") + if extname then + return "iJ", map_extern[extname] + end + + -- expr (interpreted as immediate) + return "iI", expr +end + +-- Parse displacement expression: +-num, +-expr, +-opsize*num +local function dispexpr(expr) + local disp = expr == "" and 0 or toint(expr) + if disp then return disp end + local c, dispt = match(expr, "^([+-])%s*(.+)$") + if c == "+" then + expr = dispt + elseif not c then + werror("bad displacement expression `"..expr.."'") + end + local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$") + local ops, imm = map_opsize[opsize], toint(tailops) + if ops and imm then + if c == "-" then imm = -imm end + return imm*map_opsizenum[ops] + end + local mode, iexpr = immexpr(dispt) + if mode == "iJ" then + if c == "-" then werror("cannot invert label reference") end + return { iexpr } + end + return expr -- Need to return original signed expression. +end + +-- Parse register or type expression. +local function rtexpr(expr) + if not expr then return end + local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + local rnum = map_reg_num[reg] + if not rnum then + werror("type `"..(tname or expr).."' needs a register override") + end + if not map_reg_valid_base[reg] then + werror("bad base register override `"..(map_reg_rev[reg] or reg).."'") + end + return reg, rnum, tp + end + return expr, map_reg_num[expr] +end + +-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. +local function parseoperand(param, isqword) + local t = {} + + local expr = param + local opsize, tailops = match(param, "^(%w+)%s*(.+)$") + if opsize then + t.opsize = map_opsize[opsize] + if t.opsize then expr = tailops end + end + + local br = match(expr, "^%[%s*(.-)%s*%]$") + repeat + if br then + t.mode = "xm" + + -- [disp] + t.disp = toint(br) + if t.disp then + t.mode = x64 and "xm" or "xmO" + break + end + + -- [reg...] + local tp + local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if not t.reg then + -- [expr] + t.mode = x64 and "xm" or "xmO" + t.disp = dispexpr("+"..br) + break + end + + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + + -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr] + local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$") + if xsc then + if not map_reg_valid_index[reg] then + werror("bad index register `"..map_reg_rev[reg].."'") + end + t.xsc = map_xsc[xsc] + t.xreg = t.reg + t.vxreg = t.vreg + t.reg = nil + t.vreg = nil + t.disp = dispexpr(tailsc) + break + end + if not map_reg_valid_base[reg] then + werror("bad base register `"..map_reg_rev[reg].."'") + end + + -- [reg] or [reg+-disp] + t.disp = toint(tailr) or (tailr == "" and 0) + if t.disp then break end + + -- [reg+xreg...] + local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$") + xreg, t.xreg, tp = rtexpr(xreg) + if not t.xreg then + -- [reg+-expr] + t.disp = dispexpr(tailr) + break + end + if not map_reg_valid_index[xreg] then + werror("bad index register `"..map_reg_rev[xreg].."'") + end + + if t.xreg == -1 then + t.vxreg, tailx = match(tailx, "^(%b())(.*)$") + if not t.vxreg then werror("bad variable register expression") end + end + + -- [reg+xreg*xsc...] 
+ local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$") + if xsc then + t.xsc = map_xsc[xsc] + tailx = tailsc + end + + -- [...] or [...+-disp] or [...+-expr] + t.disp = dispexpr(tailx) + else + -- imm or opsize*imm + local imm = toint(expr, isqword) + if not imm and sub(expr, 1, 1) == "*" and t.opsize then + imm = toint(sub(expr, 2)) + if imm then + imm = imm * map_opsizenum[t.opsize] + t.opsize = nil + end + end + if imm then + if t.opsize then werror("bad operand size override") end + local m = "i" + if imm == 1 then m = m.."1" end + if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end + if imm >= -128 and imm <= 127 then m = m.."S" end + t.imm = imm + t.mode = m + break + end + + local tp + local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$") + reg, t.reg, tp = rtexpr(reg) + if t.reg then + if t.reg == -1 then + t.vreg, tailr = match(tailr, "^(%b())(.*)$") + if not t.vreg then werror("bad variable register expression") end + end + -- reg + if tailr == "" then + if t.opsize then werror("bad operand size override") end + t.opsize = map_reg_opsize[reg] + if t.opsize == "f" then + t.mode = t.reg == 0 and "fF" or "f" + else + if reg == "@w4" or (x64 and reg == "@d4") then + wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'")) + end + t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm") + end + t.needrex = map_reg_needrex[reg] + break + end + + -- type[idx], type[idx].field, type->field -> [reg+offset_expr] + if not tp then werror("bad operand `"..param.."'") end + t.mode = "xm" + t.disp = format(tp.ctypefmt, tailr) + else + t.mode, t.imm = immexpr(expr) + if sub(t.mode, -1) == "J" then + if t.opsize and t.opsize ~= addrsize then + werror("bad operand size override") + end + t.opsize = addrsize + end + end + end + until true + return t +end + +------------------------------------------------------------------------------ +-- x86 Template String Description +-- =============================== +-- +-- Each template string is a list of [match:]pattern pairs, +-- separated by "|". The first match wins. No match means a +-- bad or unsupported combination of operand modes or sizes. +-- +-- The match part and the ":" is omitted if the operation has +-- no operands. Otherwise the first N characters are matched +-- against the mode strings of each of the N operands. +-- +-- The mode string for each operand type is (see parseoperand()): +-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl +-- FP register: "f", +"F" for st0 +-- Index operand: "xm", +"O" for [disp] (pure offset) +-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, +-- +"I" for arg, +"P" for pointer +-- Any: +"J" for valid jump targets +-- +-- So a match character "m" (mixed) matches both an integer register +-- and an index operand (to be encoded with the ModRM/SIB scheme). +-- But "r" matches only a register and "x" only an index operand +-- (e.g. for FP memory access operations). +-- +-- The operand size match string starts right after the mode match +-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. +-- The effective data size of the operation is matched against this list. +-- +-- If only the regular "b", "w", "d", "q", "t" operand sizes are +-- present, then all operands must be the same size. Unspecified sizes +-- are ignored, but at least one operand must have a size or the pattern +-- won't match (use the "byte", "word", "dword", "qword", "tword" +-- operand size overrides. E.g.: mov dword [eax], 1). 
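+-- A worked example, using the mov_2 entry "mi:C70mi" further down:
+-- the match part "mi" accepts a memory/register destination plus an
+-- immediate operand; in the pattern, the hex digits accumulate the
+-- opcode C7 with spare digit 0, "m" emits ModRM from the 1st operand
+-- with /0 in the reg field, and "i" stores an operand-sized immediate,
+-- so the mov dword [eax], 1 above encodes as C7 00 01 00 00 00.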
+-- +-- If the list has a "1" or "2" prefix, the operand size is taken +-- from the respective operand and any other operand sizes are ignored. +-- If the list contains only ".", all operand sizes are ignored. +-- If the list has a "/" prefix, the concatenated (mixed) operand sizes +-- are compared to the match. +-- +-- E.g. "rrdw" matches for either two dword registers or two word +-- registers. "Fx2dq" matches an st0 operand plus an index operand +-- pointing to a dword (float) or qword (double). +-- +-- Every character after the ":" is part of the pattern string: +-- Hex chars are accumulated to form the opcode (left to right). +-- "n" disables the standard opcode mods +-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") +-- "X" Force REX.W. +-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. +-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. +-- The spare 3 bits are either filled with the last hex digit or +-- the result from a previous "r"/"R". The opcode is restored. +-- "u" Use VEX encoding, vvvv unused. +-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is +-- removed from the list used by future characters). +-- "w" Use VEX encoding, vvvv from 3rd operand. +-- "L" Force VEX.L +-- +-- All of the following characters force a flush of the opcode: +-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. +-- "s" stores a 4 bit immediate from the last register operand, +-- followed by 4 zero bits. +-- "S" stores a signed 8 bit immediate from the last operand. +-- "U" stores an unsigned 8 bit immediate from the last operand. +-- "W" stores an unsigned 16 bit immediate from the last operand. +-- "i" stores an operand sized immediate from the last operand. +-- "I" dito, but generates an action code to optionally modify +-- the opcode (+2) for a signed 8 bit immediate. +-- "J" generates one of the REL action codes from the last operand. +-- +------------------------------------------------------------------------------ + +-- Template strings for x86 instructions. Ordered by first opcode byte. +-- Unimplemented opcodes (deliberate omissions) are marked with *. +local map_op = { + -- 00-05: add... + -- 06: *push es + -- 07: *pop es + -- 08-0D: or... + -- 0E: *push cs + -- 0F: two byte opcode prefix + -- 10-15: adc... + -- 16: *push ss + -- 17: *pop ss + -- 18-1D: sbb... + -- 1E: *push ds + -- 1F: *pop ds + -- 20-25: and... + es_0 = "26", + -- 27: *daa + -- 28-2D: sub... + cs_0 = "2E", + -- 2F: *das + -- 30-35: xor... + ss_0 = "36", + -- 37: *aaa + -- 38-3D: cmp... + ds_0 = "3E", + -- 3F: *aas + inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m", + dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m", + push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or + "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i", + pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m", + -- 60: *pusha, *pushad, *pushaw + -- 61: *popa, *popad, *popaw + -- 62: *bound rdw,x + -- 63: x86: *arpl mw,rw + movsxd_2 = x64 and "rm/qd:63rM", + fs_0 = "64", + gs_0 = "65", + o16_0 = "66", + a16_0 = not x64 and "67" or nil, + a32_0 = x64 and "67", + -- 68: push idw + -- 69: imul rdw,mdw,idw + -- 6A: push ib + -- 6B: imul rdw,mdw,S + -- 6C: *insb + -- 6D: *insd, *insw + -- 6E: *outsb + -- 6F: *outsd, *outsw + -- 70-7F: jcc lb + -- 80: add... mb,i + -- 81: add... mdw,i + -- 82: *undefined + -- 83: add... 
mdw,S + test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi", + -- 86: xchg rb,mb + -- 87: xchg rdw,mdw + -- 88: mov mb,r + -- 89: mov mdw,r + -- 8A: mov r,mb + -- 8B: mov r,mdw + -- 8C: *mov mdw,seg + lea_2 = "rx1dq:8DrM", + -- 8E: *mov seg,mdw + -- 8F: pop mdw + nop_0 = "90", + xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm", + cbw_0 = "6698", + cwde_0 = "98", + cdqe_0 = "4898", + cwd_0 = "6699", + cdq_0 = "99", + cqo_0 = "4899", + -- 9A: *call iw:idw + wait_0 = "9B", + fwait_0 = "9B", + pushf_0 = "9C", + pushfd_0 = not x64 and "9C", + pushfq_0 = x64 and "9C", + popf_0 = "9D", + popfd_0 = not x64 and "9D", + popfq_0 = x64 and "9D", + sahf_0 = "9E", + lahf_0 = "9F", + mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", + movsb_0 = "A4", + movsw_0 = "66A5", + movsd_0 = "A5", + cmpsb_0 = "A6", + cmpsw_0 = "66A7", + cmpsd_0 = "A7", + -- A8: test Rb,i + -- A9: test Rdw,i + stosb_0 = "AA", + stosw_0 = "66AB", + stosd_0 = "AB", + lodsb_0 = "AC", + lodsw_0 = "66AD", + lodsd_0 = "AD", + scasb_0 = "AE", + scasw_0 = "66AF", + scasd_0 = "AF", + -- B0-B7: mov rb,i + -- B8-BF: mov rdw,i + -- C0: rol... mb,i + -- C1: rol... mdw,i + ret_1 = "i.:nC2W", + ret_0 = "C3", + -- C4: *les rdw,mq + -- C5: *lds rdw,mq + -- C6: mov mb,i + -- C7: mov mdw,i + -- C8: *enter iw,ib + leave_0 = "C9", + -- CA: *retf iw + -- CB: *retf + int3_0 = "CC", + int_1 = "i.:nCDU", + into_0 = "CE", + -- CF: *iret + -- D0: rol... mb,1 + -- D1: rol... mdw,1 + -- D2: rol... mb,cl + -- D3: rol... mb,cl + -- D4: *aam ib + -- D5: *aad ib + -- D6: *salc + -- D7: *xlat + -- D8-DF: floating point ops + -- E0: *loopne + -- E1: *loope + -- E2: *loop + -- E3: *jcxz, *jecxz + -- E4: *in Rb,ib + -- E5: *in Rdw,ib + -- E6: *out ib,Rb + -- E7: *out ib,Rdw + call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J", + jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB + -- EA: *jmp iw:idw + -- EB: jmp ib + -- EC: *in Rb,dx + -- ED: *in Rdw,dx + -- EE: *out dx,Rb + -- EF: *out dx,Rdw + lock_0 = "F0", + int1_0 = "F1", + repne_0 = "F2", + repnz_0 = "F2", + rep_0 = "F3", + repe_0 = "F3", + repz_0 = "F3", + endbr32_0 = "F30F1EFB", + endbr64_0 = "F30F1EFA", + -- F4: *hlt + cmc_0 = "F5", + -- F6: test... mb,i; div... mb + -- F7: test... mdw,i; div... mdw + clc_0 = "F8", + stc_0 = "F9", + -- FA: *cli + cld_0 = "FC", + std_0 = "FD", + -- FE: inc... mb + -- FF: inc... mdw + + -- misc ops + not_1 = "m:F72m", + neg_1 = "m:F73m", + mul_1 = "m:F74m", + imul_1 = "m:F75m", + div_1 = "m:F76m", + idiv_1 = "m:F77m", + + imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi", + imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi", + + movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:", + movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:", + + bswap_1 = "rqd:0FC8r", + bsf_2 = "rmqdw:0FBCrM", + bsr_2 = "rmqdw:0FBDrM", + bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU", + btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU", + btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU", + bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU", + + shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:", + shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", + + rdtsc_0 = "0F31", -- P1+ + rdpmc_0 = "0F33", -- P6+ + cpuid_0 = "0FA2", -- P1+ + + -- floating point ops + fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m", + fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m", + fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m", + + fpop_0 = "DDD8", -- Alias for fstp st0. 
+ + fist_1 = "xw:nDF2m|xd:DB2m", + fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m", + fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m", + + fxch_0 = "D9C9", + fxch_1 = "ff:D9C8r", + fxch_2 = "fFf:D9C8r|Fff:D9C8R", + + fucom_1 = "ff:DDE0r", + fucom_2 = "Fff:DDE0R", + fucomp_1 = "ff:DDE8r", + fucomp_2 = "Fff:DDE8R", + fucomi_1 = "ff:DBE8r", -- P6+ + fucomi_2 = "Fff:DBE8R", -- P6+ + fucomip_1 = "ff:DFE8r", -- P6+ + fucomip_2 = "Fff:DFE8R", -- P6+ + fcomi_1 = "ff:DBF0r", -- P6+ + fcomi_2 = "Fff:DBF0R", -- P6+ + fcomip_1 = "ff:DFF0r", -- P6+ + fcomip_2 = "Fff:DFF0R", -- P6+ + fucompp_0 = "DAE9", + fcompp_0 = "DED9", + + fldenv_1 = "x.:D94m", + fnstenv_1 = "x.:D96m", + fstenv_1 = "x.:9BD96m", + fldcw_1 = "xw:nD95m", + fstcw_1 = "xw:n9BD97m", + fnstcw_1 = "xw:nD97m", + fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", + fnstsw_1 = "Rw:nDFE0|xw:nDD7m", + fclex_0 = "9BDBE2", + fnclex_0 = "DBE2", + + fnop_0 = "D9D0", + -- D9D1-D9DF: unassigned + + fchs_0 = "D9E0", + fabs_0 = "D9E1", + -- D9E2: unassigned + -- D9E3: unassigned + ftst_0 = "D9E4", + fxam_0 = "D9E5", + -- D9E6: unassigned + -- D9E7: unassigned + fld1_0 = "D9E8", + fldl2t_0 = "D9E9", + fldl2e_0 = "D9EA", + fldpi_0 = "D9EB", + fldlg2_0 = "D9EC", + fldln2_0 = "D9ED", + fldz_0 = "D9EE", + -- D9EF: unassigned + + f2xm1_0 = "D9F0", + fyl2x_0 = "D9F1", + fptan_0 = "D9F2", + fpatan_0 = "D9F3", + fxtract_0 = "D9F4", + fprem1_0 = "D9F5", + fdecstp_0 = "D9F6", + fincstp_0 = "D9F7", + fprem_0 = "D9F8", + fyl2xp1_0 = "D9F9", + fsqrt_0 = "D9FA", + fsincos_0 = "D9FB", + frndint_0 = "D9FC", + fscale_0 = "D9FD", + fsin_0 = "D9FE", + fcos_0 = "D9FF", + + -- SSE, SSE2 + andnpd_2 = "rmo:660F55rM", + andnps_2 = "rmo:0F55rM", + andpd_2 = "rmo:660F54rM", + andps_2 = "rmo:0F54rM", + clflush_1 = "x.:0FAE7m", + cmppd_3 = "rmio:660FC2rMU", + cmpps_3 = "rmio:0FC2rMU", + cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:", + cmpss_3 = "rrio:F30FC2rMU|rxi/od:", + comisd_2 = "rro:660F2FrM|rx/oq:", + comiss_2 = "rro:0F2FrM|rx/od:", + cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", + cvtdq2ps_2 = "rmo:0F5BrM", + cvtpd2dq_2 = "rmo:F20FE6rM", + cvtpd2ps_2 = "rmo:660F5ArM", + cvtpi2pd_2 = "rx/oq:660F2ArM", + cvtpi2ps_2 = "rx/oq:0F2ArM", + cvtps2dq_2 = "rmo:660F5BrM", + cvtps2pd_2 = "rro:0F5ArM|rx/oq:", + cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:", + cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", + cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM", + cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM", + cvtss2sd_2 = "rro:F30F5ArM|rx/od:", + cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:", + cvttpd2dq_2 = "rmo:660FE6rM", + cvttps2dq_2 = "rmo:F30F5BrM", + cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", + cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:", + fxsave_1 = "x.:0FAE0m", + fxrstor_1 = "x.:0FAE1m", + ldmxcsr_1 = "xd:0FAE2m", + lfence_0 = "0FAEE8", + maskmovdqu_2 = "rro:660FF7rM", + mfence_0 = "0FAEF0", + movapd_2 = "rmo:660F28rM|mro:660F29Rm", + movaps_2 = "rmo:0F28rM|mro:0F29Rm", + movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:", + movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", + movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", + movhlps_2 = "rro:0F12rM", + movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm", + movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm", + movlhps_2 = "rro:0F16rM", + movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm", + movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm", + movmskpd_2 = "rr/do:660F50rM", + movmskps_2 = "rr/do:0F50rM", + movntdq_2 = "xro:660FE7Rm", + movnti_2 = "xrqd:0FC3Rm", + movntpd_2 = "xro:660F2BRm", + movntps_2 = "xro:0F2BRm", + movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", + movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm", + 
movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", + movupd_2 = "rmo:660F10rM|mro:660F11Rm", + movups_2 = "rmo:0F10rM|mro:0F11Rm", + orpd_2 = "rmo:660F56rM", + orps_2 = "rmo:0F56rM", + pause_0 = "F390", + pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. + pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", + pmovmskb_2 = "rr/do:660FD7rM", + prefetchnta_1 = "xb:n0F180m", + prefetcht0_1 = "xb:n0F181m", + prefetcht1_1 = "xb:n0F182m", + prefetcht2_1 = "xb:n0F183m", + pshufd_3 = "rmio:660F70rMU", + pshufhw_3 = "rmio:F30F70rMU", + pshuflw_3 = "rmio:F20F70rMU", + pslld_2 = "rmo:660FF2rM|rio:660F726mU", + pslldq_2 = "rio:660F737mU", + psllq_2 = "rmo:660FF3rM|rio:660F736mU", + psllw_2 = "rmo:660FF1rM|rio:660F716mU", + psrad_2 = "rmo:660FE2rM|rio:660F724mU", + psraw_2 = "rmo:660FE1rM|rio:660F714mU", + psrld_2 = "rmo:660FD2rM|rio:660F722mU", + psrldq_2 = "rio:660F733mU", + psrlq_2 = "rmo:660FD3rM|rio:660F732mU", + psrlw_2 = "rmo:660FD1rM|rio:660F712mU", + rcpps_2 = "rmo:0F53rM", + rcpss_2 = "rro:F30F53rM|rx/od:", + rsqrtps_2 = "rmo:0F52rM", + rsqrtss_2 = "rmo:F30F52rM", + sfence_0 = "0FAEF8", + shufpd_3 = "rmio:660FC6rMU", + shufps_3 = "rmio:0FC6rMU", + stmxcsr_1 = "xd:0FAE3m", + ucomisd_2 = "rro:660F2ErM|rx/oq:", + ucomiss_2 = "rro:0F2ErM|rx/od:", + unpckhpd_2 = "rmo:660F15rM", + unpckhps_2 = "rmo:0F15rM", + unpcklpd_2 = "rmo:660F14rM", + unpcklps_2 = "rmo:0F14rM", + xorpd_2 = "rmo:660F57rM", + xorps_2 = "rmo:0F57rM", + + -- SSE3 ops + fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m", + addsubpd_2 = "rmo:660FD0rM", + addsubps_2 = "rmo:F20FD0rM", + haddpd_2 = "rmo:660F7CrM", + haddps_2 = "rmo:F20F7CrM", + hsubpd_2 = "rmo:660F7DrM", + hsubps_2 = "rmo:F20F7DrM", + lddqu_2 = "rxo:F20FF0rM", + movddup_2 = "rmo:F20F12rM", + movshdup_2 = "rmo:F30F16rM", + movsldup_2 = "rmo:F30F12rM", + + -- SSSE3 ops + pabsb_2 = "rmo:660F381CrM", + pabsd_2 = "rmo:660F381ErM", + pabsw_2 = "rmo:660F381DrM", + palignr_3 = "rmio:660F3A0FrMU", + phaddd_2 = "rmo:660F3802rM", + phaddsw_2 = "rmo:660F3803rM", + phaddw_2 = "rmo:660F3801rM", + phsubd_2 = "rmo:660F3806rM", + phsubsw_2 = "rmo:660F3807rM", + phsubw_2 = "rmo:660F3805rM", + pmaddubsw_2 = "rmo:660F3804rM", + pmulhrsw_2 = "rmo:660F380BrM", + pshufb_2 = "rmo:660F3800rM", + psignb_2 = "rmo:660F3808rM", + psignd_2 = "rmo:660F380ArM", + psignw_2 = "rmo:660F3809rM", + + -- SSE4.1 ops + blendpd_3 = "rmio:660F3A0DrMU", + blendps_3 = "rmio:660F3A0CrMU", + blendvpd_3 = "rmRo:660F3815rM", + blendvps_3 = "rmRo:660F3814rM", + dppd_3 = "rmio:660F3A41rMU", + dpps_3 = "rmio:660F3A40rMU", + extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU", + insertps_3 = "rrio:660F3A41rMU|rxi/od:", + movntdqa_2 = "rxo:660F382ArM", + mpsadbw_3 = "rmio:660F3A42rMU", + packusdw_2 = "rmo:660F382BrM", + pblendvb_3 = "rmRo:660F3810rM", + pblendw_3 = "rmio:660F3A0ErMU", + pcmpeqq_2 = "rmo:660F3829rM", + pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:", + pextrd_3 = "mri/do:660F3A16RmU", + pextrq_3 = "mri/qo:660F3A16RmU", + -- pextrw is SSE2, mem operand is SSE4.1 only + phminposuw_2 = "rmo:660F3841rM", + pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", + pinsrd_3 = "rmi/od:660F3A22rMU", + pinsrq_3 = "rmi/oq:660F3A22rXMU", + pmaxsb_2 = "rmo:660F383CrM", + pmaxsd_2 = "rmo:660F383DrM", + pmaxud_2 = "rmo:660F383FrM", + pmaxuw_2 = "rmo:660F383ErM", + pminsb_2 = "rmo:660F3838rM", + pminsd_2 = "rmo:660F3839rM", + pminud_2 = "rmo:660F383BrM", + pminuw_2 = "rmo:660F383ArM", + pmovsxbd_2 = "rro:660F3821rM|rx/od:", + pmovsxbq_2 = "rro:660F3822rM|rx/ow:", + pmovsxbw_2 = "rro:660F3820rM|rx/oq:", + pmovsxdq_2 = 
"rro:660F3825rM|rx/oq:", + pmovsxwd_2 = "rro:660F3823rM|rx/oq:", + pmovsxwq_2 = "rro:660F3824rM|rx/od:", + pmovzxbd_2 = "rro:660F3831rM|rx/od:", + pmovzxbq_2 = "rro:660F3832rM|rx/ow:", + pmovzxbw_2 = "rro:660F3830rM|rx/oq:", + pmovzxdq_2 = "rro:660F3835rM|rx/oq:", + pmovzxwd_2 = "rro:660F3833rM|rx/oq:", + pmovzxwq_2 = "rro:660F3834rM|rx/od:", + pmuldq_2 = "rmo:660F3828rM", + pmulld_2 = "rmo:660F3840rM", + ptest_2 = "rmo:660F3817rM", + roundpd_3 = "rmio:660F3A09rMU", + roundps_3 = "rmio:660F3A08rMU", + roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", + roundss_3 = "rrio:660F3A0ArMU|rxi/od:", + + -- SSE4.2 ops + crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:", + pcmpestri_3 = "rmio:660F3A61rMU", + pcmpestrm_3 = "rmio:660F3A60rMU", + pcmpgtq_2 = "rmo:660F3837rM", + pcmpistri_3 = "rmio:660F3A63rMU", + pcmpistrm_3 = "rmio:660F3A62rMU", + popcnt_2 = "rmqdw:F30FB8rM", + + -- SSE4a + extrq_2 = "rro:660F79rM", + extrq_3 = "riio:660F780mUU", + insertq_2 = "rro:F20F79rM", + insertq_4 = "rriio:F20F78rMUU", + lzcnt_2 = "rmqdw:F30FBDrM", + movntsd_2 = "xr/qo:nF20F2BRm", + movntss_2 = "xr/do:F30F2BRm", + -- popcnt is also in SSE4.2 + + -- AES-NI + aesdec_2 = "rmo:660F38DErM", + aesdeclast_2 = "rmo:660F38DFrM", + aesenc_2 = "rmo:660F38DCrM", + aesenclast_2 = "rmo:660F38DDrM", + aesimc_2 = "rmo:660F38DBrM", + aeskeygenassist_3 = "rmio:660F3ADFrMU", + pclmulqdq_3 = "rmio:660F3A44rMU", + + -- AVX FP ops + vaddsubpd_3 = "rrmoy:660FVD0rM", + vaddsubps_3 = "rrmoy:F20FVD0rM", + vandpd_3 = "rrmoy:660FV54rM", + vandps_3 = "rrmoy:0FV54rM", + vandnpd_3 = "rrmoy:660FV55rM", + vandnps_3 = "rrmoy:0FV55rM", + vblendpd_4 = "rrmioy:660F3AV0DrMU", + vblendps_4 = "rrmioy:660F3AV0CrMU", + vblendvpd_4 = "rrmroy:660F3AV4BrMs", + vblendvps_4 = "rrmroy:660F3AV4ArMs", + vbroadcastf128_2 = "rx/yo:660F38u1ArM", + vcmppd_4 = "rrmioy:660FVC2rMU", + vcmpps_4 = "rrmioy:0FVC2rMU", + vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", + vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", + vcomisd_2 = "rro:660Fu2FrM|rx/oq:", + vcomiss_2 = "rro:0Fu2FrM|rx/od:", + vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", + vcvtdq2ps_2 = "rmoy:0Fu5BrM", + vcvtpd2dq_2 = "rmoy:F20FuE6rM", + vcvtpd2ps_2 = "rmoy:660Fu5ArM", + vcvtps2dq_2 = "rmoy:660Fu5BrM", + vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", + vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", + vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", + vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", + vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", + vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", + vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", + vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", + vcvttps2dq_2 = "rmoy:F30Fu5BrM", + vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", + vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", + vdppd_4 = "rrmio:660F3AV41rMU", + vdpps_4 = "rrmioy:660F3AV40rMU", + vextractf128_3 = "mri/oy:660F3AuL19RmU", + vextractps_3 = "mri/do:660F3Au17RmU", + vhaddpd_3 = "rrmoy:660FV7CrM", + vhaddps_3 = "rrmoy:F20FV7CrM", + vhsubpd_3 = "rrmoy:660FV7DrM", + vhsubps_3 = "rrmoy:F20FV7DrM", + vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", + vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", + vldmxcsr_1 = "xd:0FuAE2m", + vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", + vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", + vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", + vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", + vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", + vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", + vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", + vmovhlps_3 
= "rrro:0FV12rM", + vmovhpd_2 = "xr/qo:660Fu17Rm", + vmovhpd_3 = "rrx/ooq:660FV16rM", + vmovhps_2 = "xr/qo:0Fu17Rm", + vmovhps_3 = "rrx/ooq:0FV16rM", + vmovlhps_3 = "rrro:0FV16rM", + vmovlpd_2 = "xr/qo:660Fu13Rm", + vmovlpd_3 = "rrx/ooq:660FV12rM", + vmovlps_2 = "xr/qo:0Fu13Rm", + vmovlps_3 = "rrx/ooq:0FV12rM", + vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", + vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", + vmovntpd_2 = "xroy:660Fu2BRm", + vmovntps_2 = "xroy:0Fu2BRm", + vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", + vmovsd_3 = "rrro:F20FV10rM", + vmovshdup_2 = "rmoy:F30Fu16rM", + vmovsldup_2 = "rmoy:F30Fu12rM", + vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", + vmovss_3 = "rrro:F30FV10rM", + vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", + vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", + vorpd_3 = "rrmoy:660FV56rM", + vorps_3 = "rrmoy:0FV56rM", + vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", + vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", + vperm2f128_4 = "rrmiy:660F3AV06rMU", + vptestpd_2 = "rmoy:660F38u0FrM", + vptestps_2 = "rmoy:660F38u0ErM", + vrcpps_2 = "rmoy:0Fu53rM", + vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", + vrsqrtps_2 = "rmoy:0Fu52rM", + vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", + vroundpd_3 = "rmioy:660F3Au09rMU", + vroundps_3 = "rmioy:660F3Au08rMU", + vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", + vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", + vshufpd_4 = "rrmioy:660FVC6rMU", + vshufps_4 = "rrmioy:0FVC6rMU", + vsqrtps_2 = "rmoy:0Fu51rM", + vsqrtss_2 = "rro:F30Fu51rM|rx/od:", + vsqrtpd_2 = "rmoy:660Fu51rM", + vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", + vstmxcsr_1 = "xd:0FuAE3m", + vucomisd_2 = "rro:660Fu2ErM|rx/oq:", + vucomiss_2 = "rro:0Fu2ErM|rx/od:", + vunpckhpd_3 = "rrmoy:660FV15rM", + vunpckhps_3 = "rrmoy:0FV15rM", + vunpcklpd_3 = "rrmoy:660FV14rM", + vunpcklps_3 = "rrmoy:0FV14rM", + vxorpd_3 = "rrmoy:660FV57rM", + vxorps_3 = "rrmoy:0FV57rM", + vzeroall_0 = "0FuL77", + vzeroupper_0 = "0Fu77", + + -- AVX2 FP ops + vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", + vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", + -- *vgather* (!vsib) + vpermpd_3 = "rmiy:660F3AuX01rMU", + vpermps_3 = "rrmy:660F38V16rM", + + -- AVX, AVX2 integer ops + -- In general, xmm requires AVX, ymm requires AVX2. 
+ vaesdec_3 = "rrmo:660F38VDErM", + vaesdeclast_3 = "rrmo:660F38VDFrM", + vaesenc_3 = "rrmo:660F38VDCrM", + vaesenclast_3 = "rrmo:660F38VDDrM", + vaesimc_2 = "rmo:660F38uDBrM", + vaeskeygenassist_3 = "rmio:660F3AuDFrMU", + vlddqu_2 = "rxoy:F20FuF0rM", + vmaskmovdqu_2 = "rro:660FuF7rM", + vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", + vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", + vmovntdq_2 = "xroy:660FuE7Rm", + vmovntdqa_2 = "rxoy:660F38u2ArM", + vmpsadbw_4 = "rrmioy:660F3AV42rMU", + vpabsb_2 = "rmoy:660F38u1CrM", + vpabsd_2 = "rmoy:660F38u1ErM", + vpabsw_2 = "rmoy:660F38u1DrM", + vpackusdw_3 = "rrmoy:660F38V2BrM", + vpalignr_4 = "rrmioy:660F3AV0FrMU", + vpblendvb_4 = "rrmroy:660F3AV4CrMs", + vpblendw_4 = "rrmioy:660F3AV0ErMU", + vpclmulqdq_4 = "rrmio:660F3AV44rMU", + vpcmpeqq_3 = "rrmoy:660F38V29rM", + vpcmpestri_3 = "rmio:660F3Au61rMU", + vpcmpestrm_3 = "rmio:660F3Au60rMU", + vpcmpgtq_3 = "rrmoy:660F38V37rM", + vpcmpistri_3 = "rmio:660F3Au63rMU", + vpcmpistrm_3 = "rmio:660F3Au62rMU", + vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", + vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", + vpextrd_3 = "mri/do:660F3Au16RmU", + vpextrq_3 = "mri/qo:660F3Au16RmU", + vphaddw_3 = "rrmoy:660F38V01rM", + vphaddd_3 = "rrmoy:660F38V02rM", + vphaddsw_3 = "rrmoy:660F38V03rM", + vphminposuw_2 = "rmo:660F38u41rM", + vphsubw_3 = "rrmoy:660F38V05rM", + vphsubd_3 = "rrmoy:660F38V06rM", + vphsubsw_3 = "rrmoy:660F38V07rM", + vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", + vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", + vpinsrd_4 = "rrmi/ood:660F3AV22rMU", + vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", + vpmaddubsw_3 = "rrmoy:660F38V04rM", + vpmaxsb_3 = "rrmoy:660F38V3CrM", + vpmaxsd_3 = "rrmoy:660F38V3DrM", + vpmaxuw_3 = "rrmoy:660F38V3ErM", + vpmaxud_3 = "rrmoy:660F38V3FrM", + vpminsb_3 = "rrmoy:660F38V38rM", + vpminsd_3 = "rrmoy:660F38V39rM", + vpminuw_3 = "rrmoy:660F38V3ArM", + vpminud_3 = "rrmoy:660F38V3BrM", + vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", + vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", + vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", + vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", + vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", + vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", + vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", + vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", + vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", + vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", + vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", + vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", + vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", + vpmuldq_3 = "rrmoy:660F38V28rM", + vpmulhrsw_3 = "rrmoy:660F38V0BrM", + vpmulld_3 = "rrmoy:660F38V40rM", + vpshufb_3 = "rrmoy:660F38V00rM", + vpshufd_3 = "rmioy:660Fu70rMU", + vpshufhw_3 = "rmioy:F30Fu70rMU", + vpshuflw_3 = "rmioy:F20Fu70rMU", + vpsignb_3 = "rrmoy:660F38V08rM", + vpsignw_3 = "rrmoy:660F38V09rM", + vpsignd_3 = "rrmoy:660F38V0ArM", + vpslldq_3 = "rrioy:660Fv737mU", + vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", + vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", + vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", + vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", + vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", + vpsrldq_3 = "rrioy:660Fv733mU", + vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", + vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", + vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", + vptest_2 = "rmoy:660F38u17rM", + + -- AVX2 integer ops + vbroadcasti128_2 = "rx/yo:660F38u5ArM", + vinserti128_4 = "rrmi/yyo:660F3AV38rMU", + vextracti128_3 = 
"mri/oy:660F3AuL39RmU", + vpblendd_4 = "rrmioy:660F3AV02rMU", + vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", + vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", + vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", + vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", + vpermd_3 = "rrmy:660F38V36rM", + vpermq_3 = "rmiy:660F3AuX00rMU", + -- *vpgather* (!vsib) + vperm2i128_4 = "rrmiy:660F3AV46rMU", + vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", + vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", + vpsllvd_3 = "rrmoy:660F38V47rM", + vpsllvq_3 = "rrmoy:660F38VX47rM", + vpsravd_3 = "rrmoy:660F38V46rM", + vpsrlvd_3 = "rrmoy:660F38V45rM", + vpsrlvq_3 = "rrmoy:660F38VX45rM", + + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", + + -- BMI1 + andn_3 = "rrmqd:0F38VF2rM", + bextr_3 = "rmrqd:0F38wF7rM", + blsi_2 = "rmqd:0F38vF33m", + blsmsk_2 = "rmqd:0F38vF32m", + blsr_2 = "rmqd:0F38vF31m", + tzcnt_2 = "rmqdw:F30FBCrM", + + -- BMI2 + bzhi_3 = "rmrqd:0F38wF5rM", + mulx_3 = "rrmqd:F20F38VF6rM", + pdep_3 = "rrmqd:F20F38VF5rM", + pext_3 = "rrmqd:F30F38VF5rM", + rorx_3 = "rmSqd:F20F3AuF0rMS", + sarx_3 = "rmrqd:F30F38wF7rM", + shrx_3 = "rmrqd:F20F38wF7rM", + shlx_3 = "rmrqd:660F38wF7rM", + + -- FMA3 + vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", + vfmaddsub132ps_3 = "rrmoy:660F38V96rM", + vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", + vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", + vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", + vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", + + vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", + vfmsubadd132ps_3 = "rrmoy:660F38V97rM", + vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", + vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", + vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", + vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", + + vfmadd132pd_3 = "rrmoy:660F38VX98rM", + vfmadd132ps_3 = "rrmoy:660F38V98rM", + vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", + vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", + vfmadd213pd_3 = "rrmoy:660F38VXA8rM", + vfmadd213ps_3 = "rrmoy:660F38VA8rM", + vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", + vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", + vfmadd231pd_3 = "rrmoy:660F38VXB8rM", + vfmadd231ps_3 = "rrmoy:660F38VB8rM", + vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", + vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", + + vfmsub132pd_3 = "rrmoy:660F38VX9ArM", + vfmsub132ps_3 = "rrmoy:660F38V9ArM", + vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", + vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", + vfmsub213pd_3 = "rrmoy:660F38VXAArM", + vfmsub213ps_3 = "rrmoy:660F38VAArM", + vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", + vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", + vfmsub231pd_3 = "rrmoy:660F38VXBArM", + vfmsub231ps_3 = "rrmoy:660F38VBArM", + vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", + vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", + + vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", + vfnmadd132ps_3 = "rrmoy:660F38V9CrM", + vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", + vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", + vfnmadd213pd_3 = "rrmoy:660F38VXACrM", + vfnmadd213ps_3 = "rrmoy:660F38VACrM", + vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", + vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", + vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", + vfnmadd231ps_3 = "rrmoy:660F38VBCrM", + vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", + vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", + + vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", + vfnmsub132ps_3 = "rrmoy:660F38V9ErM", + vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", + vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", + 
vfnmsub213pd_3 = "rrmoy:660F38VXAErM", + vfnmsub213ps_3 = "rrmoy:660F38VAErM", + vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", + vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", + vfnmsub231pd_3 = "rrmoy:660F38VXBErM", + vfnmsub231ps_3 = "rrmoy:660F38VBErM", + vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", + vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", +} + +------------------------------------------------------------------------------ + +-- Arithmetic ops. +for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3, + ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do + local n8 = shl(n, 3) + map_op[name.."_2"] = format( + "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi", + 1+n8, 3+n8, n, n, 5+n8, n) +end + +-- Shift ops. +for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, + shl = 4, shr = 5, sar = 7, sal = 4 } do + map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n) +end + +-- Conditional ops. +for cc,n in pairs(map_cc) do + map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X + map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) + map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+ +end + +-- FP arithmetic ops. +for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3, + sub = 4, subr = 5, div = 6, divr = 7 } do + local nc = 0xc0 + shl(n, 3) + local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) + local fn = "f"..name + map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) + if n == 2 or n == 3 then + map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n) + else + map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n) + map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) + map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) + end + map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n) +end + +-- FP conditional moves. +for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do + local nc = 0xdac0 + shl(band(n, 3), 3) + shl(band(n, 4), 6) + map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+ + map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ +end + +-- SSE / AVX FP arithmetic ops. +for name,n in pairs{ sqrt = 1, add = 8, mul = 9, + sub = 12, min = 13, div = 14, max = 15 } do + map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) + map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) + map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) + map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) + if n ~= 1 then + map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) + map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) + map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) + map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) + end +end + +-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). 
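+-- As an illustrative sketch of what this loop generates (not additional
+-- definitions): for paddd (n = 0xFE) it produces
+--
+--   paddd_2  = "rmo:660FFErM"     -- SSE2 form, 66 0F FE /r
+--   vpaddd_3 = "rrmoy:660FVFErM"  -- VEX form, dest plus two sources
+--
+-- following the pattern syntax described in the header comment.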
+for name,n in pairs{ + paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, + paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, + packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, + paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, + pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, + pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, + pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, + pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, + pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, + pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, + psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, + psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, + punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, + punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, + punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF +} do + map_op[name.."_2"] = format("rmo:660F%02XrM", n) + map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) +end + +------------------------------------------------------------------------------ + +local map_vexarg = { u = false, v = 1, V = 2, w = 3 } + +-- Process pattern string. +local function dopattern(pat, args, sz, op, needrex) + local digit, addin, vex + local opcode = 0 + local szov = sz + local narg = 1 + local rex = 0 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 6 positions. + if secpos+6 > maxsecpos then wflush() end + + -- Process each character. + for c in gmatch(pat.."|", ".") do + if match(c, "%x") then -- Hex digit. + digit = byte(c) - 48 + if digit > 48 then digit = digit - 39 + elseif digit > 16 then digit = digit - 7 end + opcode = opcode*16 + digit + addin = nil + elseif c == "n" then -- Disable operand size mods for opcode. + szov = nil + elseif c == "X" then -- Force REX.W. + rex = 8 + elseif c == "L" then -- Force VEX.L. + vex.l = true + elseif c == "r" then -- Merge 1st operand regno. into opcode. + addin = args[1]; opcode = opcode + (addin.reg % 8) + if narg < 2 then narg = 2 end + elseif c == "R" then -- Merge 2nd operand regno. into opcode. + addin = args[2]; opcode = opcode + (addin.reg % 8) + narg = 3 + elseif c == "m" or c == "M" then -- Encode ModRM/SIB. + local s + if addin then + s = addin.reg + opcode = opcode - band(s, 7) -- Undo regno opcode merge. + else + s = band(opcode, 15) -- Undo last digit. + opcode = shr(opcode, 4) + end + local nn = c == "m" and 1 or 2 + local t = args[nn] + if narg <= nn then narg = nn + 1 end + if szov == "q" and rex == 0 then rex = rex + 8 end + if t.reg and t.reg > 7 then rex = rex + 1 end + if t.xreg and t.xreg > 7 then rex = rex + 2 end + if s > 7 then rex = rex + 4 end + if needrex then rex = rex + 16 end + local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) + opcode = nil + local imark = sub(pat, -1) -- Force a mark (ugly). + -- Put ModRM/SIB with regno/last digit as spare. + wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) + addin = nil + elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix + local b = band(opcode, 255); opcode = shr(opcode, 8) + local m = 1 + if b == 0x38 then m = 2 + elseif b == 0x3a then m = 3 end + if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end + if b ~= 0x0f then + werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. 
+ "' in pattern `"..pat.."' for `"..op.."'") + end + local v = map_vexarg[c] + if v then v = remove(args, v) end + b = band(opcode, 255) + local p = 0 + if b == 0x66 then p = 1 + elseif b == 0xf3 then p = 2 + elseif b == 0xf2 then p = 3 end + if p ~= 0 then opcode = shr(opcode, 8) end + if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end + vex = { m = m, p = p, v = v } + else + if opcode then -- Flush opcode. + if szov == "q" and rex == 0 then rex = rex + 8 end + if needrex then rex = rex + 16 end + if addin and addin.reg == -1 then + local psz, sk = wputop(szov, opcode - 7, rex, vex, true) + wvreg("opcode", addin.vreg, psz, sk) + else + if addin and addin.reg > 7 then rex = rex + 1 end + wputop(szov, opcode, rex, vex) + end + opcode = nil + end + if c == "|" then break end + if c == "o" then -- Offset (pure 32 bit displacement). + wputdarg(args[1].disp); if narg < 2 then narg = 2 end + elseif c == "O" then + wputdarg(args[2].disp); narg = 3 + else + -- Anything else is an immediate operand. + local a = args[narg] + narg = narg + 1 + local mode, imm = a.mode, a.imm + if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then + werror("bad operand size for label") + end + if c == "S" then + wputsbarg(imm) + elseif c == "U" then + wputbarg(imm) + elseif c == "W" then + wputwarg(imm) + elseif c == "i" or c == "I" then + if mode == "iJ" then + wputlabel("IMM_", imm, 1) + elseif mode == "iI" and c == "I" then + waction(sz == "w" and "IMM_WB" or "IMM_DB", imm) + else + wputszarg(sz, imm) + end + elseif c == "J" then + if mode == "iPJ" then + waction("REL_A", imm) -- !x64 (secpos) + else + wputlabel("REL_", imm, 2) + end + elseif c == "s" then + local reg = a.reg + if reg < 0 then + wputb(0) + wvreg("imm.hi", a.vreg) + else + wputb(shl(reg, 4)) + end + else + werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") + end + end + end + end +end + +------------------------------------------------------------------------------ + +-- Mapping of operand modes to short names. Suppress output with '#'. +local map_modename = { + r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm", + f = "stx", F = "st0", J = "lbl", ["1"] = "1", + I = "#", S = "#", O = "#", +} + +-- Return a table/string showing all possible operand modes. +local function templatehelp(template, nparams) + if nparams == 0 then return "" end + local t = {} + for tm in gmatch(template, "[^%|]+") do + local s = map_modename[sub(tm, 1, 1)] + s = s..gsub(sub(tm, 2, nparams), ".", function(c) + return ", "..map_modename[c] + end) + if not match(s, "#") then t[#t+1] = s end + end + return t +end + +-- Match operand modes against mode match part of template. +local function matchtm(tm, args) + for i=1,#args do + if not match(args[i].mode, sub(tm, i, i)) then return end + end + return true +end + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return templatehelp(template, nparams) end + local args = {} + + -- Zero-operand opcodes have no match part. + if #params == 0 then + dopattern(template, args, "d", params.op, nil) + return + end + + -- Determine common operand size (coerce undefined size) or flag as mixed. 
+ local sz, szmix, needrex + for i,p in ipairs(params) do + args[i] = parseoperand(p) + local nsz = args[i].opsize + if nsz then + if sz and sz ~= nsz then szmix = true else sz = nsz end + end + local nrex = args[i].needrex + if nrex ~= nil then + if needrex == nil then + needrex = nrex + elseif needrex ~= nrex then + werror("bad mix of byte-addressable registers") + end + end + end + + -- Try all match:pattern pairs (separated by '|'). + local gotmatch, lastpat + for tm in gmatch(template, "[^%|]+") do + -- Split off size match (starts after mode match) and pattern string. + local szm, pat = match(tm, "^(.-):(.*)$", #args+1) + if pat == "" then pat = lastpat else lastpat = pat end + if matchtm(tm, args) then + local prefix = sub(szm, 1, 1) + if prefix == "/" then -- Exactly match leading operand sizes. + for i = #szm,1,-1 do + if i == 1 then + dopattern(pat, args, sz, params.op, needrex) -- Process pattern. + return + elseif args[i-1].opsize ~= sub(szm, i, i) then + break + end + end + else -- Match common operand size. + local szp = sz + if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes. + if prefix == "1" then szp = args[1].opsize; szmix = nil + elseif prefix == "2" then szp = args[2].opsize; szmix = nil end + if not szmix and (prefix == "." or match(szm, szp or "#")) then + dopattern(pat, args, szp, params.op, needrex) -- Process pattern. + return + end + end + gotmatch = true + end + end + + local msg = "bad operand mode" + if gotmatch then + if szmix then + msg = "mixed operand size" + else + msg = sz and "bad operand size" or "missing operand size" + end + end + + werror(msg.." in `"..opmodestr(params.op, args).."'") +end + +------------------------------------------------------------------------------ + +-- x64-specific opcode for 64 bit immediates and displacements. +if x64 then + function map_op.mov64_2(params) + if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end + if secpos+2 > maxsecpos then wflush() end + local opcode, op64, sz, rex, vreg + local op64 = match(params[1], "^%[%s*(.-)%s*%]$") + if op64 then + local a = parseoperand(params[2]) + if a.mode ~= "rmR" then werror("bad operand mode") end + sz = a.opsize + rex = sz == "q" and 8 or 0 + opcode = 0xa3 + else + op64 = match(params[2], "^%[%s*(.-)%s*%]$") + local a = parseoperand(params[1]) + if op64 then + if a.mode ~= "rmR" then werror("bad operand mode") end + sz = a.opsize + rex = sz == "q" and 8 or 0 + opcode = 0xa1 + else + if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then + werror("bad operand mode") + end + op64 = params[2] + if a.reg == -1 then + vreg = a.vreg + opcode = 0xb8 + else + opcode = 0xb8 + band(a.reg, 7) + end + rex = a.reg > 7 and 9 or 8 + end + end + local psz, sk = wputop(sz, opcode, rex, nil, vreg) + wvreg("opcode", vreg, psz, sk) + waction("IMM_D", format("(unsigned int)(%s)", op64)) + waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) + end +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." 
end + local sz = sub(params.op, 2, 2) + if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end + for _,p in ipairs(params) do + local a = parseoperand(p, sz == "q") + if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then + werror("bad mode or size in `"..p.."'") + end + if a.mode == "iJ" then + wputlabel("IMM_", a.imm, 1) + elseif sz == "q" then + wputqarg(a.imm) + else + wputszarg(sz, a.imm) + end + if secpos+2 > maxsecpos then wflush() end + end +end + +map_op[".byte_*"] = op_data +map_op[".sbyte_*"] = op_data +map_op[".word_*"] = op_data +map_op[".dword_*"] = op_data +map_op[".qword_*"] = op_data +map_op[".aword_*"] = op_data +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_2"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end + if secpos+2 > maxsecpos then wflush() end + local a = parseoperand(params[1]) + local mode, imm = a.mode, a.imm + if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then + -- Local label (1: ... 9:) or global label (->global:). + waction("LABEL_LG", nil, 1) + wputxb(imm) + elseif mode == "iJ" then + -- PC label (=>pcexpr:). + waction("LABEL_PC", imm) + else + werror("bad label definition") + end + -- SETLABEL must immediately follow LABEL_LG/LABEL_PC. + local addr = params[2] + if addr then + local a = parseoperand(addr) + if a.mode == "iPJ" then + waction("SETLABEL", a.imm) + else + werror("bad label assignment") + end + end +end +map_op[".label_1"] = map_op[".label_2"] + +------------------------------------------------------------------------------ + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]] + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", nil, 1) + wputxb(align-1) -- Action byte is 2**n-1. 
+ return + end + end + end + werror("bad alignment") +end + +-- Spacing pseudo-opcode. +map_op[".space_2"] = function(params) + if not params then return "num [, filler]" end + if secpos+1 > maxsecpos then wflush() end + waction("SPACE", params[1]) + local fill = params[2] + if fill then + fill = tonumber(fill) + if not fill or fill < 0 or fill > 255 then werror("bad filler") end + end + wputxb(fill or 0) +end +map_op[".space_1"] = map_op[".space_2"] + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + if reg and not map_reg_valid_base[reg] then + werror("bad base register `"..(map_reg_rev[reg] or reg).."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg and map_reg_rev[tp.reg] or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION") + wputxb(num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpregs(out) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/dynasm.lua b/ext/opcache/jit/ir/dynasm/dynasm.lua new file mode 100644 index 0000000000000..2583295fce368 --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/dynasm.lua @@ -0,0 +1,1095 @@ +------------------------------------------------------------------------------ +-- DynASM. 
A dynamic assembler for code generation engines. +-- Originally designed and implemented for LuaJIT. +-- +-- Copyright (C) 2005-2021 Mike Pall. All rights reserved. +-- See below for full copyright notice. +------------------------------------------------------------------------------ + +-- Application information. +local _info = { + name = "DynASM", + description = "A dynamic assembler for code generation engines", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + url = "https://luajit.org/dynasm.html", + license = "MIT", + copyright = [[ +Copyright (C) 2005-2021 Mike Pall. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +[ MIT license: https://www.opensource.org/licenses/mit-license.php ] +]], +} + +-- Cache library functions. +local type, pairs, ipairs = type, pairs, ipairs +local pcall, error, assert = pcall, error, assert +local _s = string +local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub +local format, rep, upper = _s.format, _s.rep, _s.upper +local _t = table +local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort +local exit = os.exit +local io = io +local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr + +------------------------------------------------------------------------------ + +-- Program options. +local g_opt = {} + +-- Global state for current file. +local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch +local g_errcount = 0 + +-- Write buffer for output file. +local g_wbuffer, g_capbuffer + +------------------------------------------------------------------------------ + +-- Write an output line (or callback function) to the buffer. +local function wline(line, needindent) + local buf = g_capbuffer or g_wbuffer + buf[#buf+1] = needindent and g_indent..line or line + g_synclineno = g_synclineno + 1 +end + +-- Write assembler line as a comment, if requested. +local function wcomment(aline) + if g_opt.comment then + wline(g_opt.comment..aline..g_opt.endcomment, true) + end +end + +-- Resync CPP line numbers. +local function wsync() + if g_synclineno ~= g_lineno and g_opt.cpp then + wline("#line "..g_lineno..' "'..g_fname..'"') + g_synclineno = g_lineno + end +end + +-- Dummy action flush function. Replaced with arch-specific function later. +local function wflush(term) +end + +-- Dump all buffered output lines. 
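+-- Buffered entries are either plain strings or callback functions; the
+-- callbacks allow content to be generated only after all input has been
+-- processed, e.g. the arch module queues the action list with
+-- wline(function(out) writeactions(out, name) end).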
+local function wdumplines(out, buf) + for _,line in ipairs(buf) do + if type(line) == "string" then + assert(out:write(line, "\n")) + else + -- Special callback to dynamically insert lines after end of processing. + line(out) + end + end +end + +------------------------------------------------------------------------------ + +-- Emit an error. Processing continues with next statement. +local function werror(msg) + error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0) +end + +-- Emit a fatal error. Processing stops. +local function wfatal(msg) + g_errcount = "fatal" + werror(msg) +end + +-- Print a warning. Processing continues. +local function wwarn(msg) + stderr:write(format("%s:%s: warning: %s:\n%s\n", + g_fname, g_lineno, msg, g_curline)) +end + +-- Print caught error message. But suppress excessive errors. +local function wprinterr(...) + if type(g_errcount) == "number" then + -- Regular error. + g_errcount = g_errcount + 1 + if g_errcount < 21 then -- Seems to be a reasonable limit. + stderr:write(...) + elseif g_errcount == 21 then + stderr:write(g_fname, + ":*: warning: too many errors (suppressed further messages).\n") + end + else + -- Fatal error. + stderr:write(...) + return true -- Stop processing. + end +end + +------------------------------------------------------------------------------ + +-- Map holding all option handlers. +local opt_map = {} +local opt_current + +-- Print error and exit with error status. +local function opterror(...) + stderr:write("dynasm.lua: ERROR: ", ...) + stderr:write("\n") + exit(1) +end + +-- Get option parameter. +local function optparam(args) + local argn = args.argn + local p = args[argn] + if not p then + opterror("missing parameter for option `", opt_current, "'.") + end + args.argn = argn + 1 + return p +end + +------------------------------------------------------------------------------ + +-- Core pseudo-opcodes. +local map_coreop = {} +-- Dummy opcode map. Replaced by arch-specific map. +local map_op = {} + +-- Forward declarations. +local dostmt +local readfile + +------------------------------------------------------------------------------ + +-- Map for defines (initially empty, chains to arch-specific map). +local map_def = {} + +-- Pseudo-opcode to define a substitution. +map_coreop[".define_2"] = function(params, nparams) + if not params then return nparams == 1 and "name" or "name, subst" end + local name, def = params[1], params[2] or "1" + if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end + map_def[name] = def +end +map_coreop[".define_1"] = map_coreop[".define_2"] + +-- Define a substitution on the command line. +function opt_map.D(args) + local namesubst = optparam(args) + local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$") + if name then + map_def[name] = subst + elseif match(namesubst, "^[%a_][%w_]*$") then + map_def[namesubst] = "1" + else + opterror("bad define") + end +end + +-- Undefine a substitution on the command line. +function opt_map.U(args) + local name = optparam(args) + if match(name, "^[%a_][%w_]*$") then + map_def[name] = nil + else + opterror("bad define") + end +end + +-- Helper for definesubst. +local gotsubst + +local function definesubst_one(word) + local subst = map_def[word] + if subst then gotsubst = word; return subst else return word end +end + +-- Iteratively substitute defines. +local function definesubst(stmt) + -- Limit number of iterations. 
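+-- E.g. (illustrative, hypothetical names): with ".define PTR_SIZE WORD_SIZE"
+-- and ".define WORD_SIZE 8", one pass rewrites PTR_SIZE to WORD_SIZE and the
+-- next rewrites that to 8; a define that keeps substituting never settles
+-- and is reported as recursive below.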
+ for i=1,100 do + gotsubst = false + stmt = gsub(stmt, "#?[%w_]+", definesubst_one) + if not gotsubst then break end + end + if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end + return stmt +end + +-- Dump all defines. +local function dumpdefines(out, lvl) + local t = {} + for name in pairs(map_def) do + t[#t+1] = name + end + sort(t) + out:write("Defines:\n") + for _,name in ipairs(t) do + local subst = map_def[name] + if g_arch then subst = g_arch.revdef(subst) end + out:write(format(" %-20s %s\n", name, subst)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Support variables for conditional assembly. +local condlevel = 0 +local condstack = {} + +-- Evaluate condition with a Lua expression. Substitutions already performed. +local function cond_eval(cond) + local func, err + if setfenv then + func, err = loadstring("return "..cond, "=expr") + else + -- No globals. All unknown identifiers evaluate to nil. + func, err = load("return "..cond, "=expr", "t", {}) + end + if func then + if setfenv then + setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil. + end + local ok, res = pcall(func) + if ok then + if res == 0 then return false end -- Oh well. + return not not res + end + err = res + end + wfatal("bad condition: "..err) +end + +-- Skip statements until next conditional pseudo-opcode at the same level. +local function stmtskip() + local dostmt_save = dostmt + local lvl = 0 + dostmt = function(stmt) + local op = match(stmt, "^%s*(%S+)") + if op == ".if" then + lvl = lvl + 1 + elseif lvl ~= 0 then + if op == ".endif" then lvl = lvl - 1 end + elseif op == ".elif" or op == ".else" or op == ".endif" then + dostmt = dostmt_save + dostmt(stmt) + end + end +end + +-- Pseudo-opcodes for conditional assembly. +map_coreop[".if_1"] = function(params) + if not params then return "condition" end + local lvl = condlevel + 1 + local res = cond_eval(params[1]) + condlevel = lvl + condstack[lvl] = res + if not res then stmtskip() end +end + +map_coreop[".elif_1"] = function(params) + if not params then return "condition" end + if condlevel == 0 then wfatal(".elif without .if") end + local lvl = condlevel + local res = condstack[lvl] + if res then + if res == "else" then wfatal(".elif after .else") end + else + res = cond_eval(params[1]) + if res then + condstack[lvl] = res + return + end + end + stmtskip() +end + +map_coreop[".else_0"] = function(params) + if condlevel == 0 then wfatal(".else without .if") end + local lvl = condlevel + local res = condstack[lvl] + condstack[lvl] = "else" + if res then + if res == "else" then wfatal(".else after .else") end + stmtskip() + end +end + +map_coreop[".endif_0"] = function(params) + local lvl = condlevel + if lvl == 0 then wfatal(".endif without .if") end + condlevel = lvl - 1 +end + +-- Check for unfinished conditionals. +local function checkconds() + if g_errcount ~= "fatal" and condlevel ~= 0 then + wprinterr(g_fname, ":*: error: unbalanced conditional\n") + end +end + +------------------------------------------------------------------------------ + +-- Search for a file in the given path and open it for reading. +local function pathopen(path, name) + local dirsep = package and match(package.path, "\\") and "\\" or "/" + for _,p in ipairs(path) do + local fullname = p == "" and name or p..dirsep..name + local fin = io.open(fullname, "r") + if fin then + g_fname = fullname + return fin + end + end +end + +-- Include a file. 
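+-- E.g. (illustrative) a statement like "|.include common.dasc" reads the
+-- named file via the -I include search path; file name, line number and
+-- indentation are saved and restored around the nested read so diagnostics
+-- keep pointing at the correct file.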
+map_coreop[".include_1"] = function(params) + if not params then return "filename" end + local name = params[1] + -- Save state. Ugly, I know. but upvalues are fast. + local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent + -- Read the included file. + local fatal = readfile(pathopen(g_opt.include, name) or + wfatal("include file `"..name.."' not found")) + -- Restore state. + g_synclineno = -1 + g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi + if fatal then wfatal("in include file") end +end + +-- Make .include and conditionals initially available, too. +map_op[".include_1"] = map_coreop[".include_1"] +map_op[".if_1"] = map_coreop[".if_1"] +map_op[".elif_1"] = map_coreop[".elif_1"] +map_op[".else_0"] = map_coreop[".else_0"] +map_op[".endif_0"] = map_coreop[".endif_0"] + +------------------------------------------------------------------------------ + +-- Support variables for macros. +local mac_capture, mac_lineno, mac_name +local mac_active = {} +local mac_list = {} + +-- Pseudo-opcode to define a macro. +map_coreop[".macro_*"] = function(mparams) + if not mparams then return "name [, params...]" end + -- Split off and validate macro name. + local name = remove(mparams, 1) + if not name then werror("missing macro name") end + if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]*$")) then + wfatal("bad macro name `"..name.."'") + end + -- Validate macro parameter names. + local mdup = {} + for _,mp in ipairs(mparams) do + if not match(mp, "^[%a_][%w_]*$") then + wfatal("bad macro parameter name `"..mp.."'") + end + if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end + mdup[mp] = true + end + -- Check for duplicate or recursive macro definitions. + local opname = name.."_"..#mparams + if map_op[opname] or map_op[name.."_*"] then + wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)") + end + if mac_capture then wfatal("recursive macro definition") end + + -- Enable statement capture. + local lines = {} + mac_lineno = g_lineno + mac_name = name + mac_capture = function(stmt) -- Statement capture function. + -- Stop macro definition with .endmacro pseudo-opcode. + if not match(stmt, "^%s*.endmacro%s*$") then + lines[#lines+1] = stmt + return + end + mac_capture = nil + mac_lineno = nil + mac_name = nil + mac_list[#mac_list+1] = opname + -- Add macro-op definition. + map_op[opname] = function(params) + if not params then return mparams, lines end + -- Protect against recursive macro invocation. + if mac_active[opname] then wfatal("recursive macro invocation") end + mac_active[opname] = true + -- Setup substitution map. + local subst = {} + for i,mp in ipairs(mparams) do subst[mp] = params[i] end + local mcom + if g_opt.maccomment and g_opt.comment then + mcom = " MACRO "..name.." ("..#mparams..")" + wcomment("{"..mcom) + end + -- Loop through all captured statements + for _,stmt in ipairs(lines) do + -- Substitute macro parameters. + local st = gsub(stmt, "[%w_]+", subst) + st = definesubst(st) + st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b. + if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end + -- Emit statement. Use a protected call for better diagnostics. + local ok, err = pcall(dostmt, st) + if not ok then + -- Add the captured statement to the error. + wprinterr(err, "\n", g_indent, "| ", stmt, + "\t[MACRO ", name, " (", #mparams, ")]\n") + end + end + if mcom then wcomment("}"..mcom) end + mac_active[opname] = nil + end + end +end + +-- An .endmacro pseudo-opcode outside of a macro definition is an error. 
+map_coreop[".endmacro_0"] = function(params) + wfatal(".endmacro without .macro") +end + +-- Dump all macros and their contents (with -PP only). +local function dumpmacros(out, lvl) + sort(mac_list) + out:write("Macros:\n") + for _,opname in ipairs(mac_list) do + local name = sub(opname, 1, -3) + local params, lines = map_op[opname]() + out:write(format(" %-20s %s\n", name, concat(params, ", "))) + if lvl > 1 then + for _,line in ipairs(lines) do + out:write(" |", line, "\n") + end + out:write("\n") + end + end + out:write("\n") +end + +-- Check for unfinished macro definitions. +local function checkmacros() + if mac_capture then + wprinterr(g_fname, ":", mac_lineno, + ": error: unfinished .macro `", mac_name ,"'\n") + end +end + +------------------------------------------------------------------------------ + +-- Support variables for captures. +local cap_lineno, cap_name +local cap_buffers = {} +local cap_used = {} + +-- Start a capture. +map_coreop[".capture_1"] = function(params) + if not params then return "name" end + wflush() + local name = params[1] + if not match(name, "^[%a_][%w_]*$") then + wfatal("bad capture name `"..name.."'") + end + if cap_name then + wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno) + end + cap_name = name + cap_lineno = g_lineno + -- Create or continue a capture buffer and start the output line capture. + local buf = cap_buffers[name] + if not buf then buf = {}; cap_buffers[name] = buf end + g_capbuffer = buf + g_synclineno = 0 +end + +-- Stop a capture. +map_coreop[".endcapture_0"] = function(params) + wflush() + if not cap_name then wfatal(".endcapture without a valid .capture") end + cap_name = nil + cap_lineno = nil + g_capbuffer = nil + g_synclineno = 0 +end + +-- Dump a capture buffer. +map_coreop[".dumpcapture_1"] = function(params) + if not params then return "name" end + wflush() + local name = params[1] + if not match(name, "^[%a_][%w_]*$") then + wfatal("bad capture name `"..name.."'") + end + cap_used[name] = true + wline(function(out) + local buf = cap_buffers[name] + if buf then wdumplines(out, buf) end + end) + g_synclineno = 0 +end + +-- Dump all captures and their buffers (with -PP only). +local function dumpcaptures(out, lvl) + out:write("Captures:\n") + for name,buf in pairs(cap_buffers) do + out:write(format(" %-20s %4s)\n", name, "("..#buf)) + if lvl > 1 then + local bar = rep("=", 76) + out:write(" ", bar, "\n") + for _,line in ipairs(buf) do + out:write(" ", line, "\n") + end + out:write(" ", bar, "\n\n") + end + end + out:write("\n") +end + +-- Check for unfinished or unused captures. +local function checkcaptures() + if cap_name then + wprinterr(g_fname, ":", cap_lineno, + ": error: unfinished .capture `", cap_name,"'\n") + return + end + for name in pairs(cap_buffers) do + if not cap_used[name] then + wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n") + end + end +end + +------------------------------------------------------------------------------ + +-- Sections names. +local map_sections = {} + +-- Pseudo-opcode to define code sections. +-- TODO: Data sections, BSS sections. Needs extra C code and API. +map_coreop[".section_*"] = function(params) + if not params then return "name..." 
end + if #map_sections > 0 then werror("duplicate section definition") end + wflush() + for sn,name in ipairs(params) do + local opname = "."..name.."_0" + if not match(name, "^[%a][%w_]*$") or + map_op[opname] or map_op["."..name.."_*"] then + werror("bad section name `"..name.."'") + end + map_sections[#map_sections+1] = name + wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1)) + map_op[opname] = function(params) g_arch.section(sn-1) end + end + wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections)) +end + +-- Dump all sections. +local function dumpsections(out, lvl) + out:write("Sections:\n") + for _,name in ipairs(map_sections) do + out:write(format(" %s\n", name)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Replacement for customized Lua, which lacks the package library. +local prefix = "" +if not require then + function require(name) + local fp = assert(io.open(prefix..name..".lua")) + local s = fp:read("*a") + assert(fp:close()) + return assert(loadstring(s, "@"..name..".lua"))() + end +end + +-- Load architecture-specific module. +local function loadarch(arch) + if not match(arch, "^[%w_]+$") then return "bad arch name" end + _G._map_def = map_def + local ok, m_arch = pcall(require, "dasm_"..arch) + if not ok then return "cannot load module: "..m_arch end + g_arch = m_arch + wflush = m_arch.passcb(wline, werror, wfatal, wwarn) + m_arch.setup(arch, g_opt) + map_op, map_def = m_arch.mergemaps(map_coreop, map_def) +end + +-- Dump architecture description. +function opt_map.dumparch(args) + local name = optparam(args) + if not g_arch then + local err = loadarch(name) + if err then opterror(err) end + end + + local t = {} + for name in pairs(map_coreop) do t[#t+1] = name end + for name in pairs(map_op) do t[#t+1] = name end + sort(t) + + local out = stdout + local _arch = g_arch._info + out:write(format("%s version %s, released %s, %s\n", + _info.name, _info.version, _info.release, _info.url)) + g_arch.dumparch(out) + + local pseudo = true + out:write("Pseudo-Opcodes:\n") + for _,sname in ipairs(t) do + local name, nparam = match(sname, "^(.+)_([0-9%*])$") + if name then + if pseudo and sub(name, 1, 1) ~= "." then + out:write("\nOpcodes:\n") + pseudo = false + end + local f = map_op[sname] + local s + if nparam ~= "*" then nparam = nparam + 0 end + if nparam == 0 then + s = "" + elseif type(f) == "string" then + s = map_op[".template__"](nil, f, nparam) + else + s = f(nil, nparam) + end + if type(s) == "table" then + for _,s2 in ipairs(s) do + out:write(format(" %-12s %s\n", name, s2)) + end + else + out:write(format(" %-12s %s\n", name, s)) + end + end + end + out:write("\n") + exit(0) +end + +-- Pseudo-opcode to set the architecture. +-- Only initially available (map_op is replaced when called). +map_op[".arch_1"] = function(params) + if not params then return "name" end + local err = loadarch(params[1]) + if err then wfatal(err) end + wline(format("#if DASM_VERSION != %d", _info.vernum)) + wline('#error "Version mismatch between DynASM and included encoding engine"') + wline("#endif") +end + +-- Dummy .arch pseudo-opcode to improve the error report. +map_coreop[".arch_1"] = function(params) + if not params then return "name" end + wfatal("duplicate .arch statement") +end + +------------------------------------------------------------------------------ + +-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'. 
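+-- A '.nop' statement is parsed like any other pseudo-opcode but emits
+-- nothing: e.g. (illustrative) "|.nop some, ignored, params" is accepted
+-- and discarded, while a plain "| nop" assembles an actual NOP instruction.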
+map_coreop[".nop_*"] = function(params) + if not params then return "[ignored...]" end +end + +-- Pseudo-opcodes to raise errors. +map_coreop[".error_1"] = function(params) + if not params then return "message" end + werror(params[1]) +end + +map_coreop[".fatal_1"] = function(params) + if not params then return "message" end + wfatal(params[1]) +end + +-- Dump all user defined elements. +local function dumpdef(out) + local lvl = g_opt.dumpdef + if lvl == 0 then return end + dumpsections(out, lvl) + dumpdefines(out, lvl) + if g_arch then g_arch.dumpdef(out, lvl) end + dumpmacros(out, lvl) + dumpcaptures(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Helper for splitstmt. +local splitlvl + +local function splitstmt_one(c) + if c == "(" then + splitlvl = ")"..splitlvl + elseif c == "[" then + splitlvl = "]"..splitlvl + elseif c == "{" then + splitlvl = "}"..splitlvl + elseif c == ")" or c == "]" or c == "}" then + if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end + splitlvl = sub(splitlvl, 2) + elseif splitlvl == "" then + return " \0 " + end + return c +end + +-- Split statement into (pseudo-)opcode and params. +local function splitstmt(stmt) + -- Convert label with trailing-colon into .label statement. + local label = match(stmt, "^%s*(.+):%s*$") + if label then return ".label", {label} end + + -- Split at commas and equal signs, but obey parentheses and brackets. + splitlvl = "" + stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one) + if splitlvl ~= "" then werror("unbalanced () or []") end + + -- Split off opcode. + local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$") + if not op then werror("bad statement syntax") end + + -- Split parameters. + local params = {} + for p in gmatch(other, "%s*(%Z+)%z?") do + params[#params+1] = gsub(p, "%s+$", "") + end + if #params > 16 then werror("too many parameters") end + + params.op = op + return op, params +end + +-- Process a single statement. +dostmt = function(stmt) + -- Ignore empty statements. + if match(stmt, "^%s*$") then return end + + -- Capture macro defs before substitution. + if mac_capture then return mac_capture(stmt) end + stmt = definesubst(stmt) + + -- Emit C code without parsing the line. + if sub(stmt, 1, 1) == "|" then + local tail = sub(stmt, 2) + wflush() + if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end + return + end + + -- Split into (pseudo-)opcode and params. + local op, params = splitstmt(stmt) + + -- Get opcode handler (matching # of parameters or generic handler). + local f = map_op[op.."_"..#params] or map_op[op.."_*"] + if not f then + if not g_arch then wfatal("first statement must be .arch") end + -- Improve error report. + for i=0,9 do + if map_op[op.."_"..i] then + werror("wrong number of parameters for `"..op.."'") + end + end + werror("unknown statement `"..op.."'") + end + + -- Call opcode handler or special handler for template strings. + if type(f) == "string" then + map_op[".template__"](params, f) + else + f(params) + end +end + +-- Process a single line. +local function doline(line) + if g_opt.flushline then wflush() end + + -- Assembler line? + local indent, aline = match(line, "^(%s*)%|(.*)$") + if not aline then + -- No, plain C code line, need to flush first. + wflush() + wsync() + wline(line, false) + return + end + + g_indent = indent -- Remember current line indentation. + + -- Emit C code (even from macros). Avoids echo and line parsing. 
+ if sub(aline, 1, 1) == "|" then + if not mac_capture then + wsync() + elseif g_opt.comment then + wsync() + wcomment(aline) + end + dostmt(aline) + return + end + + -- Echo assembler line as a comment. + if g_opt.comment then + wsync() + wcomment(aline) + end + + -- Strip assembler comments. + aline = gsub(aline, "//.*$", "") + + -- Split line into statements at semicolons. + if match(aline, ";") then + for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end + else + dostmt(aline) + end +end + +------------------------------------------------------------------------------ + +-- Write DynASM header. +local function dasmhead(out) + out:write(format([[ +/* +** This file has been pre-processed with DynASM. +** %s +** DynASM version %s, DynASM %s version %s +** DO NOT EDIT! The original file is in "%s". +*/ + +]], _info.url, + _info.version, g_arch._info.arch, g_arch._info.version, + g_fname)) +end + +-- Read input file. +readfile = function(fin) + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Process all lines. + for line in fin:lines() do + g_lineno = g_lineno + 1 + g_curline = line + local ok, err = pcall(doline, line) + if not ok and wprinterr(err, "\n") then return true end + end + wflush() + + -- Close input file. + assert(fin == stdin or fin:close()) +end + +-- Write output file. +local function writefile(outfile) + local fout + + -- Open output file. + if outfile == nil or outfile == "-" then + fout = stdout + else + fout = assert(io.open(outfile, "w")) + end + + -- Write all buffered lines + wdumplines(fout, g_wbuffer) + + -- Close output file. + assert(fout == stdout or fout:close()) + + -- Optionally dump definitions. + dumpdef(fout == stdout and stderr or stdout) +end + +-- Translate an input file to an output file. +local function translate(infile, outfile) + g_wbuffer = {} + g_indent = "" + g_lineno = 0 + g_synclineno = -1 + + -- Put header. + wline(dasmhead) + + -- Read input file. + local fin + if infile == "-" then + g_fname = "(stdin)" + fin = stdin + else + g_fname = infile + fin = assert(io.open(infile, "r")) + end + readfile(fin) + + -- Check for errors. + if not g_arch then + wprinterr(g_fname, ":*: error: missing .arch directive\n") + end + checkconds() + checkmacros() + checkcaptures() + + if g_errcount ~= 0 then + stderr:write(g_fname, ":*: info: ", g_errcount, " error", + (type(g_errcount) == "number" and g_errcount > 1) and "s" or "", + " in input file -- no output file generated.\n") + dumpdef(stderr) + exit(1) + end + + -- Write output file. + writefile(outfile) +end + +------------------------------------------------------------------------------ + +-- Print help text. +function opt_map.help() + stdout:write("DynASM -- ", _info.description, ".\n") + stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n") + stdout:write[[ + +Usage: dynasm [OPTION]... INFILE.dasc|- + + -h, --help Display this help text. + -V, --version Display version and copyright information. + + -o, --outfile FILE Output file name (default is stdout). + -I, --include DIR Add directory to the include search path. + + -c, --ccomment Use /* */ comments for assembler lines. + -C, --cppcomment Use // comments for assembler lines (default). + -N, --nocomment Suppress assembler lines in output. + -M, --maccomment Show macro expansions as comments (default off). + + -L, --nolineno Suppress CPP line number information in output. + -F, --flushline Flush action list for every line. + + -D NAME[=SUBST] Define a substitution. + -U NAME Undefine a substitution. 
+ + -P, --dumpdef Dump defines, macros, etc. Repeat for more output. + -A, --dumparch ARCH Load architecture ARCH and dump description. +]] + exit(0) +end + +-- Print version information. +function opt_map.version() + stdout:write(format("%s version %s, released %s\n%s\n\n%s", + _info.name, _info.version, _info.release, _info.url, _info.copyright)) + exit(0) +end + +-- Misc. options. +function opt_map.outfile(args) g_opt.outfile = optparam(args) end +function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end +function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end +function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end +function opt_map.nocomment() g_opt.comment = false end +function opt_map.maccomment() g_opt.maccomment = true end +function opt_map.nolineno() g_opt.cpp = false end +function opt_map.flushline() g_opt.flushline = true end +function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end + +------------------------------------------------------------------------------ + +-- Short aliases for long options. +local opt_alias = { + h = "help", ["?"] = "help", V = "version", + o = "outfile", I = "include", + c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment", + L = "nolineno", F = "flushline", + P = "dumpdef", A = "dumparch", +} + +-- Parse single option. +local function parseopt(opt, args) + opt_current = #opt == 1 and "-"..opt or "--"..opt + local f = opt_map[opt] or opt_map[opt_alias[opt]] + if not f then + opterror("unrecognized option `", opt_current, "'. Try `--help'.\n") + end + f(args) +end + +-- Parse arguments. +local function parseargs(args) + -- Default options. + g_opt.comment = "//|" + g_opt.endcomment = "" + g_opt.cpp = true + g_opt.dumpdef = 0 + g_opt.include = { "" } + + -- Process all option arguments. + args.argn = 1 + repeat + local a = args[args.argn] + if not a then break end + local lopt, opt = match(a, "^%-(%-?)(.+)") + if not opt then break end + args.argn = args.argn + 1 + if lopt == "" then + -- Loop through short options. + for o in gmatch(opt, ".") do parseopt(o, args) end + else + -- Long option. + parseopt(opt, args) + end + until false + + -- Check for proper number of arguments. + local nargs = #args - args.argn + 1 + if nargs ~= 1 then + if nargs == 0 then + if g_opt.dumpdef > 0 then return dumpdef(stdout) end + end + opt_map.help() + end + + -- Translate a single input file to a single output file + -- TODO: Handle multiple files? + translate(args[args.argn], g_opt.outfile) +end + +------------------------------------------------------------------------------ + +-- Add the directory dynasm.lua resides in to the Lua module search path. +local arg = arg +if arg and arg[0] then + prefix = match(arg[0], "^(.*[/\\])") + if package and prefix then package.path = prefix.."?.lua;"..package.path end +end + +-- Start DynASM. +parseargs{...} + +------------------------------------------------------------------------------ + diff --git a/ext/opcache/jit/ir/dynasm/minilua.c b/ext/opcache/jit/ir/dynasm/minilua.c new file mode 100644 index 0000000000000..a8d7c305e109e --- /dev/null +++ b/ext/opcache/jit/ir/dynasm/minilua.c @@ -0,0 +1,7770 @@ +/* This is a heavily customized and minimized copy of Lua 5.1.5. */ +/* It's only used to build LuaJIT. It does NOT have all standard functions! */ +/****************************************************************************** +* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved. 
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+#ifdef _MSC_VER
+typedef unsigned __int64 U64;
+#else
+typedef unsigned long long U64;
+#endif
+int _CRT_glob = 0;
+#include <stddef.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <errno.h>
+#include <time.h>
+typedef enum{
+TM_INDEX,
+TM_NEWINDEX,
+TM_GC,
+TM_MODE,
+TM_EQ,
+TM_ADD,
+TM_SUB,
+TM_MUL,
+TM_DIV,
+TM_MOD,
+TM_POW,
+TM_UNM,
+TM_LEN,
+TM_LT,
+TM_LE,
+TM_CONCAT,
+TM_CALL,
+TM_N
+}TMS;
+enum OpMode{iABC,iABx,iAsBx};
+typedef enum{
+OP_MOVE,
+OP_LOADK,
+OP_LOADBOOL,
+OP_LOADNIL,
+OP_GETUPVAL,
+OP_GETGLOBAL,
+OP_GETTABLE,
+OP_SETGLOBAL,
+OP_SETUPVAL,
+OP_SETTABLE,
+OP_NEWTABLE,
+OP_SELF,
+OP_ADD,
+OP_SUB,
+OP_MUL,
+OP_DIV,
+OP_MOD,
+OP_POW,
+OP_UNM,
+OP_NOT,
+OP_LEN,
+OP_CONCAT,
+OP_JMP,
+OP_EQ,
+OP_LT,
+OP_LE,
+OP_TEST,
+OP_TESTSET,
+OP_CALL,
+OP_TAILCALL,
+OP_RETURN,
+OP_FORLOOP,
+OP_FORPREP,
+OP_TFORLOOP,
+OP_SETLIST,
+OP_CLOSE,
+OP_CLOSURE,
+OP_VARARG
+}OpCode;
+enum OpArgMask{
+OpArgN,
+OpArgU,
+OpArgR,
+OpArgK
+};
+typedef enum{
+VVOID,
+VNIL,
+VTRUE,
+VFALSE,
+VK,
+VKNUM,
+VLOCAL,
+VUPVAL,
+VGLOBAL,
+VINDEXED,
+VJMP,
+VRELOCABLE,
+VNONRELOC,
+VCALL,
+VVARARG
+}expkind;
+enum RESERVED{
+TK_AND=257,TK_BREAK,
+TK_DO,TK_ELSE,TK_ELSEIF,TK_END,TK_FALSE,TK_FOR,TK_FUNCTION,
+TK_IF,TK_IN,TK_LOCAL,TK_NIL,TK_NOT,TK_OR,TK_REPEAT,
+TK_RETURN,TK_THEN,TK_TRUE,TK_UNTIL,TK_WHILE,
+TK_CONCAT,TK_DOTS,TK_EQ,TK_GE,TK_LE,TK_NE,TK_NUMBER,
+TK_NAME,TK_STRING,TK_EOS
+};
+typedef enum BinOpr{
+OPR_ADD,OPR_SUB,OPR_MUL,OPR_DIV,OPR_MOD,OPR_POW,
+OPR_CONCAT,
+OPR_NE,OPR_EQ,
+OPR_LT,OPR_LE,OPR_GT,OPR_GE,
+OPR_AND,OPR_OR,
+OPR_NOBINOPR
+}BinOpr;
+typedef enum UnOpr{OPR_MINUS,OPR_NOT,OPR_LEN,OPR_NOUNOPR}UnOpr;
+#define LUA_QL(x)"'"x"'"
+#define luai_apicheck(L,o){(void)L;}
+#define lua_number2str(s,n)sprintf((s),"%.14g",(n))
+#define lua_str2number(s,p)strtod((s),(p))
+#define luai_numadd(a,b)((a)+(b))
+#define luai_numsub(a,b)((a)-(b))
+#define luai_nummul(a,b)((a)*(b))
+#define luai_numdiv(a,b)((a)/(b))
+#define luai_nummod(a,b)((a)-floor((a)/(b))*(b))
+#define luai_numpow(a,b)(pow(a,b))
+#define luai_numunm(a)(-(a))
+#define luai_numeq(a,b)((a)==(b))
+#define luai_numlt(a,b)((a)<(b))
+#define luai_numle(a,b)((a)<=(b))
+#define luai_numisnan(a)(!luai_numeq((a),(a)))
+#define lua_number2int(i,d)((i)=(int)(d))
+#define lua_number2integer(i,d)((i)=(lua_Integer)(d))
+#define LUAI_THROW(L,c)longjmp((c)->b,1)
+#define LUAI_TRY(L,c,a)if(setjmp((c)->b)==0){a}
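+/* Illustrative note (not part of upstream minilua): error handling here is
+** plain setjmp/longjmp. luaD_rawrunprotected() runs a protected body under
+** LUAI_TRY, and luaD_throw() unwinds to it through LUAI_THROW with a bare
+** status code (4 = "not enough memory", 5 = "error in error handling"). */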
+#define lua_pclose(L,file)((void)((void)L,file),0) +#define lua_upvalueindex(i)((-10002)-(i)) +typedef struct lua_State lua_State; +typedef int(*lua_CFunction)(lua_State*L); +typedef const char*(*lua_Reader)(lua_State*L,void*ud,size_t*sz); +typedef void*(*lua_Alloc)(void*ud,void*ptr,size_t osize,size_t nsize); +typedef double lua_Number; +typedef ptrdiff_t lua_Integer; +static void lua_settop(lua_State*L,int idx); +static int lua_type(lua_State*L,int idx); +static const char* lua_tolstring(lua_State*L,int idx,size_t*len); +static size_t lua_objlen(lua_State*L,int idx); +static void lua_pushlstring(lua_State*L,const char*s,size_t l); +static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n); +static void lua_createtable(lua_State*L,int narr,int nrec); +static void lua_setfield(lua_State*L,int idx,const char*k); +#define lua_pop(L,n)lua_settop(L,-(n)-1) +#define lua_newtable(L)lua_createtable(L,0,0) +#define lua_pushcfunction(L,f)lua_pushcclosure(L,(f),0) +#define lua_strlen(L,i)lua_objlen(L,(i)) +#define lua_isfunction(L,n)(lua_type(L,(n))==6) +#define lua_istable(L,n)(lua_type(L,(n))==5) +#define lua_isnil(L,n)(lua_type(L,(n))==0) +#define lua_isboolean(L,n)(lua_type(L,(n))==1) +#define lua_isnone(L,n)(lua_type(L,(n))==(-1)) +#define lua_isnoneornil(L,n)(lua_type(L,(n))<=0) +#define lua_pushliteral(L,s)lua_pushlstring(L,""s,(sizeof(s)/sizeof(char))-1) +#define lua_setglobal(L,s)lua_setfield(L,(-10002),(s)) +#define lua_tostring(L,i)lua_tolstring(L,(i),NULL) +typedef struct lua_Debug lua_Debug; +typedef void(*lua_Hook)(lua_State*L,lua_Debug*ar); +struct lua_Debug{ +int event; +const char*name; +const char*namewhat; +const char*what; +const char*source; +int currentline; +int nups; +int linedefined; +int lastlinedefined; +char short_src[60]; +int i_ci; +}; +typedef unsigned int lu_int32; +typedef size_t lu_mem; +typedef ptrdiff_t l_mem; +typedef unsigned char lu_byte; +#define IntPoint(p)((unsigned int)(lu_mem)(p)) +typedef union{double u;void*s;long l;}L_Umaxalign; +typedef double l_uacNumber; +#define check_exp(c,e)(e) +#define UNUSED(x)((void)(x)) +#define cast(t,exp)((t)(exp)) +#define cast_byte(i)cast(lu_byte,(i)) +#define cast_num(i)cast(lua_Number,(i)) +#define cast_int(i)cast(int,(i)) +typedef lu_int32 Instruction; +#define condhardstacktests(x)((void)0) +typedef union GCObject GCObject; +typedef struct GCheader{ +GCObject*next;lu_byte tt;lu_byte marked; +}GCheader; +typedef union{ +GCObject*gc; +void*p; +lua_Number n; +int b; +}Value; +typedef struct lua_TValue{ +Value value;int tt; +}TValue; +#define ttisnil(o)(ttype(o)==0) +#define ttisnumber(o)(ttype(o)==3) +#define ttisstring(o)(ttype(o)==4) +#define ttistable(o)(ttype(o)==5) +#define ttisfunction(o)(ttype(o)==6) +#define ttisboolean(o)(ttype(o)==1) +#define ttisuserdata(o)(ttype(o)==7) +#define ttisthread(o)(ttype(o)==8) +#define ttislightuserdata(o)(ttype(o)==2) +#define ttype(o)((o)->tt) +#define gcvalue(o)check_exp(iscollectable(o),(o)->value.gc) +#define pvalue(o)check_exp(ttislightuserdata(o),(o)->value.p) +#define nvalue(o)check_exp(ttisnumber(o),(o)->value.n) +#define rawtsvalue(o)check_exp(ttisstring(o),&(o)->value.gc->ts) +#define tsvalue(o)(&rawtsvalue(o)->tsv) +#define rawuvalue(o)check_exp(ttisuserdata(o),&(o)->value.gc->u) +#define uvalue(o)(&rawuvalue(o)->uv) +#define clvalue(o)check_exp(ttisfunction(o),&(o)->value.gc->cl) +#define hvalue(o)check_exp(ttistable(o),&(o)->value.gc->h) +#define bvalue(o)check_exp(ttisboolean(o),(o)->value.b) +#define thvalue(o)check_exp(ttisthread(o),&(o)->value.gc->th) 
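+/* Illustrative note (not part of upstream minilua): a TValue is a tagged
+** {Value,tt} pair, and the bare numeric tags used throughout stand for the
+** usual LUA_T* names: 0 nil, 1 boolean, 2 lightuserdata, 3 number, 4 string,
+** 5 table, 6 function, 7 userdata, 8 thread. */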
+#define l_isfalse(o)(ttisnil(o)||(ttisboolean(o)&&bvalue(o)==0)) +#define checkconsistency(obj) +#define checkliveness(g,obj) +#define setnilvalue(obj)((obj)->tt=0) +#define setnvalue(obj,x){TValue*i_o=(obj);i_o->value.n=(x);i_o->tt=3;} +#define setbvalue(obj,x){TValue*i_o=(obj);i_o->value.b=(x);i_o->tt=1;} +#define setsvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=4;checkliveness(G(L),i_o);} +#define setuvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=7;checkliveness(G(L),i_o);} +#define setthvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=8;checkliveness(G(L),i_o);} +#define setclvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=6;checkliveness(G(L),i_o);} +#define sethvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=5;checkliveness(G(L),i_o);} +#define setptvalue(L,obj,x){TValue*i_o=(obj);i_o->value.gc=cast(GCObject*,(x));i_o->tt=(8+1);checkliveness(G(L),i_o);} +#define setobj(L,obj1,obj2){const TValue*o2=(obj2);TValue*o1=(obj1);o1->value=o2->value;o1->tt=o2->tt;checkliveness(G(L),o1);} +#define setttype(obj,tt)(ttype(obj)=(tt)) +#define iscollectable(o)(ttype(o)>=4) +typedef TValue*StkId; +typedef union TString{ +L_Umaxalign dummy; +struct{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte reserved; +unsigned int hash; +size_t len; +}tsv; +}TString; +#define getstr(ts)cast(const char*,(ts)+1) +#define svalue(o)getstr(rawtsvalue(o)) +typedef union Udata{ +L_Umaxalign dummy; +struct{ +GCObject*next;lu_byte tt;lu_byte marked; +struct Table*metatable; +struct Table*env; +size_t len; +}uv; +}Udata; +typedef struct Proto{ +GCObject*next;lu_byte tt;lu_byte marked; +TValue*k; +Instruction*code; +struct Proto**p; +int*lineinfo; +struct LocVar*locvars; +TString**upvalues; +TString*source; +int sizeupvalues; +int sizek; +int sizecode; +int sizelineinfo; +int sizep; +int sizelocvars; +int linedefined; +int lastlinedefined; +GCObject*gclist; +lu_byte nups; +lu_byte numparams; +lu_byte is_vararg; +lu_byte maxstacksize; +}Proto; +typedef struct LocVar{ +TString*varname; +int startpc; +int endpc; +}LocVar; +typedef struct UpVal{ +GCObject*next;lu_byte tt;lu_byte marked; +TValue*v; +union{ +TValue value; +struct{ +struct UpVal*prev; +struct UpVal*next; +}l; +}u; +}UpVal; +typedef struct CClosure{ +GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env; +lua_CFunction f; +TValue upvalue[1]; +}CClosure; +typedef struct LClosure{ +GCObject*next;lu_byte tt;lu_byte marked;lu_byte isC;lu_byte nupvalues;GCObject*gclist;struct Table*env; +struct Proto*p; +UpVal*upvals[1]; +}LClosure; +typedef union Closure{ +CClosure c; +LClosure l; +}Closure; +#define iscfunction(o)(ttype(o)==6&&clvalue(o)->c.isC) +typedef union TKey{ +struct{ +Value value;int tt; +struct Node*next; +}nk; +TValue tvk; +}TKey; +typedef struct Node{ +TValue i_val; +TKey i_key; +}Node; +typedef struct Table{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte flags; +lu_byte lsizenode; +struct Table*metatable; +TValue*array; +Node*node; +Node*lastfree; +GCObject*gclist; +int sizearray; +}Table; +#define lmod(s,size)(check_exp((size&(size-1))==0,(cast(int,(s)&((size)-1))))) +#define twoto(x)((size_t)1<<(x)) +#define sizenode(t)(twoto((t)->lsizenode)) +static const TValue luaO_nilobject_; +#define ceillog2(x)(luaO_log2((x)-1)+1) +static int luaO_log2(unsigned int x); +#define gfasttm(g,et,e)((et)==NULL?NULL:((et)->flags&(1u<<(e)))?NULL:luaT_gettm(et,e,(g)->tmname[e])) 
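+/* Illustrative note (not part of upstream minilua): gfasttm() is a negative
+** cache. When luaT_gettm() finds that a metatable lacks an event such as
+** __index, it sets bit (1u<<event) in et->flags, so later lookups return
+** NULL without a hash probe; luaH_set() resets flags to 0 on any update. */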
+#define fasttm(l,et,e)gfasttm(G(l),et,e) +static const TValue*luaT_gettm(Table*events,TMS event,TString*ename); +#define luaM_reallocv(L,b,on,n,e)((cast(size_t,(n)+1)<=((size_t)(~(size_t)0)-2)/(e))?luaM_realloc_(L,(b),(on)*(e),(n)*(e)):luaM_toobig(L)) +#define luaM_freemem(L,b,s)luaM_realloc_(L,(b),(s),0) +#define luaM_free(L,b)luaM_realloc_(L,(b),sizeof(*(b)),0) +#define luaM_freearray(L,b,n,t)luaM_reallocv(L,(b),n,0,sizeof(t)) +#define luaM_malloc(L,t)luaM_realloc_(L,NULL,0,(t)) +#define luaM_new(L,t)cast(t*,luaM_malloc(L,sizeof(t))) +#define luaM_newvector(L,n,t)cast(t*,luaM_reallocv(L,NULL,0,n,sizeof(t))) +#define luaM_growvector(L,v,nelems,size,t,limit,e)if((nelems)+1>(size))((v)=cast(t*,luaM_growaux_(L,v,&(size),sizeof(t),limit,e))) +#define luaM_reallocvector(L,v,oldn,n,t)((v)=cast(t*,luaM_reallocv(L,v,oldn,n,sizeof(t)))) +static void*luaM_realloc_(lua_State*L,void*block,size_t oldsize, +size_t size); +static void*luaM_toobig(lua_State*L); +static void*luaM_growaux_(lua_State*L,void*block,int*size, +size_t size_elem,int limit, +const char*errormsg); +typedef struct Zio ZIO; +#define char2int(c)cast(int,cast(unsigned char,(c))) +#define zgetc(z)(((z)->n--)>0?char2int(*(z)->p++):luaZ_fill(z)) +typedef struct Mbuffer{ +char*buffer; +size_t n; +size_t buffsize; +}Mbuffer; +#define luaZ_initbuffer(L,buff)((buff)->buffer=NULL,(buff)->buffsize=0) +#define luaZ_buffer(buff)((buff)->buffer) +#define luaZ_sizebuffer(buff)((buff)->buffsize) +#define luaZ_bufflen(buff)((buff)->n) +#define luaZ_resetbuffer(buff)((buff)->n=0) +#define luaZ_resizebuffer(L,buff,size)(luaM_reallocvector(L,(buff)->buffer,(buff)->buffsize,size,char),(buff)->buffsize=size) +#define luaZ_freebuffer(L,buff)luaZ_resizebuffer(L,buff,0) +struct Zio{ +size_t n; +const char*p; +lua_Reader reader; +void*data; +lua_State*L; +}; +static int luaZ_fill(ZIO*z); +struct lua_longjmp; +#define gt(L)(&L->l_gt) +#define registry(L)(&G(L)->l_registry) +typedef struct stringtable{ +GCObject**hash; +lu_int32 nuse; +int size; +}stringtable; +typedef struct CallInfo{ +StkId base; +StkId func; +StkId top; +const Instruction*savedpc; +int nresults; +int tailcalls; +}CallInfo; +#define curr_func(L)(clvalue(L->ci->func)) +#define ci_func(ci)(clvalue((ci)->func)) +#define f_isLua(ci)(!ci_func(ci)->c.isC) +#define isLua(ci)(ttisfunction((ci)->func)&&f_isLua(ci)) +typedef struct global_State{ +stringtable strt; +lua_Alloc frealloc; +void*ud; +lu_byte currentwhite; +lu_byte gcstate; +int sweepstrgc; +GCObject*rootgc; +GCObject**sweepgc; +GCObject*gray; +GCObject*grayagain; +GCObject*weak; +GCObject*tmudata; +Mbuffer buff; +lu_mem GCthreshold; +lu_mem totalbytes; +lu_mem estimate; +lu_mem gcdept; +int gcpause; +int gcstepmul; +lua_CFunction panic; +TValue l_registry; +struct lua_State*mainthread; +UpVal uvhead; +struct Table*mt[(8+1)]; +TString*tmname[TM_N]; +}global_State; +struct lua_State{ +GCObject*next;lu_byte tt;lu_byte marked; +lu_byte status; +StkId top; +StkId base; +global_State*l_G; +CallInfo*ci; +const Instruction*savedpc; +StkId stack_last; +StkId stack; +CallInfo*end_ci; +CallInfo*base_ci; +int stacksize; +int size_ci; +unsigned short nCcalls; +unsigned short baseCcalls; +lu_byte hookmask; +lu_byte allowhook; +int basehookcount; +int hookcount; +lua_Hook hook; +TValue l_gt; +TValue env; +GCObject*openupval; +GCObject*gclist; +struct lua_longjmp*errorJmp; +ptrdiff_t errfunc; +}; +#define G(L)(L->l_G) +union GCObject{ +GCheader gch; +union TString ts; +union Udata u; +union Closure cl; +struct Table h; +struct Proto p; +struct UpVal uv; 
+struct lua_State th; +}; +#define rawgco2ts(o)check_exp((o)->gch.tt==4,&((o)->ts)) +#define gco2ts(o)(&rawgco2ts(o)->tsv) +#define rawgco2u(o)check_exp((o)->gch.tt==7,&((o)->u)) +#define gco2u(o)(&rawgco2u(o)->uv) +#define gco2cl(o)check_exp((o)->gch.tt==6,&((o)->cl)) +#define gco2h(o)check_exp((o)->gch.tt==5,&((o)->h)) +#define gco2p(o)check_exp((o)->gch.tt==(8+1),&((o)->p)) +#define gco2uv(o)check_exp((o)->gch.tt==(8+2),&((o)->uv)) +#define ngcotouv(o)check_exp((o)==NULL||(o)->gch.tt==(8+2),&((o)->uv)) +#define gco2th(o)check_exp((o)->gch.tt==8,&((o)->th)) +#define obj2gco(v)(cast(GCObject*,(v))) +static void luaE_freethread(lua_State*L,lua_State*L1); +#define pcRel(pc,p)(cast(int,(pc)-(p)->code)-1) +#define getline_(f,pc)(((f)->lineinfo)?(f)->lineinfo[pc]:0) +#define resethookcount(L)(L->hookcount=L->basehookcount) +static void luaG_typeerror(lua_State*L,const TValue*o, +const char*opname); +static void luaG_runerror(lua_State*L,const char*fmt,...); +#define luaD_checkstack(L,n)if((char*)L->stack_last-(char*)L->top<=(n)*(int)sizeof(TValue))luaD_growstack(L,n);else condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1)); +#define incr_top(L){luaD_checkstack(L,1);L->top++;} +#define savestack(L,p)((char*)(p)-(char*)L->stack) +#define restorestack(L,n)((TValue*)((char*)L->stack+(n))) +#define saveci(L,p)((char*)(p)-(char*)L->base_ci) +#define restoreci(L,n)((CallInfo*)((char*)L->base_ci+(n))) +typedef void(*Pfunc)(lua_State*L,void*ud); +static int luaD_poscall(lua_State*L,StkId firstResult); +static void luaD_reallocCI(lua_State*L,int newsize); +static void luaD_reallocstack(lua_State*L,int newsize); +static void luaD_growstack(lua_State*L,int n); +static void luaD_throw(lua_State*L,int errcode); +static void*luaM_growaux_(lua_State*L,void*block,int*size,size_t size_elems, +int limit,const char*errormsg){ +void*newblock; +int newsize; +if(*size>=limit/2){ +if(*size>=limit) +luaG_runerror(L,errormsg); +newsize=limit; +} +else{ +newsize=(*size)*2; +if(newsize<4) +newsize=4; +} +newblock=luaM_reallocv(L,block,*size,newsize,size_elems); +*size=newsize; +return newblock; +} +static void*luaM_toobig(lua_State*L){ +luaG_runerror(L,"memory allocation error: block too big"); +return NULL; +} +static void*luaM_realloc_(lua_State*L,void*block,size_t osize,size_t nsize){ +global_State*g=G(L); +block=(*g->frealloc)(g->ud,block,osize,nsize); +if(block==NULL&&nsize>0) +luaD_throw(L,4); +g->totalbytes=(g->totalbytes-osize)+nsize; +return block; +} +#define resetbits(x,m)((x)&=cast(lu_byte,~(m))) +#define setbits(x,m)((x)|=(m)) +#define testbits(x,m)((x)&(m)) +#define bitmask(b)(1<<(b)) +#define bit2mask(b1,b2)(bitmask(b1)|bitmask(b2)) +#define l_setbit(x,b)setbits(x,bitmask(b)) +#define resetbit(x,b)resetbits(x,bitmask(b)) +#define testbit(x,b)testbits(x,bitmask(b)) +#define set2bits(x,b1,b2)setbits(x,(bit2mask(b1,b2))) +#define reset2bits(x,b1,b2)resetbits(x,(bit2mask(b1,b2))) +#define test2bits(x,b1,b2)testbits(x,(bit2mask(b1,b2))) +#define iswhite(x)test2bits((x)->gch.marked,0,1) +#define isblack(x)testbit((x)->gch.marked,2) +#define isgray(x)(!isblack(x)&&!iswhite(x)) +#define otherwhite(g)(g->currentwhite^bit2mask(0,1)) +#define isdead(g,v)((v)->gch.marked&otherwhite(g)&bit2mask(0,1)) +#define changewhite(x)((x)->gch.marked^=bit2mask(0,1)) +#define gray2black(x)l_setbit((x)->gch.marked,2) +#define valiswhite(x)(iscollectable(x)&&iswhite(gcvalue(x))) +#define luaC_white(g)cast(lu_byte,(g)->currentwhite&bit2mask(0,1)) +#define 
luaC_checkGC(L){condhardstacktests(luaD_reallocstack(L,L->stacksize-5-1));if(G(L)->totalbytes>=G(L)->GCthreshold)luaC_step(L);} +#define luaC_barrier(L,p,v){if(valiswhite(v)&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),gcvalue(v));} +#define luaC_barriert(L,t,v){if(valiswhite(v)&&isblack(obj2gco(t)))luaC_barrierback(L,t);} +#define luaC_objbarrier(L,p,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(p)))luaC_barrierf(L,obj2gco(p),obj2gco(o));} +#define luaC_objbarriert(L,t,o){if(iswhite(obj2gco(o))&&isblack(obj2gco(t)))luaC_barrierback(L,t);} +static void luaC_step(lua_State*L); +static void luaC_link(lua_State*L,GCObject*o,lu_byte tt); +static void luaC_linkupval(lua_State*L,UpVal*uv); +static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v); +static void luaC_barrierback(lua_State*L,Table*t); +#define sizestring(s)(sizeof(union TString)+((s)->len+1)*sizeof(char)) +#define sizeudata(u)(sizeof(union Udata)+(u)->len) +#define luaS_new(L,s)(luaS_newlstr(L,s,strlen(s))) +#define luaS_newliteral(L,s)(luaS_newlstr(L,""s,(sizeof(s)/sizeof(char))-1)) +#define luaS_fix(s)l_setbit((s)->tsv.marked,5) +static TString*luaS_newlstr(lua_State*L,const char*str,size_t l); +#define tostring(L,o)((ttype(o)==4)||(luaV_tostring(L,o))) +#define tonumber(o,n)(ttype(o)==3||(((o)=luaV_tonumber(o,n))!=NULL)) +#define equalobj(L,o1,o2)(ttype(o1)==ttype(o2)&&luaV_equalval(L,o1,o2)) +static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2); +static const TValue*luaV_tonumber(const TValue*obj,TValue*n); +static int luaV_tostring(lua_State*L,StkId obj); +static void luaV_execute(lua_State*L,int nexeccalls); +static void luaV_concat(lua_State*L,int total,int last); +static const TValue luaO_nilobject_={{NULL},0}; +static int luaO_int2fb(unsigned int x){ +int e=0; +while(x>=16){ +x=(x+1)>>1; +e++; +} +if(x<8)return x; +else return((e+1)<<3)|(cast_int(x)-8); +} +static int luaO_fb2int(int x){ +int e=(x>>3)&31; +if(e==0)return x; +else return((x&7)+8)<<(e-1); +} +static int luaO_log2(unsigned int x){ +static const lu_byte log_2[256]={ +0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 +}; +int l=-1; +while(x>=256){l+=8;x>>=8;} +return l+log_2[x]; +} +static int luaO_rawequalObj(const TValue*t1,const TValue*t2){ +if(ttype(t1)!=ttype(t2))return 0; +else switch(ttype(t1)){ +case 0: +return 1; +case 3: +return luai_numeq(nvalue(t1),nvalue(t2)); +case 1: +return bvalue(t1)==bvalue(t2); +case 2: +return pvalue(t1)==pvalue(t2); +default: +return gcvalue(t1)==gcvalue(t2); +} +} +static int luaO_str2d(const char*s,lua_Number*result){ +char*endptr; +*result=lua_str2number(s,&endptr); +if(endptr==s)return 0; +if(*endptr=='x'||*endptr=='X') +*result=cast_num(strtoul(s,&endptr,16)); +if(*endptr=='\0')return 1; +while(isspace(cast(unsigned char,*endptr)))endptr++; +if(*endptr!='\0')return 0; +return 1; +} +static void pushstr(lua_State*L,const char*str){ +setsvalue(L,L->top,luaS_new(L,str)); +incr_top(L); +} +static const char*luaO_pushvfstring(lua_State*L,const char*fmt,va_list argp){ +int n=1; +pushstr(L,""); +for(;;){ +const char*e=strchr(fmt,'%'); 
+if(e==NULL)break;
+setsvalue(L,L->top,luaS_newlstr(L,fmt,e-fmt));
+incr_top(L);
+switch(*(e+1)){
+case's':{
+const char*s=va_arg(argp,char*);
+if(s==NULL)s="(null)";
+pushstr(L,s);
+break;
+}
+case'c':{
+char buff[2];
+buff[0]=cast(char,va_arg(argp,int));
+buff[1]='\0';
+pushstr(L,buff);
+break;
+}
+case'd':{
+setnvalue(L->top,cast_num(va_arg(argp,int)));
+incr_top(L);
+break;
+}
+case'f':{
+setnvalue(L->top,cast_num(va_arg(argp,l_uacNumber)));
+incr_top(L);
+break;
+}
+case'p':{
+char buff[4*sizeof(void*)+8];
+sprintf(buff,"%p",va_arg(argp,void*));
+pushstr(L,buff);
+break;
+}
+case'%':{
+pushstr(L,"%");
+break;
+}
+default:{
+char buff[3];
+buff[0]='%';
+buff[1]=*(e+1);
+buff[2]='\0';
+pushstr(L,buff);
+break;
+}
+}
+n+=2;
+fmt=e+2;
+}
+pushstr(L,fmt);
+luaV_concat(L,n+1,cast_int(L->top-L->base)-1);
+L->top-=n;
+return svalue(L->top-1);
+}
+static const char*luaO_pushfstring(lua_State*L,const char*fmt,...){
+const char*msg;
+va_list argp;
+va_start(argp,fmt);
+msg=luaO_pushvfstring(L,fmt,argp);
+va_end(argp);
+return msg;
+}
+static void luaO_chunkid(char*out,const char*source,size_t bufflen){
+if(*source=='='){
+strncpy(out,source+1,bufflen);
+out[bufflen-1]='\0';
+}
+else{
+if(*source=='@'){
+size_t l;
+source++;
+bufflen-=sizeof(" '...' ");
+l=strlen(source);
+strcpy(out,"");
+if(l>bufflen){
+source+=(l-bufflen);
+strcat(out,"...");
+}
+strcat(out,source);
+}
+else{
+size_t len=strcspn(source,"\n\r");
+bufflen-=sizeof(" [string \"...\"] ");
+if(len>bufflen)len=bufflen;
+strcpy(out,"[string \"");
+if(source[len]!='\0'){
+strncat(out,source,len);
+strcat(out,"...");
+}
+else
+strcat(out,source);
+strcat(out,"\"]");
+}
+}
+}
+#define gnode(t,i)(&(t)->node[i])
+#define gkey(n)(&(n)->i_key.nk)
+#define gval(n)(&(n)->i_val)
+#define gnext(n)((n)->i_key.nk.next)
+#define key2tval(n)(&(n)->i_key.tvk)
+static TValue*luaH_setnum(lua_State*L,Table*t,int key);
+static const TValue*luaH_getstr(Table*t,TString*key);
+static TValue*luaH_set(lua_State*L,Table*t,const TValue*key);
+static const char*const luaT_typenames[]={
+"nil","boolean","userdata","number",
+"string","table","function","userdata","thread",
+"proto","upval"
+};
+static void luaT_init(lua_State*L){
+static const char*const luaT_eventname[]={
+"__index","__newindex",
+"__gc","__mode","__eq",
+"__add","__sub","__mul","__div","__mod",
+"__pow","__unm","__len","__lt","__le",
+"__concat","__call"
+};
+int i;
+for(i=0;i<TM_N;i++){
+G(L)->tmname[i]=luaS_new(L,luaT_eventname[i]);
+luaS_fix(G(L)->tmname[i]);
+}
+}
+static const TValue*luaT_gettm(Table*events,TMS event,TString*ename){
+const TValue*tm=luaH_getstr(events,ename);
+if(ttisnil(tm)){
+events->flags|=cast_byte(1u<<event);
+return NULL;
+}
+else return tm;
+}
+static const TValue*luaT_gettmbyobj(lua_State*L,const TValue*o,TMS event){
+Table*mt;
+switch(ttype(o)){
+case 5:
+mt=hvalue(o)->metatable;
+break;
+case 7:
+mt=uvalue(o)->metatable;
+break;
+default:
+mt=G(L)->mt[ttype(o)];
+}
+return(mt?luaH_getstr(mt,G(L)->tmname[event]):(&luaO_nilobject_));
+}
+#define sizeCclosure(n)(cast(int,sizeof(CClosure))+cast(int,sizeof(TValue)*((n)-1)))
+#define sizeLclosure(n)(cast(int,sizeof(LClosure))+cast(int,sizeof(TValue*)*((n)-1)))
+static Closure*luaF_newCclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeCclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->c.isC=1;
+c->c.env=e;
+c->c.nupvalues=cast_byte(nelems);
+return c;
+}
+static Closure*luaF_newLclosure(lua_State*L,int nelems,Table*e){
+Closure*c=cast(Closure*,luaM_malloc(L,sizeLclosure(nelems)));
+luaC_link(L,obj2gco(c),6);
+c->l.isC=0;
+c->l.env=e;
+c->l.nupvalues=cast_byte(nelems);
+while(nelems--)c->l.upvals[nelems]=NULL;
+return c;
+}
+static UpVal*luaF_newupval(lua_State*L){
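+/* Illustrative note (not part of upstream minilua): this makes a *closed*
+** upvalue whose v points at its own embedded u.value slot. Open upvalues
+** pointing into a stack are created by luaF_findupval() below and migrated
+** to this closed form by luaF_close() when their stack slots die. */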
+UpVal*uv=luaM_new(L,UpVal); +luaC_link(L,obj2gco(uv),(8+2)); +uv->v=&uv->u.value; +setnilvalue(uv->v); +return uv; +} +static UpVal*luaF_findupval(lua_State*L,StkId level){ +global_State*g=G(L); +GCObject**pp=&L->openupval; +UpVal*p; +UpVal*uv; +while(*pp!=NULL&&(p=ngcotouv(*pp))->v>=level){ +if(p->v==level){ +if(isdead(g,obj2gco(p))) +changewhite(obj2gco(p)); +return p; +} +pp=&p->next; +} +uv=luaM_new(L,UpVal); +uv->tt=(8+2); +uv->marked=luaC_white(g); +uv->v=level; +uv->next=*pp; +*pp=obj2gco(uv); +uv->u.l.prev=&g->uvhead; +uv->u.l.next=g->uvhead.u.l.next; +uv->u.l.next->u.l.prev=uv; +g->uvhead.u.l.next=uv; +return uv; +} +static void unlinkupval(UpVal*uv){ +uv->u.l.next->u.l.prev=uv->u.l.prev; +uv->u.l.prev->u.l.next=uv->u.l.next; +} +static void luaF_freeupval(lua_State*L,UpVal*uv){ +if(uv->v!=&uv->u.value) +unlinkupval(uv); +luaM_free(L,uv); +} +static void luaF_close(lua_State*L,StkId level){ +UpVal*uv; +global_State*g=G(L); +while(L->openupval!=NULL&&(uv=ngcotouv(L->openupval))->v>=level){ +GCObject*o=obj2gco(uv); +L->openupval=uv->next; +if(isdead(g,o)) +luaF_freeupval(L,uv); +else{ +unlinkupval(uv); +setobj(L,&uv->u.value,uv->v); +uv->v=&uv->u.value; +luaC_linkupval(L,uv); +} +} +} +static Proto*luaF_newproto(lua_State*L){ +Proto*f=luaM_new(L,Proto); +luaC_link(L,obj2gco(f),(8+1)); +f->k=NULL; +f->sizek=0; +f->p=NULL; +f->sizep=0; +f->code=NULL; +f->sizecode=0; +f->sizelineinfo=0; +f->sizeupvalues=0; +f->nups=0; +f->upvalues=NULL; +f->numparams=0; +f->is_vararg=0; +f->maxstacksize=0; +f->lineinfo=NULL; +f->sizelocvars=0; +f->locvars=NULL; +f->linedefined=0; +f->lastlinedefined=0; +f->source=NULL; +return f; +} +static void luaF_freeproto(lua_State*L,Proto*f){ +luaM_freearray(L,f->code,f->sizecode,Instruction); +luaM_freearray(L,f->p,f->sizep,Proto*); +luaM_freearray(L,f->k,f->sizek,TValue); +luaM_freearray(L,f->lineinfo,f->sizelineinfo,int); +luaM_freearray(L,f->locvars,f->sizelocvars,struct LocVar); +luaM_freearray(L,f->upvalues,f->sizeupvalues,TString*); +luaM_free(L,f); +} +static void luaF_freeclosure(lua_State*L,Closure*c){ +int size=(c->c.isC)?sizeCclosure(c->c.nupvalues): +sizeLclosure(c->l.nupvalues); +luaM_freemem(L,c,size); +} +#define MASK1(n,p)((~((~(Instruction)0)<>0)&MASK1(6,0))) +#define SET_OPCODE(i,o)((i)=(((i)&MASK0(6,0))|((cast(Instruction,o)<<0)&MASK1(6,0)))) +#define GETARG_A(i)(cast(int,((i)>>(0+6))&MASK1(8,0))) +#define SETARG_A(i,u)((i)=(((i)&MASK0(8,(0+6)))|((cast(Instruction,u)<<(0+6))&MASK1(8,(0+6))))) +#define GETARG_B(i)(cast(int,((i)>>(((0+6)+8)+9))&MASK1(9,0))) +#define SETARG_B(i,b)((i)=(((i)&MASK0(9,(((0+6)+8)+9)))|((cast(Instruction,b)<<(((0+6)+8)+9))&MASK1(9,(((0+6)+8)+9))))) +#define GETARG_C(i)(cast(int,((i)>>((0+6)+8))&MASK1(9,0))) +#define SETARG_C(i,b)((i)=(((i)&MASK0(9,((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1(9,((0+6)+8))))) +#define GETARG_Bx(i)(cast(int,((i)>>((0+6)+8))&MASK1((9+9),0))) +#define SETARG_Bx(i,b)((i)=(((i)&MASK0((9+9),((0+6)+8)))|((cast(Instruction,b)<<((0+6)+8))&MASK1((9+9),((0+6)+8))))) +#define GETARG_sBx(i)(GETARG_Bx(i)-(((1<<(9+9))-1)>>1)) +#define SETARG_sBx(i,b)SETARG_Bx((i),cast(unsigned int,(b)+(((1<<(9+9))-1)>>1))) +#define CREATE_ABC(o,a,b,c)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,b)<<(((0+6)+8)+9))|(cast(Instruction,c)<<((0+6)+8))) +#define CREATE_ABx(o,a,bc)((cast(Instruction,o)<<0)|(cast(Instruction,a)<<(0+6))|(cast(Instruction,bc)<<((0+6)+8))) +#define ISK(x)((x)&(1<<(9-1))) +#define INDEXK(r)((int)(r)&~(1<<(9-1))) +#define RKASK(x)((x)|(1<<(9-1))) +static const 
lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)]; +#define getBMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>4)&3)) +#define getCMode(m)(cast(enum OpArgMask,(luaP_opmodes[m]>>2)&3)) +#define testTMode(m)(luaP_opmodes[m]&(1<<7)) +typedef struct expdesc{ +expkind k; +union{ +struct{int info,aux;}s; +lua_Number nval; +}u; +int t; +int f; +}expdesc; +typedef struct upvaldesc{ +lu_byte k; +lu_byte info; +}upvaldesc; +struct BlockCnt; +typedef struct FuncState{ +Proto*f; +Table*h; +struct FuncState*prev; +struct LexState*ls; +struct lua_State*L; +struct BlockCnt*bl; +int pc; +int lasttarget; +int jpc; +int freereg; +int nk; +int np; +short nlocvars; +lu_byte nactvar; +upvaldesc upvalues[60]; +unsigned short actvar[200]; +}FuncState; +static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff, +const char*name); +struct lua_longjmp{ +struct lua_longjmp*previous; +jmp_buf b; +volatile int status; +}; +static void luaD_seterrorobj(lua_State*L,int errcode,StkId oldtop){ +switch(errcode){ +case 4:{ +setsvalue(L,oldtop,luaS_newliteral(L,"not enough memory")); +break; +} +case 5:{ +setsvalue(L,oldtop,luaS_newliteral(L,"error in error handling")); +break; +} +case 3: +case 2:{ +setobj(L,oldtop,L->top-1); +break; +} +} +L->top=oldtop+1; +} +static void restore_stack_limit(lua_State*L){ +if(L->size_ci>20000){ +int inuse=cast_int(L->ci-L->base_ci); +if(inuse+1<20000) +luaD_reallocCI(L,20000); +} +} +static void resetstack(lua_State*L,int status){ +L->ci=L->base_ci; +L->base=L->ci->base; +luaF_close(L,L->base); +luaD_seterrorobj(L,status,L->base); +L->nCcalls=L->baseCcalls; +L->allowhook=1; +restore_stack_limit(L); +L->errfunc=0; +L->errorJmp=NULL; +} +static void luaD_throw(lua_State*L,int errcode){ +if(L->errorJmp){ +L->errorJmp->status=errcode; +LUAI_THROW(L,L->errorJmp); +} +else{ +L->status=cast_byte(errcode); +if(G(L)->panic){ +resetstack(L,errcode); +G(L)->panic(L); +} +exit(EXIT_FAILURE); +} +} +static int luaD_rawrunprotected(lua_State*L,Pfunc f,void*ud){ +struct lua_longjmp lj; +lj.status=0; +lj.previous=L->errorJmp; +L->errorJmp=&lj; +LUAI_TRY(L,&lj, +(*f)(L,ud); +); +L->errorJmp=lj.previous; +return lj.status; +} +static void correctstack(lua_State*L,TValue*oldstack){ +CallInfo*ci; +GCObject*up; +L->top=(L->top-oldstack)+L->stack; +for(up=L->openupval;up!=NULL;up=up->gch.next) +gco2uv(up)->v=(gco2uv(up)->v-oldstack)+L->stack; +for(ci=L->base_ci;ci<=L->ci;ci++){ +ci->top=(ci->top-oldstack)+L->stack; +ci->base=(ci->base-oldstack)+L->stack; +ci->func=(ci->func-oldstack)+L->stack; +} +L->base=(L->base-oldstack)+L->stack; +} +static void luaD_reallocstack(lua_State*L,int newsize){ +TValue*oldstack=L->stack; +int realsize=newsize+1+5; +luaM_reallocvector(L,L->stack,L->stacksize,realsize,TValue); +L->stacksize=realsize; +L->stack_last=L->stack+newsize; +correctstack(L,oldstack); +} +static void luaD_reallocCI(lua_State*L,int newsize){ +CallInfo*oldci=L->base_ci; +luaM_reallocvector(L,L->base_ci,L->size_ci,newsize,CallInfo); +L->size_ci=newsize; +L->ci=(L->ci-oldci)+L->base_ci; +L->end_ci=L->base_ci+L->size_ci-1; +} +static void luaD_growstack(lua_State*L,int n){ +if(n<=L->stacksize) +luaD_reallocstack(L,2*L->stacksize); +else +luaD_reallocstack(L,L->stacksize+n); +} +static CallInfo*growCI(lua_State*L){ +if(L->size_ci>20000) +luaD_throw(L,5); +else{ +luaD_reallocCI(L,2*L->size_ci); +if(L->size_ci>20000) +luaG_runerror(L,"stack overflow"); +} +return++L->ci; +} +static StkId adjust_varargs(lua_State*L,Proto*p,int actual){ +int i; +int nfixargs=p->numparams; +Table*htab=NULL; +StkId base,fixed; 
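+/* Illustrative note (not part of upstream minilua): vararg adjustment below
+** copies the nfixargs fixed parameters above the varargs and nils their old
+** slots, so the callee's base sits past the "..." values; htab stays NULL
+** because the 5.0-style "arg" table was compiled out of this build. */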
+for(;actual<nfixargs;++actual)
+setnilvalue(L->top++);
+fixed=L->top-actual;
+base=L->top;
+for(i=0;i<nfixargs;i++){
+setobj(L,L->top++,fixed+i);
+setnilvalue(fixed+i);
+}
+if(htab){
+sethvalue(L,L->top++,htab);
+}
+return base;
+}
+static StkId tryfuncTM(lua_State*L,StkId func){
+const TValue*tm=luaT_gettmbyobj(L,func,TM_CALL);
+StkId p;
+ptrdiff_t funcr=savestack(L,func);
+if(!ttisfunction(tm))
+luaG_typeerror(L,func,"call");
+for(p=L->top;p>func;p--)setobj(L,p,p-1);
+incr_top(L);
+func=restorestack(L,funcr);
+setobj(L,func,tm);
+return func;
+}
+#define inc_ci(L)((L->ci==L->end_ci)?growCI(L):(condhardstacktests(luaD_reallocCI(L,L->size_ci)),++L->ci))
+static int luaD_precall(lua_State*L,StkId func,int nresults){
+LClosure*cl;
+ptrdiff_t funcr;
+if(!ttisfunction(func))
+func=tryfuncTM(L,func);
+funcr=savestack(L,func);
+cl=&clvalue(func)->l;
+L->ci->savedpc=L->savedpc;
+if(!cl->isC){
+CallInfo*ci;
+StkId st,base;
+Proto*p=cl->p;
+luaD_checkstack(L,p->maxstacksize);
+func=restorestack(L,funcr);
+if(!p->is_vararg){
+base=func+1;
+if(L->top>base+p->numparams)
+L->top=base+p->numparams;
+}
+else{
+int nargs=cast_int(L->top-func)-1;
+base=adjust_varargs(L,p,nargs);
+func=restorestack(L,funcr);
+}
+ci=inc_ci(L);
+ci->func=func;
+L->base=ci->base=base;
+ci->top=L->base+p->maxstacksize;
+L->savedpc=p->code;
+ci->tailcalls=0;
+ci->nresults=nresults;
+for(st=L->top;st<ci->top;st++)
+setnilvalue(st);
+L->top=ci->top;
+return 0;
+}
+else{
+CallInfo*ci;
+int n;
+luaD_checkstack(L,20);
+ci=inc_ci(L);
+ci->func=restorestack(L,funcr);
+L->base=ci->base=ci->func+1;
+ci->top=L->top+20;
+ci->nresults=nresults;
+n=(*curr_func(L)->c.f)(L);
+if(n<0)
+return 2;
+else{
+luaD_poscall(L,L->top-n);
+return 1;
+}
+}
+}
+static int luaD_poscall(lua_State*L,StkId firstResult){
+StkId res;
+int wanted,i;
+CallInfo*ci;
+ci=L->ci--;
+res=ci->func;
+wanted=ci->nresults;
+L->base=(ci-1)->base;
+L->savedpc=(ci-1)->savedpc;
+for(i=wanted;i!=0&&firstResult<L->top;i--)
+setobj(L,res++,firstResult++);
+while(i-->0)
+setnilvalue(res++);
+L->top=res;
+return(wanted-(-1));
+}
+static void luaD_call(lua_State*L,StkId func,int nResults){
+if(++L->nCcalls>=200){
+if(L->nCcalls==200)
+luaG_runerror(L,"C stack overflow");
+else if(L->nCcalls>=(200+(200>>3)))
+luaD_throw(L,5);
+}
+if(luaD_precall(L,func,nResults)==0)
+luaV_execute(L,1);
+L->nCcalls--;
+luaC_checkGC(L);
+}
+static int luaD_pcall(lua_State*L,Pfunc func,void*u,
+ptrdiff_t old_top,ptrdiff_t ef){
+int status;
+unsigned short oldnCcalls=L->nCcalls;
+ptrdiff_t old_ci=saveci(L,L->ci);
+lu_byte old_allowhooks=L->allowhook;
+ptrdiff_t old_errfunc=L->errfunc;
+L->errfunc=ef;
+status=luaD_rawrunprotected(L,func,u);
+if(status!=0){
+StkId oldtop=restorestack(L,old_top);
+luaF_close(L,oldtop);
+luaD_seterrorobj(L,status,oldtop);
+L->nCcalls=oldnCcalls;
+L->ci=restoreci(L,old_ci);
+L->base=L->ci->base;
+L->savedpc=L->ci->savedpc;
+L->allowhook=old_allowhooks;
+restore_stack_limit(L);
+}
+L->errfunc=old_errfunc;
+return status;
+}
+struct SParser{
+ZIO*z;
+Mbuffer buff;
+const char*name;
+};
+static void f_parser(lua_State*L,void*ud){
+int i;
+Proto*tf;
+Closure*cl;
+struct SParser*p=cast(struct SParser*,ud);
+luaC_checkGC(L);
+tf=luaY_parser(L,p->z,
+&p->buff,p->name);
+cl=luaF_newLclosure(L,tf->nups,hvalue(gt(L)));
+cl->l.p=tf;
+for(i=0;i<tf->nups;i++)
+cl->l.upvals[i]=luaF_newupval(L);
+setclvalue(L,L->top,cl);
+incr_top(L);
+}
+static int luaD_protectedparser(lua_State*L,ZIO*z,const char*name){
+struct SParser p;
+int status;
+p.z=z;p.name=name;
+luaZ_initbuffer(L,&p.buff);
+status=luaD_pcall(L,f_parser,&p,savestack(L,L->top),L->errfunc);
+luaZ_freebuffer(L,&p.buff); +return status; +} +static void luaS_resize(lua_State*L,int newsize){ +GCObject**newhash; +stringtable*tb; +int i; +if(G(L)->gcstate==2) +return; +newhash=luaM_newvector(L,newsize,GCObject*); +tb=&G(L)->strt; +for(i=0;isize;i++){ +GCObject*p=tb->hash[i]; +while(p){ +GCObject*next=p->gch.next; +unsigned int h=gco2ts(p)->hash; +int h1=lmod(h,newsize); +p->gch.next=newhash[h1]; +newhash[h1]=p; +p=next; +} +} +luaM_freearray(L,tb->hash,tb->size,TString*); +tb->size=newsize; +tb->hash=newhash; +} +static TString*newlstr(lua_State*L,const char*str,size_t l, +unsigned int h){ +TString*ts; +stringtable*tb; +if(l+1>(((size_t)(~(size_t)0)-2)-sizeof(TString))/sizeof(char)) +luaM_toobig(L); +ts=cast(TString*,luaM_malloc(L,(l+1)*sizeof(char)+sizeof(TString))); +ts->tsv.len=l; +ts->tsv.hash=h; +ts->tsv.marked=luaC_white(G(L)); +ts->tsv.tt=4; +ts->tsv.reserved=0; +memcpy(ts+1,str,l*sizeof(char)); +((char*)(ts+1))[l]='\0'; +tb=&G(L)->strt; +h=lmod(h,tb->size); +ts->tsv.next=tb->hash[h]; +tb->hash[h]=obj2gco(ts); +tb->nuse++; +if(tb->nuse>cast(lu_int32,tb->size)&&tb->size<=(INT_MAX-2)/2) +luaS_resize(L,tb->size*2); +return ts; +} +static TString*luaS_newlstr(lua_State*L,const char*str,size_t l){ +GCObject*o; +unsigned int h=cast(unsigned int,l); +size_t step=(l>>5)+1; +size_t l1; +for(l1=l;l1>=step;l1-=step) +h=h^((h<<5)+(h>>2)+cast(unsigned char,str[l1-1])); +for(o=G(L)->strt.hash[lmod(h,G(L)->strt.size)]; +o!=NULL; +o=o->gch.next){ +TString*ts=rawgco2ts(o); +if(ts->tsv.len==l&&(memcmp(str,getstr(ts),l)==0)){ +if(isdead(G(L),o))changewhite(o); +return ts; +} +} +return newlstr(L,str,l,h); +} +static Udata*luaS_newudata(lua_State*L,size_t s,Table*e){ +Udata*u; +if(s>((size_t)(~(size_t)0)-2)-sizeof(Udata)) +luaM_toobig(L); +u=cast(Udata*,luaM_malloc(L,s+sizeof(Udata))); +u->uv.marked=luaC_white(G(L)); +u->uv.tt=7; +u->uv.len=s; +u->uv.metatable=NULL; +u->uv.env=e; +u->uv.next=G(L)->mainthread->next; +G(L)->mainthread->next=obj2gco(u); +return u; +} +#define hashpow2(t,n)(gnode(t,lmod((n),sizenode(t)))) +#define hashstr(t,str)hashpow2(t,(str)->tsv.hash) +#define hashboolean(t,p)hashpow2(t,p) +#define hashmod(t,n)(gnode(t,((n)%((sizenode(t)-1)|1)))) +#define hashpointer(t,p)hashmod(t,IntPoint(p)) +static const Node dummynode_={ +{{NULL},0}, +{{{NULL},0,NULL}} +}; +static Node*hashnum(const Table*t,lua_Number n){ +unsigned int a[cast_int(sizeof(lua_Number)/sizeof(int))]; +int i; +if(luai_numeq(n,0)) +return gnode(t,0); +memcpy(a,&n,sizeof(a)); +for(i=1;isizearray) +return i-1; +else{ +Node*n=mainposition(t,key); +do{ +if(luaO_rawequalObj(key2tval(n),key)|| +(ttype(gkey(n))==(8+3)&&iscollectable(key)&& +gcvalue(gkey(n))==gcvalue(key))){ +i=cast_int(n-gnode(t,0)); +return i+t->sizearray; +} +else n=gnext(n); +}while(n); +luaG_runerror(L,"invalid key to "LUA_QL("next")); +return 0; +} +} +static int luaH_next(lua_State*L,Table*t,StkId key){ +int i=findindex(L,t,key); +for(i++;isizearray;i++){ +if(!ttisnil(&t->array[i])){ +setnvalue(key,cast_num(i+1)); +setobj(L,key+1,&t->array[i]); +return 1; +} +} +for(i-=t->sizearray;i<(int)sizenode(t);i++){ +if(!ttisnil(gval(gnode(t,i)))){ +setobj(L,key,key2tval(gnode(t,i))); +setobj(L,key+1,gval(gnode(t,i))); +return 1; +} +} +return 0; +} +static int computesizes(int nums[],int*narray){ +int i; +int twotoi; +int a=0; +int na=0; +int n=0; +for(i=0,twotoi=1;twotoi/2<*narray;i++,twotoi*=2){ +if(nums[i]>0){ +a+=nums[i]; +if(a>twotoi/2){ +n=twotoi; +na=a; +} +} +if(a==*narray)break; +} +*narray=n; +return na; +} +static int countint(const 
TValue*key,int*nums){ +int k=arrayindex(key); +if(0t->sizearray){ +lim=t->sizearray; +if(i>lim) +break; +} +for(;i<=lim;i++){ +if(!ttisnil(&t->array[i-1])) +lc++; +} +nums[lg]+=lc; +ause+=lc; +} +return ause; +} +static int numusehash(const Table*t,int*nums,int*pnasize){ +int totaluse=0; +int ause=0; +int i=sizenode(t); +while(i--){ +Node*n=&t->node[i]; +if(!ttisnil(gval(n))){ +ause+=countint(key2tval(n),nums); +totaluse++; +} +} +*pnasize+=ause; +return totaluse; +} +static void setarrayvector(lua_State*L,Table*t,int size){ +int i; +luaM_reallocvector(L,t->array,t->sizearray,size,TValue); +for(i=t->sizearray;iarray[i]); +t->sizearray=size; +} +static void setnodevector(lua_State*L,Table*t,int size){ +int lsize; +if(size==0){ +t->node=cast(Node*,(&dummynode_)); +lsize=0; +} +else{ +int i; +lsize=ceillog2(size); +if(lsize>(32-2)) +luaG_runerror(L,"table overflow"); +size=twoto(lsize); +t->node=luaM_newvector(L,size,Node); +for(i=0;ilsizenode=cast_byte(lsize); +t->lastfree=gnode(t,size); +} +static void resize(lua_State*L,Table*t,int nasize,int nhsize){ +int i; +int oldasize=t->sizearray; +int oldhsize=t->lsizenode; +Node*nold=t->node; +if(nasize>oldasize) +setarrayvector(L,t,nasize); +setnodevector(L,t,nhsize); +if(nasizesizearray=nasize; +for(i=nasize;iarray[i])) +setobj(L,luaH_setnum(L,t,i+1),&t->array[i]); +} +luaM_reallocvector(L,t->array,oldasize,nasize,TValue); +} +for(i=twoto(oldhsize)-1;i>=0;i--){ +Node*old=nold+i; +if(!ttisnil(gval(old))) +setobj(L,luaH_set(L,t,key2tval(old)),gval(old)); +} +if(nold!=(&dummynode_)) +luaM_freearray(L,nold,twoto(oldhsize),Node); +} +static void luaH_resizearray(lua_State*L,Table*t,int nasize){ +int nsize=(t->node==(&dummynode_))?0:sizenode(t); +resize(L,t,nasize,nsize); +} +static void rehash(lua_State*L,Table*t,const TValue*ek){ +int nasize,na; +int nums[(32-2)+1]; +int i; +int totaluse; +for(i=0;i<=(32-2);i++)nums[i]=0; +nasize=numusearray(t,nums); +totaluse=nasize; +totaluse+=numusehash(t,nums,&nasize); +nasize+=countint(ek,nums); +totaluse++; +na=computesizes(nums,&nasize); +resize(L,t,nasize,totaluse-na); +} +static Table*luaH_new(lua_State*L,int narray,int nhash){ +Table*t=luaM_new(L,Table); +luaC_link(L,obj2gco(t),5); +t->metatable=NULL; +t->flags=cast_byte(~0); +t->array=NULL; +t->sizearray=0; +t->lsizenode=0; +t->node=cast(Node*,(&dummynode_)); +setarrayvector(L,t,narray); +setnodevector(L,t,nhash); +return t; +} +static void luaH_free(lua_State*L,Table*t){ +if(t->node!=(&dummynode_)) +luaM_freearray(L,t->node,sizenode(t),Node); +luaM_freearray(L,t->array,t->sizearray,TValue); +luaM_free(L,t); +} +static Node*getfreepos(Table*t){ +while(t->lastfree-->t->node){ +if(ttisnil(gkey(t->lastfree))) +return t->lastfree; +} +return NULL; +} +static TValue*newkey(lua_State*L,Table*t,const TValue*key){ +Node*mp=mainposition(t,key); +if(!ttisnil(gval(mp))||mp==(&dummynode_)){ +Node*othern; +Node*n=getfreepos(t); +if(n==NULL){ +rehash(L,t,key); +return luaH_set(L,t,key); +} +othern=mainposition(t,key2tval(mp)); +if(othern!=mp){ +while(gnext(othern)!=mp)othern=gnext(othern); +gnext(othern)=n; +*n=*mp; +gnext(mp)=NULL; +setnilvalue(gval(mp)); +} +else{ +gnext(n)=gnext(mp); +gnext(mp)=n; +mp=n; +} +} +gkey(mp)->value=key->value;gkey(mp)->tt=key->tt; +luaC_barriert(L,t,key); +return gval(mp); +} +static const TValue*luaH_getnum(Table*t,int key){ +if(cast(unsigned int,key)-1sizearray)) +return&t->array[key-1]; +else{ +lua_Number nk=cast_num(key); +Node*n=hashnum(t,nk); +do{ +if(ttisnumber(gkey(n))&&luai_numeq(nvalue(gkey(n)),nk)) +return gval(n); +else 
n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +} +static const TValue*luaH_getstr(Table*t,TString*key){ +Node*n=hashstr(t,key); +do{ +if(ttisstring(gkey(n))&&rawtsvalue(gkey(n))==key) +return gval(n); +else n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +static const TValue*luaH_get(Table*t,const TValue*key){ +switch(ttype(key)){ +case 0:return(&luaO_nilobject_); +case 4:return luaH_getstr(t,rawtsvalue(key)); +case 3:{ +int k; +lua_Number n=nvalue(key); +lua_number2int(k,n); +if(luai_numeq(cast_num(k),nvalue(key))) +return luaH_getnum(t,k); +} +default:{ +Node*n=mainposition(t,key); +do{ +if(luaO_rawequalObj(key2tval(n),key)) +return gval(n); +else n=gnext(n); +}while(n); +return(&luaO_nilobject_); +} +} +} +static TValue*luaH_set(lua_State*L,Table*t,const TValue*key){ +const TValue*p=luaH_get(t,key); +t->flags=0; +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +if(ttisnil(key))luaG_runerror(L,"table index is nil"); +else if(ttisnumber(key)&&luai_numisnan(nvalue(key))) +luaG_runerror(L,"table index is NaN"); +return newkey(L,t,key); +} +} +static TValue*luaH_setnum(lua_State*L,Table*t,int key){ +const TValue*p=luaH_getnum(t,key); +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +TValue k; +setnvalue(&k,cast_num(key)); +return newkey(L,t,&k); +} +} +static TValue*luaH_setstr(lua_State*L,Table*t,TString*key){ +const TValue*p=luaH_getstr(t,key); +if(p!=(&luaO_nilobject_)) +return cast(TValue*,p); +else{ +TValue k; +setsvalue(L,&k,key); +return newkey(L,t,&k); +} +} +static int unbound_search(Table*t,unsigned int j){ +unsigned int i=j; +j++; +while(!ttisnil(luaH_getnum(t,j))){ +i=j; +j*=2; +if(j>cast(unsigned int,(INT_MAX-2))){ +i=1; +while(!ttisnil(luaH_getnum(t,i)))i++; +return i-1; +} +} +while(j-i>1){ +unsigned int m=(i+j)/2; +if(ttisnil(luaH_getnum(t,m)))j=m; +else i=m; +} +return i; +} +static int luaH_getn(Table*t){ +unsigned int j=t->sizearray; +if(j>0&&ttisnil(&t->array[j-1])){ +unsigned int i=0; +while(j-i>1){ +unsigned int m=(i+j)/2; +if(ttisnil(&t->array[m-1]))j=m; +else i=m; +} +return i; +} +else if(t->node==(&dummynode_)) +return j; +else return unbound_search(t,j); +} +#define makewhite(g,x)((x)->gch.marked=cast_byte(((x)->gch.marked&cast_byte(~(bitmask(2)|bit2mask(0,1))))|luaC_white(g))) +#define white2gray(x)reset2bits((x)->gch.marked,0,1) +#define black2gray(x)resetbit((x)->gch.marked,2) +#define stringmark(s)reset2bits((s)->tsv.marked,0,1) +#define isfinalized(u)testbit((u)->marked,3) +#define markfinalized(u)l_setbit((u)->marked,3) +#define markvalue(g,o){checkconsistency(o);if(iscollectable(o)&&iswhite(gcvalue(o)))reallymarkobject(g,gcvalue(o));} +#define markobject(g,t){if(iswhite(obj2gco(t)))reallymarkobject(g,obj2gco(t));} +#define setthreshold(g)(g->GCthreshold=(g->estimate/100)*g->gcpause) +static void removeentry(Node*n){ +if(iscollectable(gkey(n))) +setttype(gkey(n),(8+3)); +} +static void reallymarkobject(global_State*g,GCObject*o){ +white2gray(o); +switch(o->gch.tt){ +case 4:{ +return; +} +case 7:{ +Table*mt=gco2u(o)->metatable; +gray2black(o); +if(mt)markobject(g,mt); +markobject(g,gco2u(o)->env); +return; +} +case(8+2):{ +UpVal*uv=gco2uv(o); +markvalue(g,uv->v); +if(uv->v==&uv->u.value) +gray2black(o); +return; +} +case 6:{ +gco2cl(o)->c.gclist=g->gray; +g->gray=o; +break; +} +case 5:{ +gco2h(o)->gclist=g->gray; +g->gray=o; +break; +} +case 8:{ +gco2th(o)->gclist=g->gray; +g->gray=o; +break; +} +case(8+1):{ +gco2p(o)->gclist=g->gray; +g->gray=o; +break; +} +default:; +} +} +static void marktmu(global_State*g){ 
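+/* Illustrative note (not part of upstream minilua): g->tmudata is a
+** circular list of userdata that luaC_separateudata() set aside because
+** they have __gc metamethods; they are re-marked here so GCTM() can still
+** call those finalizers after the sweep. */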
+GCObject*u=g->tmudata;
+if(u){
+do{
+u=u->gch.next;
+makewhite(g,u);
+reallymarkobject(g,u);
+}while(u!=g->tmudata);
+}
+}
+static size_t luaC_separateudata(lua_State*L,int all){
+global_State*g=G(L);
+size_t deadmem=0;
+GCObject**p=&g->mainthread->next;
+GCObject*curr;
+while((curr=*p)!=NULL){
+if(!(iswhite(curr)||all)||isfinalized(gco2u(curr)))
+p=&curr->gch.next;
+else if(fasttm(L,gco2u(curr)->metatable,TM_GC)==NULL){
+markfinalized(gco2u(curr));
+p=&curr->gch.next;
+}
+else{
+deadmem+=sizeudata(gco2u(curr));
+markfinalized(gco2u(curr));
+*p=curr->gch.next;
+if(g->tmudata==NULL)
+g->tmudata=curr->gch.next=curr;
+else{
+curr->gch.next=g->tmudata->gch.next;
+g->tmudata->gch.next=curr;
+g->tmudata=curr;
+}
+}
+}
+return deadmem;
+}
+static int traversetable(global_State*g,Table*h){
+int i;
+int weakkey=0;
+int weakvalue=0;
+const TValue*mode;
+if(h->metatable)
+markobject(g,h->metatable);
+mode=gfasttm(g,h->metatable,TM_MODE);
+if(mode&&ttisstring(mode)){
+weakkey=(strchr(svalue(mode),'k')!=NULL);
+weakvalue=(strchr(svalue(mode),'v')!=NULL);
+if(weakkey||weakvalue){
+h->marked&=~(bitmask(3)|bitmask(4));
+h->marked|=cast_byte((weakkey<<3)|
+(weakvalue<<4));
+h->gclist=g->weak;
+g->weak=obj2gco(h);
+}
+}
+if(weakkey&&weakvalue)return 1;
+if(!weakvalue){
+i=h->sizearray;
+while(i--)
+markvalue(g,&h->array[i]);
+}
+i=sizenode(h);
+while(i--){
+Node*n=gnode(h,i);
+if(ttisnil(gval(n)))
+removeentry(n);
+else{
+if(!weakkey)markvalue(g,gkey(n));
+if(!weakvalue)markvalue(g,gval(n));
+}
+}
+return weakkey||weakvalue;
+}
+static void traverseproto(global_State*g,Proto*f){
+int i;
+if(f->source)stringmark(f->source);
+for(i=0;i<f->sizek;i++)
+markvalue(g,&f->k[i]);
+for(i=0;i<f->sizeupvalues;i++){
+if(f->upvalues[i])
+stringmark(f->upvalues[i]);
+}
+for(i=0;i<f->sizep;i++){
+if(f->p[i])
+markobject(g,f->p[i]);
+}
+for(i=0;i<f->sizelocvars;i++){
+if(f->locvars[i].varname)
+stringmark(f->locvars[i].varname);
+}
+}
+static void traverseclosure(global_State*g,Closure*cl){
+markobject(g,cl->c.env);
+if(cl->c.isC){
+int i;
+for(i=0;i<cl->c.nupvalues;i++)
+markvalue(g,&cl->c.upvalue[i]);
+}
+else{
+int i;
+markobject(g,cl->l.p);
+for(i=0;i<cl->l.nupvalues;i++)
+markobject(g,cl->l.upvals[i]);
+}
+}
+static void checkstacksizes(lua_State*L,StkId max){
+int ci_used=cast_int(L->ci-L->base_ci);
+int s_used=cast_int(max-L->stack);
+if(L->size_ci>20000)
+return;
+if(4*ci_used<L->size_ci&&2*8<L->size_ci)
+luaD_reallocCI(L,L->size_ci/2);
+condhardstacktests(luaD_reallocCI(L,ci_used+1));
+if(4*s_used<L->stacksize&&
+2*((2*20)+5)<L->stacksize)
+luaD_reallocstack(L,L->stacksize/2);
+condhardstacktests(luaD_reallocstack(L,s_used));
+}
+static void traversestack(global_State*g,lua_State*l){
+StkId o,lim;
+CallInfo*ci;
+markvalue(g,gt(l));
+lim=l->top;
+for(ci=l->base_ci;ci<=l->ci;ci++){
+if(lim<ci->top)lim=ci->top;
+}
+for(o=l->stack;o<l->top;o++)
+markvalue(g,o);
+for(;o<=lim;o++)
+setnilvalue(o);
+checkstacksizes(l,lim);
+}
+static l_mem propagatemark(global_State*g){
+GCObject*o=g->gray;
+gray2black(o);
+switch(o->gch.tt){
+case 5:{
+Table*h=gco2h(o);
+g->gray=h->gclist;
+if(traversetable(g,h))
+black2gray(o);
+return sizeof(Table)+sizeof(TValue)*h->sizearray+
+sizeof(Node)*sizenode(h);
+}
+case 6:{
+Closure*cl=gco2cl(o);
+g->gray=cl->c.gclist;
+traverseclosure(g,cl);
+return(cl->c.isC)?sizeCclosure(cl->c.nupvalues):
+sizeLclosure(cl->l.nupvalues);
+}
+case 8:{
+lua_State*th=gco2th(o);
+g->gray=th->gclist;
+th->gclist=g->grayagain;
+g->grayagain=o;
+black2gray(o);
+traversestack(g,th);
+return sizeof(lua_State)+sizeof(TValue)*th->stacksize+
+sizeof(CallInfo)*th->size_ci; +} +case(8+1):{ +Proto*p=gco2p(o); +g->gray=p->gclist; +traverseproto(g,p); +return sizeof(Proto)+sizeof(Instruction)*p->sizecode+ +sizeof(Proto*)*p->sizep+ +sizeof(TValue)*p->sizek+ +sizeof(int)*p->sizelineinfo+ +sizeof(LocVar)*p->sizelocvars+ +sizeof(TString*)*p->sizeupvalues; +} +default:return 0; +} +} +static size_t propagateall(global_State*g){ +size_t m=0; +while(g->gray)m+=propagatemark(g); +return m; +} +static int iscleared(const TValue*o,int iskey){ +if(!iscollectable(o))return 0; +if(ttisstring(o)){ +stringmark(rawtsvalue(o)); +return 0; +} +return iswhite(gcvalue(o))|| +(ttisuserdata(o)&&(!iskey&&isfinalized(uvalue(o)))); +} +static void cleartable(GCObject*l){ +while(l){ +Table*h=gco2h(l); +int i=h->sizearray; +if(testbit(h->marked,4)){ +while(i--){ +TValue*o=&h->array[i]; +if(iscleared(o,0)) +setnilvalue(o); +} +} +i=sizenode(h); +while(i--){ +Node*n=gnode(h,i); +if(!ttisnil(gval(n))&& +(iscleared(key2tval(n),1)||iscleared(gval(n),0))){ +setnilvalue(gval(n)); +removeentry(n); +} +} +l=h->gclist; +} +} +static void freeobj(lua_State*L,GCObject*o){ +switch(o->gch.tt){ +case(8+1):luaF_freeproto(L,gco2p(o));break; +case 6:luaF_freeclosure(L,gco2cl(o));break; +case(8+2):luaF_freeupval(L,gco2uv(o));break; +case 5:luaH_free(L,gco2h(o));break; +case 8:{ +luaE_freethread(L,gco2th(o)); +break; +} +case 4:{ +G(L)->strt.nuse--; +luaM_freemem(L,o,sizestring(gco2ts(o))); +break; +} +case 7:{ +luaM_freemem(L,o,sizeudata(gco2u(o))); +break; +} +default:; +} +} +#define sweepwholelist(L,p)sweeplist(L,p,((lu_mem)(~(lu_mem)0)-2)) +static GCObject**sweeplist(lua_State*L,GCObject**p,lu_mem count){ +GCObject*curr; +global_State*g=G(L); +int deadmask=otherwhite(g); +while((curr=*p)!=NULL&&count-->0){ +if(curr->gch.tt==8) +sweepwholelist(L,&gco2th(curr)->openupval); +if((curr->gch.marked^bit2mask(0,1))&deadmask){ +makewhite(g,curr); +p=&curr->gch.next; +} +else{ +*p=curr->gch.next; +if(curr==g->rootgc) +g->rootgc=curr->gch.next; +freeobj(L,curr); +} +} +return p; +} +static void checkSizes(lua_State*L){ +global_State*g=G(L); +if(g->strt.nuse<cast(lu_int32,g->strt.size/4)&& +g->strt.size>32*2) +luaS_resize(L,g->strt.size/2); +if(luaZ_sizebuffer(&g->buff)>32*2){ +size_t newsize=luaZ_sizebuffer(&g->buff)/2; +luaZ_resizebuffer(L,&g->buff,newsize); +} +} +static void GCTM(lua_State*L){ +global_State*g=G(L); +GCObject*o=g->tmudata->gch.next; +Udata*udata=rawgco2u(o); +const TValue*tm; +if(o==g->tmudata) +g->tmudata=NULL; +else +g->tmudata->gch.next=udata->uv.next; +udata->uv.next=g->mainthread->next; +g->mainthread->next=o; +makewhite(g,o); +tm=fasttm(L,udata->uv.metatable,TM_GC); +if(tm!=NULL){ +lu_byte oldah=L->allowhook; +lu_mem oldt=g->GCthreshold; +L->allowhook=0; +g->GCthreshold=2*g->totalbytes; +setobj(L,L->top,tm); +setuvalue(L,L->top+1,udata); +L->top+=2; +luaD_call(L,L->top-2,0); +L->allowhook=oldah; +g->GCthreshold=oldt; +} +} +static void luaC_callGCTM(lua_State*L){ +while(G(L)->tmudata) +GCTM(L); +} +static void luaC_freeall(lua_State*L){ +global_State*g=G(L); +int i; +g->currentwhite=bit2mask(0,1)|bitmask(6); +sweepwholelist(L,&g->rootgc); +for(i=0;i<g->strt.size;i++) +sweepwholelist(L,&g->strt.hash[i]); +} +static void markmt(global_State*g){ +int i; +for(i=0;i<(8+1);i++) +if(g->mt[i])markobject(g,g->mt[i]); +} +static void markroot(lua_State*L){ +global_State*g=G(L); +g->gray=NULL; +g->grayagain=NULL; +g->weak=NULL; +markobject(g,g->mainthread); +markvalue(g,gt(g->mainthread)); +markvalue(g,registry(L)); +markmt(g); +g->gcstate=1; +} +static void remarkupvals(global_State*g){
+UpVal*uv; +for(uv=g->uvhead.u.l.next;uv!=&g->uvhead;uv=uv->u.l.next){ +if(isgray(obj2gco(uv))) +markvalue(g,uv->v); +} +} +static void atomic(lua_State*L){ +global_State*g=G(L); +size_t udsize; +remarkupvals(g); +propagateall(g); +g->gray=g->weak; +g->weak=NULL; +markobject(g,L); +markmt(g); +propagateall(g); +g->gray=g->grayagain; +g->grayagain=NULL; +propagateall(g); +udsize=luaC_separateudata(L,0); +marktmu(g); +udsize+=propagateall(g); +cleartable(g->weak); +g->currentwhite=cast_byte(otherwhite(g)); +g->sweepstrgc=0; +g->sweepgc=&g->rootgc; +g->gcstate=2; +g->estimate=g->totalbytes-udsize; +} +static l_mem singlestep(lua_State*L){ +global_State*g=G(L); +switch(g->gcstate){ +case 0:{ +markroot(L); +return 0; +} +case 1:{ +if(g->gray) +return propagatemark(g); +else{ +atomic(L); +return 0; +} +} +case 2:{ +lu_mem old=g->totalbytes; +sweepwholelist(L,&g->strt.hash[g->sweepstrgc++]); +if(g->sweepstrgc>=g->strt.size) +g->gcstate=3; +g->estimate-=old-g->totalbytes; +return 10; +} +case 3:{ +lu_mem old=g->totalbytes; +g->sweepgc=sweeplist(L,g->sweepgc,40); +if(*g->sweepgc==NULL){ +checkSizes(L); +g->gcstate=4; +} +g->estimate-=old-g->totalbytes; +return 40*10; +} +case 4:{ +if(g->tmudata){ +GCTM(L); +if(g->estimate>100) +g->estimate-=100; +return 100; +} +else{ +g->gcstate=0; +g->gcdept=0; +return 0; +} +} +default:return 0; +} +} +static void luaC_step(lua_State*L){ +global_State*g=G(L); +l_mem lim=(1024u/100)*g->gcstepmul; +if(lim==0) +lim=(((lu_mem)(~(lu_mem)0)-2)-1)/2; +g->gcdept+=g->totalbytes-g->GCthreshold; +do{ +lim-=singlestep(L); +if(g->gcstate==0) +break; +}while(lim>0); +if(g->gcstate!=0){ +if(g->gcdept<1024u) +g->GCthreshold=g->totalbytes+1024u; +else{ +g->gcdept-=1024u; +g->GCthreshold=g->totalbytes; +} +} +else{ +setthreshold(g); +} +} +static void luaC_barrierf(lua_State*L,GCObject*o,GCObject*v){ +global_State*g=G(L); +if(g->gcstate==1) +reallymarkobject(g,v); +else +makewhite(g,o); +} +static void luaC_barrierback(lua_State*L,Table*t){ +global_State*g=G(L); +GCObject*o=obj2gco(t); +black2gray(o); +t->gclist=g->grayagain; +g->grayagain=o; +} +static void luaC_link(lua_State*L,GCObject*o,lu_byte tt){ +global_State*g=G(L); +o->gch.next=g->rootgc; +g->rootgc=o; +o->gch.marked=luaC_white(g); +o->gch.tt=tt; +} +static void luaC_linkupval(lua_State*L,UpVal*uv){ +global_State*g=G(L); +GCObject*o=obj2gco(uv); +o->gch.next=g->rootgc; +g->rootgc=o; +if(isgray(o)){ +if(g->gcstate==1){ +gray2black(o); +luaC_barrier(L,uv,uv->v); +} +else{ +makewhite(g,o); +} +} +} +typedef union{ +lua_Number r; +TString*ts; +}SemInfo; +typedef struct Token{ +int token; +SemInfo seminfo; +}Token; +typedef struct LexState{ +int current; +int linenumber; +int lastline; +Token t; +Token lookahead; +struct FuncState*fs; +struct lua_State*L; +ZIO*z; +Mbuffer*buff; +TString*source; +char decpoint; +}LexState; +static void luaX_init(lua_State*L); +static void luaX_lexerror(LexState*ls,const char*msg,int token); +#define state_size(x)(sizeof(x)+0) +#define fromstate(l)(cast(lu_byte*,(l))-0) +#define tostate(l)(cast(lua_State*,cast(lu_byte*,l)+0)) +typedef struct LG{ +lua_State l; +global_State g; +}LG; +static void stack_init(lua_State*L1,lua_State*L){ +L1->base_ci=luaM_newvector(L,8,CallInfo); +L1->ci=L1->base_ci; +L1->size_ci=8; +L1->end_ci=L1->base_ci+L1->size_ci-1; +L1->stack=luaM_newvector(L,(2*20)+5,TValue); +L1->stacksize=(2*20)+5; +L1->top=L1->stack; +L1->stack_last=L1->stack+(L1->stacksize-5)-1; +L1->ci->func=L1->top; +setnilvalue(L1->top++); +L1->base=L1->ci->base=L1->top; +L1->ci->top=L1->top+20; +} 
+static void freestack(lua_State*L,lua_State*L1){ +luaM_freearray(L,L1->base_ci,L1->size_ci,CallInfo); +luaM_freearray(L,L1->stack,L1->stacksize,TValue); +} +static void f_luaopen(lua_State*L,void*ud){ +global_State*g=G(L); +UNUSED(ud); +stack_init(L,L); +sethvalue(L,gt(L),luaH_new(L,0,2)); +sethvalue(L,registry(L),luaH_new(L,0,2)); +luaS_resize(L,32); +luaT_init(L); +luaX_init(L); +luaS_fix(luaS_newliteral(L,"not enough memory")); +g->GCthreshold=4*g->totalbytes; +} +static void preinit_state(lua_State*L,global_State*g){ +G(L)=g; +L->stack=NULL; +L->stacksize=0; +L->errorJmp=NULL; +L->hook=NULL; +L->hookmask=0; +L->basehookcount=0; +L->allowhook=1; +resethookcount(L); +L->openupval=NULL; +L->size_ci=0; +L->nCcalls=L->baseCcalls=0; +L->status=0; +L->base_ci=L->ci=NULL; +L->savedpc=NULL; +L->errfunc=0; +setnilvalue(gt(L)); +} +static void close_state(lua_State*L){ +global_State*g=G(L); +luaF_close(L,L->stack); +luaC_freeall(L); +luaM_freearray(L,G(L)->strt.hash,G(L)->strt.size,TString*); +luaZ_freebuffer(L,&g->buff); +freestack(L,L); +(*g->frealloc)(g->ud,fromstate(L),state_size(LG),0); +} +static void luaE_freethread(lua_State*L,lua_State*L1){ +luaF_close(L1,L1->stack); +freestack(L,L1); +luaM_freemem(L,fromstate(L1),state_size(lua_State)); +} +static lua_State*lua_newstate(lua_Alloc f,void*ud){ +int i; +lua_State*L; +global_State*g; +void*l=(*f)(ud,NULL,0,state_size(LG)); +if(l==NULL)return NULL; +L=tostate(l); +g=&((LG*)L)->g; +L->next=NULL; +L->tt=8; +g->currentwhite=bit2mask(0,5); +L->marked=luaC_white(g); +set2bits(L->marked,5,6); +preinit_state(L,g); +g->frealloc=f; +g->ud=ud; +g->mainthread=L; +g->uvhead.u.l.prev=&g->uvhead; +g->uvhead.u.l.next=&g->uvhead; +g->GCthreshold=0; +g->strt.size=0; +g->strt.nuse=0; +g->strt.hash=NULL; +setnilvalue(registry(L)); +luaZ_initbuffer(L,&g->buff); +g->panic=NULL; +g->gcstate=0; +g->rootgc=obj2gco(L); +g->sweepstrgc=0; +g->sweepgc=&g->rootgc; +g->gray=NULL; +g->grayagain=NULL; +g->weak=NULL; +g->tmudata=NULL; +g->totalbytes=sizeof(LG); +g->gcpause=200; +g->gcstepmul=200; +g->gcdept=0; +for(i=0;i<(8+1);i++)g->mt[i]=NULL; +if(luaD_rawrunprotected(L,f_luaopen,NULL)!=0){ +close_state(L); +L=NULL; +} +else +{} +return L; +} +static void callallgcTM(lua_State*L,void*ud){ +UNUSED(ud); +luaC_callGCTM(L); +} +static void lua_close(lua_State*L){ +L=G(L)->mainthread; +luaF_close(L,L->stack); +luaC_separateudata(L,1); +L->errfunc=0; +do{ +L->ci=L->base_ci; +L->base=L->top=L->ci->base; +L->nCcalls=L->baseCcalls=0; +}while(luaD_rawrunprotected(L,callallgcTM,NULL)!=0); +close_state(L); +} +#define getcode(fs,e)((fs)->f->code[(e)->u.s.info]) +#define luaK_codeAsBx(fs,o,A,sBx)luaK_codeABx(fs,o,A,(sBx)+(((1<<(9+9))-1)>>1)) +#define luaK_setmultret(fs,e)luaK_setreturns(fs,e,(-1)) +static int luaK_codeABx(FuncState*fs,OpCode o,int A,unsigned int Bx); +static int luaK_codeABC(FuncState*fs,OpCode o,int A,int B,int C); +static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults); +static void luaK_patchtohere(FuncState*fs,int list); +static void luaK_concat(FuncState*fs,int*l1,int l2); +static int currentpc(lua_State*L,CallInfo*ci){ +if(!isLua(ci))return-1; +if(ci==L->ci) +ci->savedpc=L->savedpc; +return pcRel(ci->savedpc,ci_func(ci)->l.p); +} +static int currentline(lua_State*L,CallInfo*ci){ +int pc=currentpc(L,ci); +if(pc<0) +return-1; +else +return getline_(ci_func(ci)->l.p,pc); +} +static int lua_getstack(lua_State*L,int level,lua_Debug*ar){ +int status; +CallInfo*ci; +for(ci=L->ci;level>0&&ci>L->base_ci;ci--){ +level--; +if(f_isLua(ci)) +level-=ci->tailcalls; 
+} +if(level==0&&ci>L->base_ci){ +status=1; +ar->i_ci=cast_int(ci-L->base_ci); +} +else if(level<0){ +status=1; +ar->i_ci=0; +} +else status=0; +return status; +} +static Proto*getluaproto(CallInfo*ci){ +return(isLua(ci)?ci_func(ci)->l.p:NULL); +} +static void funcinfo(lua_Debug*ar,Closure*cl){ +if(cl->c.isC){ +ar->source="=[C]"; +ar->linedefined=-1; +ar->lastlinedefined=-1; +ar->what="C"; +} +else{ +ar->source=getstr(cl->l.p->source); +ar->linedefined=cl->l.p->linedefined; +ar->lastlinedefined=cl->l.p->lastlinedefined; +ar->what=(ar->linedefined==0)?"main":"Lua"; +} +luaO_chunkid(ar->short_src,ar->source,60); +} +static void info_tailcall(lua_Debug*ar){ +ar->name=ar->namewhat=""; +ar->what="tail"; +ar->lastlinedefined=ar->linedefined=ar->currentline=-1; +ar->source="=(tail call)"; +luaO_chunkid(ar->short_src,ar->source,60); +ar->nups=0; +} +static void collectvalidlines(lua_State*L,Closure*f){ +if(f==NULL||f->c.isC){ +setnilvalue(L->top); +} +else{ +Table*t=luaH_new(L,0,0); +int*lineinfo=f->l.p->lineinfo; +int i; +for(i=0;i<f->l.p->sizelineinfo;i++) +setbvalue(luaH_setnum(L,t,lineinfo[i]),1); +sethvalue(L,L->top,t); +} +incr_top(L); +} +static int auxgetinfo(lua_State*L,const char*what,lua_Debug*ar, +Closure*f,CallInfo*ci){ +int status=1; +if(f==NULL){ +info_tailcall(ar); +return status; +} +for(;*what;what++){ +switch(*what){ +case'S':{ +funcinfo(ar,f); +break; +} +case'l':{ +ar->currentline=(ci)?currentline(L,ci):-1; +break; +} +case'u':{ +ar->nups=f->c.nupvalues; +break; +} +case'n':{ +ar->namewhat=(ci)?NULL:NULL; +if(ar->namewhat==NULL){ +ar->namewhat=""; +ar->name=NULL; +} +break; +} +case'L': +case'f': +break; +default:status=0; +} +} +return status; +} +static int lua_getinfo(lua_State*L,const char*what,lua_Debug*ar){ +int status; +Closure*f=NULL; +CallInfo*ci=NULL; +if(*what=='>'){ +StkId func=L->top-1; +luai_apicheck(L,ttisfunction(func)); +what++; +f=clvalue(func); +L->top--; +} +else if(ar->i_ci!=0){ +ci=L->base_ci+ar->i_ci; +f=clvalue(ci->func); +} +status=auxgetinfo(L,what,ar,f,ci); +if(strchr(what,'f')){ +if(f==NULL)setnilvalue(L->top); +else setclvalue(L,L->top,f); +incr_top(L); +} +if(strchr(what,'L')) +collectvalidlines(L,f); +return status; +} +static int isinstack(CallInfo*ci,const TValue*o){ +StkId p; +for(p=ci->base;p<ci->top;p++) +if(o==p)return 1; +return 0; +} +static void luaG_typeerror(lua_State*L,const TValue*o,const char*op){ +const char*name=NULL; +const char*t=luaT_typenames[ttype(o)]; +const char*kind=(isinstack(L->ci,o))?
+NULL: +NULL; +if(kind) +luaG_runerror(L,"attempt to %s %s "LUA_QL("%s")" (a %s value)", +op,kind,name,t); +else +luaG_runerror(L,"attempt to %s a %s value",op,t); +} +static void luaG_concaterror(lua_State*L,StkId p1,StkId p2){ +if(ttisstring(p1)||ttisnumber(p1))p1=p2; +luaG_typeerror(L,p1,"concatenate"); +} +static void luaG_aritherror(lua_State*L,const TValue*p1,const TValue*p2){ +TValue temp; +if(luaV_tonumber(p1,&temp)==NULL) +p2=p1; +luaG_typeerror(L,p2,"perform arithmetic on"); +} +static int luaG_ordererror(lua_State*L,const TValue*p1,const TValue*p2){ +const char*t1=luaT_typenames[ttype(p1)]; +const char*t2=luaT_typenames[ttype(p2)]; +if(t1[2]==t2[2]) +luaG_runerror(L,"attempt to compare two %s values",t1); +else +luaG_runerror(L,"attempt to compare %s with %s",t1,t2); +return 0; +} +static void addinfo(lua_State*L,const char*msg){ +CallInfo*ci=L->ci; +if(isLua(ci)){ +char buff[60]; +int line=currentline(L,ci); +luaO_chunkid(buff,getstr(getluaproto(ci)->source),60); +luaO_pushfstring(L,"%s:%d: %s",buff,line,msg); +} +} +static void luaG_errormsg(lua_State*L){ +if(L->errfunc!=0){ +StkId errfunc=restorestack(L,L->errfunc); +if(!ttisfunction(errfunc))luaD_throw(L,5); +setobj(L,L->top,L->top-1); +setobj(L,L->top-1,errfunc); +incr_top(L); +luaD_call(L,L->top-2,1); +} +luaD_throw(L,2); +} +static void luaG_runerror(lua_State*L,const char*fmt,...){ +va_list argp; +va_start(argp,fmt); +addinfo(L,luaO_pushvfstring(L,fmt,argp)); +va_end(argp); +luaG_errormsg(L); +} +static int luaZ_fill(ZIO*z){ +size_t size; +lua_State*L=z->L; +const char*buff; +buff=z->reader(L,z->data,&size); +if(buff==NULL||size==0)return(-1); +z->n=size-1; +z->p=buff; +return char2int(*(z->p++)); +} +static void luaZ_init(lua_State*L,ZIO*z,lua_Reader reader,void*data){ +z->L=L; +z->reader=reader; +z->data=data; +z->n=0; +z->p=NULL; +} +static char*luaZ_openspace(lua_State*L,Mbuffer*buff,size_t n){ +if(n>buff->buffsize){ +if(n<32)n=32; +luaZ_resizebuffer(L,buff,n); +} +return buff->buffer; +} +#define opmode(t,a,b,c,m)(((t)<<7)|((a)<<6)|((b)<<4)|((c)<<2)|(m)) +static const lu_byte luaP_opmodes[(cast(int,OP_VARARG)+1)]={ +opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgK,OpArgN,iABx) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgU,OpArgN,iABC) +,opmode(0,1,OpArgK,OpArgN,iABx) +,opmode(0,1,OpArgR,OpArgK,iABC) +,opmode(0,0,OpArgK,OpArgN,iABx) +,opmode(0,0,OpArgU,OpArgN,iABC) +,opmode(0,0,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgR,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgK,OpArgK,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgR,iABC) +,opmode(0,0,OpArgR,OpArgN,iAsBx) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,0,OpArgK,OpArgK,iABC) +,opmode(1,1,OpArgR,OpArgU,iABC) +,opmode(1,1,OpArgR,OpArgU,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,1,OpArgU,OpArgU,iABC) +,opmode(0,0,OpArgU,OpArgN,iABC) +,opmode(0,1,OpArgR,OpArgN,iAsBx) +,opmode(0,1,OpArgR,OpArgN,iAsBx) +,opmode(1,0,OpArgN,OpArgU,iABC) +,opmode(0,0,OpArgU,OpArgU,iABC) +,opmode(0,0,OpArgN,OpArgN,iABC) +,opmode(0,1,OpArgU,OpArgN,iABx) +,opmode(0,1,OpArgU,OpArgN,iABC) +}; +#define next(ls)(ls->current=zgetc(ls->z)) +#define currIsNewline(ls)(ls->current=='\n'||ls->current=='\r') +static const char*const luaX_tokens[]={ 
+"and","break","do","else","elseif", +"end","false","for","function","if", +"in","local","nil","not","or","repeat", +"return","then","true","until","while", +"..","...","==",">=","<=","~=", +"","","","", +NULL +}; +#define save_and_next(ls)(save(ls,ls->current),next(ls)) +static void save(LexState*ls,int c){ +Mbuffer*b=ls->buff; +if(b->n+1>b->buffsize){ +size_t newsize; +if(b->buffsize>=((size_t)(~(size_t)0)-2)/2) +luaX_lexerror(ls,"lexical element too long",0); +newsize=b->buffsize*2; +luaZ_resizebuffer(ls->L,b,newsize); +} +b->buffer[b->n++]=cast(char,c); +} +static void luaX_init(lua_State*L){ +int i; +for(i=0;i<(cast(int,TK_WHILE-257+1));i++){ +TString*ts=luaS_new(L,luaX_tokens[i]); +luaS_fix(ts); +ts->tsv.reserved=cast_byte(i+1); +} +} +static const char*luaX_token2str(LexState*ls,int token){ +if(token<257){ +return(iscntrl(token))?luaO_pushfstring(ls->L,"char(%d)",token): +luaO_pushfstring(ls->L,"%c",token); +} +else +return luaX_tokens[token-257]; +} +static const char*txtToken(LexState*ls,int token){ +switch(token){ +case TK_NAME: +case TK_STRING: +case TK_NUMBER: +save(ls,'\0'); +return luaZ_buffer(ls->buff); +default: +return luaX_token2str(ls,token); +} +} +static void luaX_lexerror(LexState*ls,const char*msg,int token){ +char buff[80]; +luaO_chunkid(buff,getstr(ls->source),80); +msg=luaO_pushfstring(ls->L,"%s:%d: %s",buff,ls->linenumber,msg); +if(token) +luaO_pushfstring(ls->L,"%s near "LUA_QL("%s"),msg,txtToken(ls,token)); +luaD_throw(ls->L,3); +} +static void luaX_syntaxerror(LexState*ls,const char*msg){ +luaX_lexerror(ls,msg,ls->t.token); +} +static TString*luaX_newstring(LexState*ls,const char*str,size_t l){ +lua_State*L=ls->L; +TString*ts=luaS_newlstr(L,str,l); +TValue*o=luaH_setstr(L,ls->fs->h,ts); +if(ttisnil(o)){ +setbvalue(o,1); +luaC_checkGC(L); +} +return ts; +} +static void inclinenumber(LexState*ls){ +int old=ls->current; +next(ls); +if(currIsNewline(ls)&&ls->current!=old) +next(ls); +if(++ls->linenumber>=(INT_MAX-2)) +luaX_syntaxerror(ls,"chunk has too many lines"); +} +static void luaX_setinput(lua_State*L,LexState*ls,ZIO*z,TString*source){ +ls->decpoint='.'; +ls->L=L; +ls->lookahead.token=TK_EOS; +ls->z=z; +ls->fs=NULL; +ls->linenumber=1; +ls->lastline=1; +ls->source=source; +luaZ_resizebuffer(ls->L,ls->buff,32); +next(ls); +} +static int check_next(LexState*ls,const char*set){ +if(!strchr(set,ls->current)) +return 0; +save_and_next(ls); +return 1; +} +static void buffreplace(LexState*ls,char from,char to){ +size_t n=luaZ_bufflen(ls->buff); +char*p=luaZ_buffer(ls->buff); +while(n--) +if(p[n]==from)p[n]=to; +} +static void read_numeral(LexState*ls,SemInfo*seminfo){ +do{ +save_and_next(ls); +}while(isdigit(ls->current)||ls->current=='.'); +if(check_next(ls,"Ee")) +check_next(ls,"+-"); +while(isalnum(ls->current)||ls->current=='_') +save_and_next(ls); +save(ls,'\0'); +buffreplace(ls,'.',ls->decpoint); +if(!luaO_str2d(luaZ_buffer(ls->buff),&seminfo->r)) +luaX_lexerror(ls,"malformed number",TK_NUMBER); +} +static int skip_sep(LexState*ls){ +int count=0; +int s=ls->current; +save_and_next(ls); +while(ls->current=='='){ +save_and_next(ls); +count++; +} +return(ls->current==s)?count:(-count)-1; +} +static void read_long_string(LexState*ls,SemInfo*seminfo,int sep){ +int cont=0; +(void)(cont); +save_and_next(ls); +if(currIsNewline(ls)) +inclinenumber(ls); +for(;;){ +switch(ls->current){ +case(-1): +luaX_lexerror(ls,(seminfo)?"unfinished long string": +"unfinished long comment",TK_EOS); +break; +case']':{ +if(skip_sep(ls)==sep){ +save_and_next(ls); +goto endloop; +} 
+break; +} +case'\n': +case'\r':{ +save(ls,'\n'); +inclinenumber(ls); +if(!seminfo)luaZ_resetbuffer(ls->buff); +break; +} +default:{ +if(seminfo)save_and_next(ls); +else next(ls); +} +} +}endloop: +if(seminfo) +seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+(2+sep), +luaZ_bufflen(ls->buff)-2*(2+sep)); +} +static void read_string(LexState*ls,int del,SemInfo*seminfo){ +save_and_next(ls); +while(ls->current!=del){ +switch(ls->current){ +case(-1): +luaX_lexerror(ls,"unfinished string",TK_EOS); +continue; +case'\n': +case'\r': +luaX_lexerror(ls,"unfinished string",TK_STRING); +continue; +case'\\':{ +int c; +next(ls); +switch(ls->current){ +case'a':c='\a';break; +case'b':c='\b';break; +case'f':c='\f';break; +case'n':c='\n';break; +case'r':c='\r';break; +case't':c='\t';break; +case'v':c='\v';break; +case'\n': +case'\r':save(ls,'\n');inclinenumber(ls);continue; +case(-1):continue; +default:{ +if(!isdigit(ls->current)) +save_and_next(ls); +else{ +int i=0; +c=0; +do{ +c=10*c+(ls->current-'0'); +next(ls); +}while(++i<3&&isdigit(ls->current)); +if(c>UCHAR_MAX) +luaX_lexerror(ls,"escape sequence too large",TK_STRING); +save(ls,c); +} +continue; +} +} +save(ls,c); +next(ls); +continue; +} +default: +save_and_next(ls); +} +} +save_and_next(ls); +seminfo->ts=luaX_newstring(ls,luaZ_buffer(ls->buff)+1, +luaZ_bufflen(ls->buff)-2); +} +static int llex(LexState*ls,SemInfo*seminfo){ +luaZ_resetbuffer(ls->buff); +for(;;){ +switch(ls->current){ +case'\n': +case'\r':{ +inclinenumber(ls); +continue; +} +case'-':{ +next(ls); +if(ls->current!='-')return'-'; +next(ls); +if(ls->current=='['){ +int sep=skip_sep(ls); +luaZ_resetbuffer(ls->buff); +if(sep>=0){ +read_long_string(ls,NULL,sep); +luaZ_resetbuffer(ls->buff); +continue; +} +} +while(!currIsNewline(ls)&&ls->current!=(-1)) +next(ls); +continue; +} +case'[':{ +int sep=skip_sep(ls); +if(sep>=0){ +read_long_string(ls,seminfo,sep); +return TK_STRING; +} +else if(sep==-1)return'['; +else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING); +} +case'=':{ +next(ls); +if(ls->current!='=')return'='; +else{next(ls);return TK_EQ;} +} +case'<':{ +next(ls); +if(ls->current!='=')return'<'; +else{next(ls);return TK_LE;} +} +case'>':{ +next(ls); +if(ls->current!='=')return'>'; +else{next(ls);return TK_GE;} +} +case'~':{ +next(ls); +if(ls->current!='=')return'~'; +else{next(ls);return TK_NE;} +} +case'"': +case'\'':{ +read_string(ls,ls->current,seminfo); +return TK_STRING; +} +case'.':{ +save_and_next(ls); +if(check_next(ls,".")){ +if(check_next(ls,".")) +return TK_DOTS; +else return TK_CONCAT; +} +else if(!isdigit(ls->current))return'.'; +else{ +read_numeral(ls,seminfo); +return TK_NUMBER; +} +} +case(-1):{ +return TK_EOS; +} +default:{ +if(isspace(ls->current)){ +next(ls); +continue; +} +else if(isdigit(ls->current)){ +read_numeral(ls,seminfo); +return TK_NUMBER; +} +else if(isalpha(ls->current)||ls->current=='_'){ +TString*ts; +do{ +save_and_next(ls); +}while(isalnum(ls->current)||ls->current=='_'); +ts=luaX_newstring(ls,luaZ_buffer(ls->buff), +luaZ_bufflen(ls->buff)); +if(ts->tsv.reserved>0) +return ts->tsv.reserved-1+257; +else{ +seminfo->ts=ts; +return TK_NAME; +} +} +else{ +int c=ls->current; +next(ls); +return c; +} +} +} +} +} +static void luaX_next(LexState*ls){ +ls->lastline=ls->linenumber; +if(ls->lookahead.token!=TK_EOS){ +ls->t=ls->lookahead; +ls->lookahead.token=TK_EOS; +} +else +ls->t.token=llex(ls,&ls->t.seminfo); +} +static void luaX_lookahead(LexState*ls){ +ls->lookahead.token=llex(ls,&ls->lookahead.seminfo); +} +#define hasjumps(e)((e)->t!=(e)->f) 
+static int isnumeral(expdesc*e){ +return(e->k==VKNUM&&e->t==(-1)&&e->f==(-1)); +} +static void luaK_nil(FuncState*fs,int from,int n){ +Instruction*previous; +if(fs->pc>fs->lasttarget){ +if(fs->pc==0){ +if(from>=fs->nactvar) +return; +} +else{ +previous=&fs->f->code[fs->pc-1]; +if(GET_OPCODE(*previous)==OP_LOADNIL){ +int pfrom=GETARG_A(*previous); +int pto=GETARG_B(*previous); +if(pfrom<=from&&from<=pto+1){ +if(from+n-1>pto) +SETARG_B(*previous,from+n-1); +return; +} +} +} +} +luaK_codeABC(fs,OP_LOADNIL,from,from+n-1,0); +} +static int luaK_jump(FuncState*fs){ +int jpc=fs->jpc; +int j; +fs->jpc=(-1); +j=luaK_codeAsBx(fs,OP_JMP,0,(-1)); +luaK_concat(fs,&j,jpc); +return j; +} +static void luaK_ret(FuncState*fs,int first,int nret){ +luaK_codeABC(fs,OP_RETURN,first,nret+1,0); +} +static int condjump(FuncState*fs,OpCode op,int A,int B,int C){ +luaK_codeABC(fs,op,A,B,C); +return luaK_jump(fs); +} +static void fixjump(FuncState*fs,int pc,int dest){ +Instruction*jmp=&fs->f->code[pc]; +int offset=dest-(pc+1); +if(abs(offset)>(((1<<(9+9))-1)>>1)) +luaX_syntaxerror(fs->ls,"control structure too long"); +SETARG_sBx(*jmp,offset); +} +static int luaK_getlabel(FuncState*fs){ +fs->lasttarget=fs->pc; +return fs->pc; +} +static int getjump(FuncState*fs,int pc){ +int offset=GETARG_sBx(fs->f->code[pc]); +if(offset==(-1)) +return(-1); +else +return(pc+1)+offset; +} +static Instruction*getjumpcontrol(FuncState*fs,int pc){ +Instruction*pi=&fs->f->code[pc]; +if(pc>=1&&testTMode(GET_OPCODE(*(pi-1)))) +return pi-1; +else +return pi; +} +static int need_value(FuncState*fs,int list){ +for(;list!=(-1);list=getjump(fs,list)){ +Instruction i=*getjumpcontrol(fs,list); +if(GET_OPCODE(i)!=OP_TESTSET)return 1; +} +return 0; +} +static int patchtestreg(FuncState*fs,int node,int reg){ +Instruction*i=getjumpcontrol(fs,node); +if(GET_OPCODE(*i)!=OP_TESTSET) +return 0; +if(reg!=((1<<8)-1)&&reg!=GETARG_B(*i)) +SETARG_A(*i,reg); +else +*i=CREATE_ABC(OP_TEST,GETARG_B(*i),0,GETARG_C(*i)); +return 1; +} +static void removevalues(FuncState*fs,int list){ +for(;list!=(-1);list=getjump(fs,list)) +patchtestreg(fs,list,((1<<8)-1)); +} +static void patchlistaux(FuncState*fs,int list,int vtarget,int reg, +int dtarget){ +while(list!=(-1)){ +int next=getjump(fs,list); +if(patchtestreg(fs,list,reg)) +fixjump(fs,list,vtarget); +else +fixjump(fs,list,dtarget); +list=next; +} +} +static void dischargejpc(FuncState*fs){ +patchlistaux(fs,fs->jpc,fs->pc,((1<<8)-1),fs->pc); +fs->jpc=(-1); +} +static void luaK_patchlist(FuncState*fs,int list,int target){ +if(target==fs->pc) +luaK_patchtohere(fs,list); +else{ +patchlistaux(fs,list,target,((1<<8)-1),target); +} +} +static void luaK_patchtohere(FuncState*fs,int list){ +luaK_getlabel(fs); +luaK_concat(fs,&fs->jpc,list); +} +static void luaK_concat(FuncState*fs,int*l1,int l2){ +if(l2==(-1))return; +else if(*l1==(-1)) +*l1=l2; +else{ +int list=*l1; +int next; +while((next=getjump(fs,list))!=(-1)) +list=next; +fixjump(fs,list,l2); +} +} +static void luaK_checkstack(FuncState*fs,int n){ +int newstack=fs->freereg+n; +if(newstack>fs->f->maxstacksize){ +if(newstack>=250) +luaX_syntaxerror(fs->ls,"function or expression too complex"); +fs->f->maxstacksize=cast_byte(newstack); +} +} +static void luaK_reserveregs(FuncState*fs,int n){ +luaK_checkstack(fs,n); +fs->freereg+=n; +} +static void freereg(FuncState*fs,int reg){ +if(!ISK(reg)&&reg>=fs->nactvar){ +fs->freereg--; +} +} +static void freeexp(FuncState*fs,expdesc*e){ +if(e->k==VNONRELOC) +freereg(fs,e->u.s.info); +} +static int addk(FuncState*fs,TValue*k,TValue*v){
+lua_State*L=fs->L; +TValue*idx=luaH_set(L,fs->h,k); +Proto*f=fs->f; +int oldsize=f->sizek; +if(ttisnumber(idx)){ +return cast_int(nvalue(idx)); +} +else{ +setnvalue(idx,cast_num(fs->nk)); +luaM_growvector(L,f->k,fs->nk,f->sizek,TValue, +((1<<(9+9))-1),"constant table overflow"); +while(oldsize<f->sizek)setnilvalue(&f->k[oldsize++]); +setobj(L,&f->k[fs->nk],v); +luaC_barrier(L,f,v); +return fs->nk++; +} +} +static int luaK_stringK(FuncState*fs,TString*s){ +TValue o; +setsvalue(fs->L,&o,s); +return addk(fs,&o,&o); +} +static int luaK_numberK(FuncState*fs,lua_Number r){ +TValue o; +setnvalue(&o,r); +return addk(fs,&o,&o); +} +static int boolK(FuncState*fs,int b){ +TValue o; +setbvalue(&o,b); +return addk(fs,&o,&o); +} +static int nilK(FuncState*fs){ +TValue k,v; +setnilvalue(&v); +sethvalue(fs->L,&k,fs->h); +return addk(fs,&k,&v); +} +static void luaK_setreturns(FuncState*fs,expdesc*e,int nresults){ +if(e->k==VCALL){ +SETARG_C(getcode(fs,e),nresults+1); +} +else if(e->k==VVARARG){ +SETARG_B(getcode(fs,e),nresults+1); +SETARG_A(getcode(fs,e),fs->freereg); +luaK_reserveregs(fs,1); +} +} +static void luaK_setoneret(FuncState*fs,expdesc*e){ +if(e->k==VCALL){ +e->k=VNONRELOC; +e->u.s.info=GETARG_A(getcode(fs,e)); +} +else if(e->k==VVARARG){ +SETARG_B(getcode(fs,e),2); +e->k=VRELOCABLE; +} +} +static void luaK_dischargevars(FuncState*fs,expdesc*e){ +switch(e->k){ +case VLOCAL:{ +e->k=VNONRELOC; +break; +} +case VUPVAL:{ +e->u.s.info=luaK_codeABC(fs,OP_GETUPVAL,0,e->u.s.info,0); +e->k=VRELOCABLE; +break; +} +case VGLOBAL:{ +e->u.s.info=luaK_codeABx(fs,OP_GETGLOBAL,0,e->u.s.info); +e->k=VRELOCABLE; +break; +} +case VINDEXED:{ +freereg(fs,e->u.s.aux); +freereg(fs,e->u.s.info); +e->u.s.info=luaK_codeABC(fs,OP_GETTABLE,0,e->u.s.info,e->u.s.aux); +e->k=VRELOCABLE; +break; +} +case VVARARG: +case VCALL:{ +luaK_setoneret(fs,e); +break; +} +default:break; +} +} +static int code_label(FuncState*fs,int A,int b,int jump){ +luaK_getlabel(fs); +return luaK_codeABC(fs,OP_LOADBOOL,A,b,jump); +} +static void discharge2reg(FuncState*fs,expdesc*e,int reg){ +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:{ +luaK_nil(fs,reg,1); +break; +} +case VFALSE:case VTRUE:{ +luaK_codeABC(fs,OP_LOADBOOL,reg,e->k==VTRUE,0); +break; +} +case VK:{ +luaK_codeABx(fs,OP_LOADK,reg,e->u.s.info); +break; +} +case VKNUM:{ +luaK_codeABx(fs,OP_LOADK,reg,luaK_numberK(fs,e->u.nval)); +break; +} +case VRELOCABLE:{ +Instruction*pc=&getcode(fs,e); +SETARG_A(*pc,reg); +break; +} +case VNONRELOC:{ +if(reg!=e->u.s.info) +luaK_codeABC(fs,OP_MOVE,reg,e->u.s.info,0); +break; +} +default:{ +return; +} +} +e->u.s.info=reg; +e->k=VNONRELOC; +} +static void discharge2anyreg(FuncState*fs,expdesc*e){ +if(e->k!=VNONRELOC){ +luaK_reserveregs(fs,1); +discharge2reg(fs,e,fs->freereg-1); +} +} +static void exp2reg(FuncState*fs,expdesc*e,int reg){ +discharge2reg(fs,e,reg); +if(e->k==VJMP) +luaK_concat(fs,&e->t,e->u.s.info); +if(hasjumps(e)){ +int final; +int p_f=(-1); +int p_t=(-1); +if(need_value(fs,e->t)||need_value(fs,e->f)){ +int fj=(e->k==VJMP)?(-1):luaK_jump(fs); +p_f=code_label(fs,reg,0,1); +p_t=code_label(fs,reg,1,0); +luaK_patchtohere(fs,fj); +} +final=luaK_getlabel(fs); +patchlistaux(fs,e->f,final,reg,p_f); +patchlistaux(fs,e->t,final,reg,p_t); +} +e->f=e->t=(-1); +e->u.s.info=reg; +e->k=VNONRELOC; +} +static void luaK_exp2nextreg(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e); +freeexp(fs,e); +luaK_reserveregs(fs,1); +exp2reg(fs,e,fs->freereg-1); +} +static int luaK_exp2anyreg(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e);
+if(e->k==VNONRELOC){ +if(!hasjumps(e))return e->u.s.info; +if(e->u.s.info>=fs->nactvar){ +exp2reg(fs,e,e->u.s.info); +return e->u.s.info; +} +} +luaK_exp2nextreg(fs,e); +return e->u.s.info; +} +static void luaK_exp2val(FuncState*fs,expdesc*e){ +if(hasjumps(e)) +luaK_exp2anyreg(fs,e); +else +luaK_dischargevars(fs,e); +} +static int luaK_exp2RK(FuncState*fs,expdesc*e){ +luaK_exp2val(fs,e); +switch(e->k){ +case VKNUM: +case VTRUE: +case VFALSE: +case VNIL:{ +if(fs->nk<=((1<<(9-1))-1)){ +e->u.s.info=(e->k==VNIL)?nilK(fs): +(e->k==VKNUM)?luaK_numberK(fs,e->u.nval): +boolK(fs,(e->k==VTRUE)); +e->k=VK; +return RKASK(e->u.s.info); +} +else break; +} +case VK:{ +if(e->u.s.info<=((1<<(9-1))-1)) +return RKASK(e->u.s.info); +else break; +} +default:break; +} +return luaK_exp2anyreg(fs,e); +} +static void luaK_storevar(FuncState*fs,expdesc*var,expdesc*ex){ +switch(var->k){ +case VLOCAL:{ +freeexp(fs,ex); +exp2reg(fs,ex,var->u.s.info); +return; +} +case VUPVAL:{ +int e=luaK_exp2anyreg(fs,ex); +luaK_codeABC(fs,OP_SETUPVAL,e,var->u.s.info,0); +break; +} +case VGLOBAL:{ +int e=luaK_exp2anyreg(fs,ex); +luaK_codeABx(fs,OP_SETGLOBAL,e,var->u.s.info); +break; +} +case VINDEXED:{ +int e=luaK_exp2RK(fs,ex); +luaK_codeABC(fs,OP_SETTABLE,var->u.s.info,var->u.s.aux,e); +break; +} +default:{ +break; +} +} +freeexp(fs,ex); +} +static void luaK_self(FuncState*fs,expdesc*e,expdesc*key){ +int func; +luaK_exp2anyreg(fs,e); +freeexp(fs,e); +func=fs->freereg; +luaK_reserveregs(fs,2); +luaK_codeABC(fs,OP_SELF,func,e->u.s.info,luaK_exp2RK(fs,key)); +freeexp(fs,key); +e->u.s.info=func; +e->k=VNONRELOC; +} +static void invertjump(FuncState*fs,expdesc*e){ +Instruction*pc=getjumpcontrol(fs,e->u.s.info); +SETARG_A(*pc,!(GETARG_A(*pc))); +} +static int jumponcond(FuncState*fs,expdesc*e,int cond){ +if(e->k==VRELOCABLE){ +Instruction ie=getcode(fs,e); +if(GET_OPCODE(ie)==OP_NOT){ +fs->pc--; +return condjump(fs,OP_TEST,GETARG_B(ie),0,!cond); +} +} +discharge2anyreg(fs,e); +freeexp(fs,e); +return condjump(fs,OP_TESTSET,((1<<8)-1),e->u.s.info,cond); +} +static void luaK_goiftrue(FuncState*fs,expdesc*e){ +int pc; +luaK_dischargevars(fs,e); +switch(e->k){ +case VK:case VKNUM:case VTRUE:{ +pc=(-1); +break; +} +case VJMP:{ +invertjump(fs,e); +pc=e->u.s.info; +break; +} +default:{ +pc=jumponcond(fs,e,0); +break; +} +} +luaK_concat(fs,&e->f,pc); +luaK_patchtohere(fs,e->t); +e->t=(-1); +} +static void luaK_goiffalse(FuncState*fs,expdesc*e){ +int pc; +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:case VFALSE:{ +pc=(-1); +break; +} +case VJMP:{ +pc=e->u.s.info; +break; +} +default:{ +pc=jumponcond(fs,e,1); +break; +} +} +luaK_concat(fs,&e->t,pc); +luaK_patchtohere(fs,e->f); +e->f=(-1); +} +static void codenot(FuncState*fs,expdesc*e){ +luaK_dischargevars(fs,e); +switch(e->k){ +case VNIL:case VFALSE:{ +e->k=VTRUE; +break; +} +case VK:case VKNUM:case VTRUE:{ +e->k=VFALSE; +break; +} +case VJMP:{ +invertjump(fs,e); +break; +} +case VRELOCABLE: +case VNONRELOC:{ +discharge2anyreg(fs,e); +freeexp(fs,e); +e->u.s.info=luaK_codeABC(fs,OP_NOT,0,e->u.s.info,0); +e->k=VRELOCABLE; +break; +} +default:{ +break; +} +} +{int temp=e->f;e->f=e->t;e->t=temp;} +removevalues(fs,e->f); +removevalues(fs,e->t); +} +static void luaK_indexed(FuncState*fs,expdesc*t,expdesc*k){ +t->u.s.aux=luaK_exp2RK(fs,k); +t->k=VINDEXED; +} +static int constfolding(OpCode op,expdesc*e1,expdesc*e2){ +lua_Number v1,v2,r; +if(!isnumeral(e1)||!isnumeral(e2))return 0; +v1=e1->u.nval; +v2=e2->u.nval; +switch(op){ +case OP_ADD:r=luai_numadd(v1,v2);break; +case 
OP_SUB:r=luai_numsub(v1,v2);break; +case OP_MUL:r=luai_nummul(v1,v2);break; +case OP_DIV: +if(v2==0)return 0; +r=luai_numdiv(v1,v2);break; +case OP_MOD: +if(v2==0)return 0; +r=luai_nummod(v1,v2);break; +case OP_POW:r=luai_numpow(v1,v2);break; +case OP_UNM:r=luai_numunm(v1);break; +case OP_LEN:return 0; +default:r=0;break; +} +if(luai_numisnan(r))return 0; +e1->u.nval=r; +return 1; +} +static void codearith(FuncState*fs,OpCode op,expdesc*e1,expdesc*e2){ +if(constfolding(op,e1,e2)) +return; +else{ +int o2=(op!=OP_UNM&&op!=OP_LEN)?luaK_exp2RK(fs,e2):0; +int o1=luaK_exp2RK(fs,e1); +if(o1>o2){ +freeexp(fs,e1); +freeexp(fs,e2); +} +else{ +freeexp(fs,e2); +freeexp(fs,e1); +} +e1->u.s.info=luaK_codeABC(fs,op,0,o1,o2); +e1->k=VRELOCABLE; +} +} +static void codecomp(FuncState*fs,OpCode op,int cond,expdesc*e1, +expdesc*e2){ +int o1=luaK_exp2RK(fs,e1); +int o2=luaK_exp2RK(fs,e2); +freeexp(fs,e2); +freeexp(fs,e1); +if(cond==0&&op!=OP_EQ){ +int temp; +temp=o1;o1=o2;o2=temp; +cond=1; +} +e1->u.s.info=condjump(fs,op,cond,o1,o2); +e1->k=VJMP; +} +static void luaK_prefix(FuncState*fs,UnOpr op,expdesc*e){ +expdesc e2; +e2.t=e2.f=(-1);e2.k=VKNUM;e2.u.nval=0; +switch(op){ +case OPR_MINUS:{ +if(!isnumeral(e)) +luaK_exp2anyreg(fs,e); +codearith(fs,OP_UNM,e,&e2); +break; +} +case OPR_NOT:codenot(fs,e);break; +case OPR_LEN:{ +luaK_exp2anyreg(fs,e); +codearith(fs,OP_LEN,e,&e2); +break; +} +default:; +} +} +static void luaK_infix(FuncState*fs,BinOpr op,expdesc*v){ +switch(op){ +case OPR_AND:{ +luaK_goiftrue(fs,v); +break; +} +case OPR_OR:{ +luaK_goiffalse(fs,v); +break; +} +case OPR_CONCAT:{ +luaK_exp2nextreg(fs,v); +break; +} +case OPR_ADD:case OPR_SUB:case OPR_MUL:case OPR_DIV: +case OPR_MOD:case OPR_POW:{ +if(!isnumeral(v))luaK_exp2RK(fs,v); +break; +} +default:{ +luaK_exp2RK(fs,v); +break; +} +} +} +static void luaK_posfix(FuncState*fs,BinOpr op,expdesc*e1,expdesc*e2){ +switch(op){ +case OPR_AND:{ +luaK_dischargevars(fs,e2); +luaK_concat(fs,&e2->f,e1->f); +*e1=*e2; +break; +} +case OPR_OR:{ +luaK_dischargevars(fs,e2); +luaK_concat(fs,&e2->t,e1->t); +*e1=*e2; +break; +} +case OPR_CONCAT:{ +luaK_exp2val(fs,e2); +if(e2->k==VRELOCABLE&&GET_OPCODE(getcode(fs,e2))==OP_CONCAT){ +freeexp(fs,e1); +SETARG_B(getcode(fs,e2),e1->u.s.info); +e1->k=VRELOCABLE;e1->u.s.info=e2->u.s.info; +} +else{ +luaK_exp2nextreg(fs,e2); +codearith(fs,OP_CONCAT,e1,e2); +} +break; +} +case OPR_ADD:codearith(fs,OP_ADD,e1,e2);break; +case OPR_SUB:codearith(fs,OP_SUB,e1,e2);break; +case OPR_MUL:codearith(fs,OP_MUL,e1,e2);break; +case OPR_DIV:codearith(fs,OP_DIV,e1,e2);break; +case OPR_MOD:codearith(fs,OP_MOD,e1,e2);break; +case OPR_POW:codearith(fs,OP_POW,e1,e2);break; +case OPR_EQ:codecomp(fs,OP_EQ,1,e1,e2);break; +case OPR_NE:codecomp(fs,OP_EQ,0,e1,e2);break; +case OPR_LT:codecomp(fs,OP_LT,1,e1,e2);break; +case OPR_LE:codecomp(fs,OP_LE,1,e1,e2);break; +case OPR_GT:codecomp(fs,OP_LT,0,e1,e2);break; +case OPR_GE:codecomp(fs,OP_LE,0,e1,e2);break; +default:; +} +} +static void luaK_fixline(FuncState*fs,int line){ +fs->f->lineinfo[fs->pc-1]=line; +} +static int luaK_code(FuncState*fs,Instruction i,int line){ +Proto*f=fs->f; +dischargejpc(fs); +luaM_growvector(fs->L,f->code,fs->pc,f->sizecode,Instruction, +(INT_MAX-2),"code size overflow"); +f->code[fs->pc]=i; +luaM_growvector(fs->L,f->lineinfo,fs->pc,f->sizelineinfo,int, +(INT_MAX-2),"code size overflow"); +f->lineinfo[fs->pc]=line; +return fs->pc++; +} +static int luaK_codeABC(FuncState*fs,OpCode o,int a,int b,int c){ +return luaK_code(fs,CREATE_ABC(o,a,b,c),fs->ls->lastline); +} +static int 
luaK_codeABx(FuncState*fs,OpCode o,int a,unsigned int bc){ +return luaK_code(fs,CREATE_ABx(o,a,bc),fs->ls->lastline); +} +static void luaK_setlist(FuncState*fs,int base,int nelems,int tostore){ +int c=(nelems-1)/50+1; +int b=(tostore==(-1))?0:tostore; +if(c<=((1<<9)-1)) +luaK_codeABC(fs,OP_SETLIST,base,b,c); +else{ +luaK_codeABC(fs,OP_SETLIST,base,b,0); +luaK_code(fs,cast(Instruction,c),fs->ls->lastline); +} +fs->freereg=base+1; +} +#define hasmultret(k)((k)==VCALL||(k)==VVARARG) +#define getlocvar(fs,i)((fs)->f->locvars[(fs)->actvar[i]]) +#define luaY_checklimit(fs,v,l,m)if((v)>(l))errorlimit(fs,l,m) +typedef struct BlockCnt{ +struct BlockCnt*previous; +int breaklist; +lu_byte nactvar; +lu_byte upval; +lu_byte isbreakable; +}BlockCnt; +static void chunk(LexState*ls); +static void expr(LexState*ls,expdesc*v); +static void anchor_token(LexState*ls){ +if(ls->t.token==TK_NAME||ls->t.token==TK_STRING){ +TString*ts=ls->t.seminfo.ts; +luaX_newstring(ls,getstr(ts),ts->tsv.len); +} +} +static void error_expected(LexState*ls,int token){ +luaX_syntaxerror(ls, +luaO_pushfstring(ls->L,LUA_QL("%s")" expected",luaX_token2str(ls,token))); +} +static void errorlimit(FuncState*fs,int limit,const char*what){ +const char*msg=(fs->f->linedefined==0)? +luaO_pushfstring(fs->L,"main function has more than %d %s",limit,what): +luaO_pushfstring(fs->L,"function at line %d has more than %d %s", +fs->f->linedefined,limit,what); +luaX_lexerror(fs->ls,msg,0); +} +static int testnext(LexState*ls,int c){ +if(ls->t.token==c){ +luaX_next(ls); +return 1; +} +else return 0; +} +static void check(LexState*ls,int c){ +if(ls->t.token!=c) +error_expected(ls,c); +} +static void checknext(LexState*ls,int c){ +check(ls,c); +luaX_next(ls); +} +#define check_condition(ls,c,msg){if(!(c))luaX_syntaxerror(ls,msg);} +static void check_match(LexState*ls,int what,int who,int where){ +if(!testnext(ls,what)){ +if(where==ls->linenumber) +error_expected(ls,what); +else{ +luaX_syntaxerror(ls,luaO_pushfstring(ls->L, +LUA_QL("%s")" expected (to close "LUA_QL("%s")" at line %d)", +luaX_token2str(ls,what),luaX_token2str(ls,who),where)); +} +} +} +static TString*str_checkname(LexState*ls){ +TString*ts; +check(ls,TK_NAME); +ts=ls->t.seminfo.ts; +luaX_next(ls); +return ts; +} +static void init_exp(expdesc*e,expkind k,int i){ +e->f=e->t=(-1); +e->k=k; +e->u.s.info=i; +} +static void codestring(LexState*ls,expdesc*e,TString*s){ +init_exp(e,VK,luaK_stringK(ls->fs,s)); +} +static void checkname(LexState*ls,expdesc*e){ +codestring(ls,e,str_checkname(ls)); +} +static int registerlocalvar(LexState*ls,TString*varname){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int oldsize=f->sizelocvars; +luaM_growvector(ls->L,f->locvars,fs->nlocvars,f->sizelocvars, +LocVar,SHRT_MAX,"too many local variables"); +while(oldsize<f->sizelocvars)f->locvars[oldsize++].varname=NULL; +f->locvars[fs->nlocvars].varname=varname; +luaC_objbarrier(ls->L,f,varname); +return fs->nlocvars++; +} +#define new_localvarliteral(ls,v,n)new_localvar(ls,luaX_newstring(ls,""v,(sizeof(v)/sizeof(char))-1),n) +static void new_localvar(LexState*ls,TString*name,int n){ +FuncState*fs=ls->fs; +luaY_checklimit(fs,fs->nactvar+n+1,200,"local variables"); +fs->actvar[fs->nactvar+n]=cast(unsigned short,registerlocalvar(ls,name)); +} +static void adjustlocalvars(LexState*ls,int nvars){ +FuncState*fs=ls->fs; +fs->nactvar=cast_byte(fs->nactvar+nvars); +for(;nvars;nvars--){ +getlocvar(fs,fs->nactvar-nvars).startpc=fs->pc; +} +} +static void removevars(LexState*ls,int tolevel){ +FuncState*fs=ls->fs;
+while(fs->nactvar>tolevel) +getlocvar(fs,--fs->nactvar).endpc=fs->pc; +} +static int indexupvalue(FuncState*fs,TString*name,expdesc*v){ +int i; +Proto*f=fs->f; +int oldsize=f->sizeupvalues; +for(i=0;i<f->nups;i++){ +if(fs->upvalues[i].k==v->k&&fs->upvalues[i].info==v->u.s.info){ +return i; +} +} +luaY_checklimit(fs,f->nups+1,60,"upvalues"); +luaM_growvector(fs->L,f->upvalues,f->nups,f->sizeupvalues, +TString*,(INT_MAX-2),""); +while(oldsize<f->sizeupvalues)f->upvalues[oldsize++]=NULL; +f->upvalues[f->nups]=name; +luaC_objbarrier(fs->L,f,name); +fs->upvalues[f->nups].k=cast_byte(v->k); +fs->upvalues[f->nups].info=cast_byte(v->u.s.info); +return f->nups++; +} +static int searchvar(FuncState*fs,TString*n){ +int i; +for(i=fs->nactvar-1;i>=0;i--){ +if(n==getlocvar(fs,i).varname) +return i; +} +return-1; +} +static void markupval(FuncState*fs,int level){ +BlockCnt*bl=fs->bl; +while(bl&&bl->nactvar>level)bl=bl->previous; +if(bl)bl->upval=1; +} +static int singlevaraux(FuncState*fs,TString*n,expdesc*var,int base){ +if(fs==NULL){ +init_exp(var,VGLOBAL,((1<<8)-1)); +return VGLOBAL; +} +else{ +int v=searchvar(fs,n); +if(v>=0){ +init_exp(var,VLOCAL,v); +if(!base) +markupval(fs,v); +return VLOCAL; +} +else{ +if(singlevaraux(fs->prev,n,var,0)==VGLOBAL) +return VGLOBAL; +var->u.s.info=indexupvalue(fs,n,var); +var->k=VUPVAL; +return VUPVAL; +} +} +} +static void singlevar(LexState*ls,expdesc*var){ +TString*varname=str_checkname(ls); +FuncState*fs=ls->fs; +if(singlevaraux(fs,varname,var,1)==VGLOBAL) +var->u.s.info=luaK_stringK(fs,varname); +} +static void adjust_assign(LexState*ls,int nvars,int nexps,expdesc*e){ +FuncState*fs=ls->fs; +int extra=nvars-nexps; +if(hasmultret(e->k)){ +extra++; +if(extra<0)extra=0; +luaK_setreturns(fs,e,extra); +if(extra>1)luaK_reserveregs(fs,extra-1); +} +else{ +if(e->k!=VVOID)luaK_exp2nextreg(fs,e); +if(extra>0){ +int reg=fs->freereg; +luaK_reserveregs(fs,extra); +luaK_nil(fs,reg,extra); +} +} +} +static void enterlevel(LexState*ls){ +if(++ls->L->nCcalls>200) +luaX_lexerror(ls,"chunk has too many syntax levels",0); +} +#define leavelevel(ls)((ls)->L->nCcalls--) +static void enterblock(FuncState*fs,BlockCnt*bl,lu_byte isbreakable){ +bl->breaklist=(-1); +bl->isbreakable=isbreakable; +bl->nactvar=fs->nactvar; +bl->upval=0; +bl->previous=fs->bl; +fs->bl=bl; +} +static void leaveblock(FuncState*fs){ +BlockCnt*bl=fs->bl; +fs->bl=bl->previous; +removevars(fs->ls,bl->nactvar); +if(bl->upval) +luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0); +fs->freereg=fs->nactvar; +luaK_patchtohere(fs,bl->breaklist); +} +static void pushclosure(LexState*ls,FuncState*func,expdesc*v){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int oldsize=f->sizep; +int i; +luaM_growvector(ls->L,f->p,fs->np,f->sizep,Proto*, +((1<<(9+9))-1),"constant table overflow"); +while(oldsize<f->sizep)f->p[oldsize++]=NULL; +f->p[fs->np++]=func->f; +luaC_objbarrier(ls->L,f,func->f); +init_exp(v,VRELOCABLE,luaK_codeABx(fs,OP_CLOSURE,0,fs->np-1)); +for(i=0;i<func->f->nups;i++){ +OpCode o=(func->upvalues[i].k==VLOCAL)?OP_MOVE:OP_GETUPVAL; +luaK_codeABC(fs,o,0,func->upvalues[i].info,0); +} +} +static void open_func(LexState*ls,FuncState*fs){ +lua_State*L=ls->L; +Proto*f=luaF_newproto(L); +fs->f=f; +fs->prev=ls->fs; +fs->ls=ls; +fs->L=L; +ls->fs=fs; +fs->pc=0; +fs->lasttarget=-1; +fs->jpc=(-1); +fs->freereg=0; +fs->nk=0; +fs->np=0; +fs->nlocvars=0; +fs->nactvar=0; +fs->bl=NULL; +f->source=ls->source; +f->maxstacksize=2; +fs->h=luaH_new(L,0,0); +sethvalue(L,L->top,fs->h); +incr_top(L); +setptvalue(L,L->top,f); +incr_top(L); +} +static void
close_func(LexState*ls){ +lua_State*L=ls->L; +FuncState*fs=ls->fs; +Proto*f=fs->f; +removevars(ls,0); +luaK_ret(fs,0,0); +luaM_reallocvector(L,f->code,f->sizecode,fs->pc,Instruction); +f->sizecode=fs->pc; +luaM_reallocvector(L,f->lineinfo,f->sizelineinfo,fs->pc,int); +f->sizelineinfo=fs->pc; +luaM_reallocvector(L,f->k,f->sizek,fs->nk,TValue); +f->sizek=fs->nk; +luaM_reallocvector(L,f->p,f->sizep,fs->np,Proto*); +f->sizep=fs->np; +luaM_reallocvector(L,f->locvars,f->sizelocvars,fs->nlocvars,LocVar); +f->sizelocvars=fs->nlocvars; +luaM_reallocvector(L,f->upvalues,f->sizeupvalues,f->nups,TString*); +f->sizeupvalues=f->nups; +ls->fs=fs->prev; +if(fs)anchor_token(ls); +L->top-=2; +} +static Proto*luaY_parser(lua_State*L,ZIO*z,Mbuffer*buff,const char*name){ +struct LexState lexstate; +struct FuncState funcstate; +lexstate.buff=buff; +luaX_setinput(L,&lexstate,z,luaS_new(L,name)); +open_func(&lexstate,&funcstate); +funcstate.f->is_vararg=2; +luaX_next(&lexstate); +chunk(&lexstate); +check(&lexstate,TK_EOS); +close_func(&lexstate); +return funcstate.f; +} +static void field(LexState*ls,expdesc*v){ +FuncState*fs=ls->fs; +expdesc key; +luaK_exp2anyreg(fs,v); +luaX_next(ls); +checkname(ls,&key); +luaK_indexed(fs,v,&key); +} +static void yindex(LexState*ls,expdesc*v){ +luaX_next(ls); +expr(ls,v); +luaK_exp2val(ls->fs,v); +checknext(ls,']'); +} +struct ConsControl{ +expdesc v; +expdesc*t; +int nh; +int na; +int tostore; +}; +static void recfield(LexState*ls,struct ConsControl*cc){ +FuncState*fs=ls->fs; +int reg=ls->fs->freereg; +expdesc key,val; +int rkkey; +if(ls->t.token==TK_NAME){ +luaY_checklimit(fs,cc->nh,(INT_MAX-2),"items in a constructor"); +checkname(ls,&key); +} +else +yindex(ls,&key); +cc->nh++; +checknext(ls,'='); +rkkey=luaK_exp2RK(fs,&key); +expr(ls,&val); +luaK_codeABC(fs,OP_SETTABLE,cc->t->u.s.info,rkkey,luaK_exp2RK(fs,&val)); +fs->freereg=reg; +} +static void closelistfield(FuncState*fs,struct ConsControl*cc){ +if(cc->v.k==VVOID)return; +luaK_exp2nextreg(fs,&cc->v); +cc->v.k=VVOID; +if(cc->tostore==50){ +luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore); +cc->tostore=0; +} +} +static void lastlistfield(FuncState*fs,struct ConsControl*cc){ +if(cc->tostore==0)return; +if(hasmultret(cc->v.k)){ +luaK_setmultret(fs,&cc->v); +luaK_setlist(fs,cc->t->u.s.info,cc->na,(-1)); +cc->na--; +} +else{ +if(cc->v.k!=VVOID) +luaK_exp2nextreg(fs,&cc->v); +luaK_setlist(fs,cc->t->u.s.info,cc->na,cc->tostore); +} +} +static void listfield(LexState*ls,struct ConsControl*cc){ +expr(ls,&cc->v); +luaY_checklimit(ls->fs,cc->na,(INT_MAX-2),"items in a constructor"); +cc->na++; +cc->tostore++; +} +static void constructor(LexState*ls,expdesc*t){ +FuncState*fs=ls->fs; +int line=ls->linenumber; +int pc=luaK_codeABC(fs,OP_NEWTABLE,0,0,0); +struct ConsControl cc; +cc.na=cc.nh=cc.tostore=0; +cc.t=t; +init_exp(t,VRELOCABLE,pc); +init_exp(&cc.v,VVOID,0); +luaK_exp2nextreg(ls->fs,t); +checknext(ls,'{'); +do{ +if(ls->t.token=='}')break; +closelistfield(fs,&cc); +switch(ls->t.token){ +case TK_NAME:{ +luaX_lookahead(ls); +if(ls->lookahead.token!='=') +listfield(ls,&cc); +else +recfield(ls,&cc); +break; +} +case'[':{ +recfield(ls,&cc); +break; +} +default:{ +listfield(ls,&cc); +break; +} +} +}while(testnext(ls,',')||testnext(ls,';')); +check_match(ls,'}','{',line); +lastlistfield(fs,&cc); +SETARG_B(fs->f->code[pc],luaO_int2fb(cc.na)); +SETARG_C(fs->f->code[pc],luaO_int2fb(cc.nh)); +} +static void parlist(LexState*ls){ +FuncState*fs=ls->fs; +Proto*f=fs->f; +int nparams=0; +f->is_vararg=0; +if(ls->t.token!=')'){ +do{ 
+switch(ls->t.token){ +case TK_NAME:{ +new_localvar(ls,str_checkname(ls),nparams++); +break; +} +case TK_DOTS:{ +luaX_next(ls); +f->is_vararg|=2; +break; +} +default:luaX_syntaxerror(ls,"<name> or "LUA_QL("...")" expected"); +} +}while(!f->is_vararg&&testnext(ls,',')); +} +adjustlocalvars(ls,nparams); +f->numparams=cast_byte(fs->nactvar-(f->is_vararg&1)); +luaK_reserveregs(fs,fs->nactvar); +} +static void body(LexState*ls,expdesc*e,int needself,int line){ +FuncState new_fs; +open_func(ls,&new_fs); +new_fs.f->linedefined=line; +checknext(ls,'('); +if(needself){ +new_localvarliteral(ls,"self",0); +adjustlocalvars(ls,1); +} +parlist(ls); +checknext(ls,')'); +chunk(ls); +new_fs.f->lastlinedefined=ls->linenumber; +check_match(ls,TK_END,TK_FUNCTION,line); +close_func(ls); +pushclosure(ls,&new_fs,e); +} +static int explist1(LexState*ls,expdesc*v){ +int n=1; +expr(ls,v); +while(testnext(ls,',')){ +luaK_exp2nextreg(ls->fs,v); +expr(ls,v); +n++; +} +return n; +} +static void funcargs(LexState*ls,expdesc*f){ +FuncState*fs=ls->fs; +expdesc args; +int base,nparams; +int line=ls->linenumber; +switch(ls->t.token){ +case'(':{ +if(line!=ls->lastline) +luaX_syntaxerror(ls,"ambiguous syntax (function call x new statement)"); +luaX_next(ls); +if(ls->t.token==')') +args.k=VVOID; +else{ +explist1(ls,&args); +luaK_setmultret(fs,&args); +} +check_match(ls,')','(',line); +break; +} +case'{':{ +constructor(ls,&args); +break; +} +case TK_STRING:{ +codestring(ls,&args,ls->t.seminfo.ts); +luaX_next(ls); +break; +} +default:{ +luaX_syntaxerror(ls,"function arguments expected"); +return; +} +} +base=f->u.s.info; +if(hasmultret(args.k)) +nparams=(-1); +else{ +if(args.k!=VVOID) +luaK_exp2nextreg(fs,&args); +nparams=fs->freereg-(base+1); +} +init_exp(f,VCALL,luaK_codeABC(fs,OP_CALL,base,nparams+1,2)); +luaK_fixline(fs,line); +fs->freereg=base+1; +} +static void prefixexp(LexState*ls,expdesc*v){ +switch(ls->t.token){ +case'(':{ +int line=ls->linenumber; +luaX_next(ls); +expr(ls,v); +check_match(ls,')','(',line); +luaK_dischargevars(ls->fs,v); +return; +} +case TK_NAME:{ +singlevar(ls,v); +return; +} +default:{ +luaX_syntaxerror(ls,"unexpected symbol"); +return; +} +} +} +static void primaryexp(LexState*ls,expdesc*v){ +FuncState*fs=ls->fs; +prefixexp(ls,v); +for(;;){ +switch(ls->t.token){ +case'.':{ +field(ls,v); +break; +} +case'[':{ +expdesc key; +luaK_exp2anyreg(fs,v); +yindex(ls,&key); +luaK_indexed(fs,v,&key); +break; +} +case':':{ +expdesc key; +luaX_next(ls); +checkname(ls,&key); +luaK_self(fs,v,&key); +funcargs(ls,v); +break; +} +case'(':case TK_STRING:case'{':{ +luaK_exp2nextreg(fs,v); +funcargs(ls,v); +break; +} +default:return; +} +} +} +static void simpleexp(LexState*ls,expdesc*v){ +switch(ls->t.token){ +case TK_NUMBER:{ +init_exp(v,VKNUM,0); +v->u.nval=ls->t.seminfo.r; +break; +} +case TK_STRING:{ +codestring(ls,v,ls->t.seminfo.ts); +break; +} +case TK_NIL:{ +init_exp(v,VNIL,0); +break; +} +case TK_TRUE:{ +init_exp(v,VTRUE,0); +break; +} +case TK_FALSE:{ +init_exp(v,VFALSE,0); +break; +} +case TK_DOTS:{ +FuncState*fs=ls->fs; +check_condition(ls,fs->f->is_vararg, +"cannot use "LUA_QL("...")" outside a vararg function"); +fs->f->is_vararg&=~4; +init_exp(v,VVARARG,luaK_codeABC(fs,OP_VARARG,0,1,0)); +break; +} +case'{':{ +constructor(ls,v); +return; +} +case TK_FUNCTION:{ +luaX_next(ls); +body(ls,v,0,ls->linenumber); +return; +} +default:{ +primaryexp(ls,v); +return; +} +} +luaX_next(ls); +} +static UnOpr getunopr(int op){ +switch(op){ +case TK_NOT:return OPR_NOT; +case'-':return OPR_MINUS; +case'#':return OPR_LEN;
+default:return OPR_NOUNOPR; +} +} +static BinOpr getbinopr(int op){ +switch(op){ +case'+':return OPR_ADD; +case'-':return OPR_SUB; +case'*':return OPR_MUL; +case'/':return OPR_DIV; +case'%':return OPR_MOD; +case'^':return OPR_POW; +case TK_CONCAT:return OPR_CONCAT; +case TK_NE:return OPR_NE; +case TK_EQ:return OPR_EQ; +case'<':return OPR_LT; +case TK_LE:return OPR_LE; +case'>':return OPR_GT; +case TK_GE:return OPR_GE; +case TK_AND:return OPR_AND; +case TK_OR:return OPR_OR; +default:return OPR_NOBINOPR; +} +} +static const struct{ +lu_byte left; +lu_byte right; +}priority[]={ +{6,6},{6,6},{7,7},{7,7},{7,7}, +{10,9},{5,4}, +{3,3},{3,3}, +{3,3},{3,3},{3,3},{3,3}, +{2,2},{1,1} +}; +static BinOpr subexpr(LexState*ls,expdesc*v,unsigned int limit){ +BinOpr op; +UnOpr uop; +enterlevel(ls); +uop=getunopr(ls->t.token); +if(uop!=OPR_NOUNOPR){ +luaX_next(ls); +subexpr(ls,v,8); +luaK_prefix(ls->fs,uop,v); +} +else simpleexp(ls,v); +op=getbinopr(ls->t.token); +while(op!=OPR_NOBINOPR&&priority[op].left>limit){ +expdesc v2; +BinOpr nextop; +luaX_next(ls); +luaK_infix(ls->fs,op,v); +nextop=subexpr(ls,&v2,priority[op].right); +luaK_posfix(ls->fs,op,v,&v2); +op=nextop; +} +leavelevel(ls); +return op; +} +static void expr(LexState*ls,expdesc*v){ +subexpr(ls,v,0); +} +static int block_follow(int token){ +switch(token){ +case TK_ELSE:case TK_ELSEIF:case TK_END: +case TK_UNTIL:case TK_EOS: +return 1; +default:return 0; +} +} +static void block(LexState*ls){ +FuncState*fs=ls->fs; +BlockCnt bl; +enterblock(fs,&bl,0); +chunk(ls); +leaveblock(fs); +} +struct LHS_assign{ +struct LHS_assign*prev; +expdesc v; +}; +static void check_conflict(LexState*ls,struct LHS_assign*lh,expdesc*v){ +FuncState*fs=ls->fs; +int extra=fs->freereg; +int conflict=0; +for(;lh;lh=lh->prev){ +if(lh->v.k==VINDEXED){ +if(lh->v.u.s.info==v->u.s.info){ +conflict=1; +lh->v.u.s.info=extra; +} +if(lh->v.u.s.aux==v->u.s.info){ +conflict=1; +lh->v.u.s.aux=extra; +} +} +} +if(conflict){ +luaK_codeABC(fs,OP_MOVE,fs->freereg,v->u.s.info,0); +luaK_reserveregs(fs,1); +} +} +static void assignment(LexState*ls,struct LHS_assign*lh,int nvars){ +expdesc e; +check_condition(ls,VLOCAL<=lh->v.k&&lh->v.k<=VINDEXED, +"syntax error"); +if(testnext(ls,',')){ +struct LHS_assign nv; +nv.prev=lh; +primaryexp(ls,&nv.v); +if(nv.v.k==VLOCAL) +check_conflict(ls,lh,&nv.v); +luaY_checklimit(ls->fs,nvars,200-ls->L->nCcalls, +"variables in assignment"); +assignment(ls,&nv,nvars+1); +} +else{ +int nexps; +checknext(ls,'='); +nexps=explist1(ls,&e); +if(nexps!=nvars){ +adjust_assign(ls,nvars,nexps,&e); +if(nexps>nvars) +ls->fs->freereg-=nexps-nvars; +} +else{ +luaK_setoneret(ls->fs,&e); +luaK_storevar(ls->fs,&lh->v,&e); +return; +} +} +init_exp(&e,VNONRELOC,ls->fs->freereg-1); +luaK_storevar(ls->fs,&lh->v,&e); +} +static int cond(LexState*ls){ +expdesc v; +expr(ls,&v); +if(v.k==VNIL)v.k=VFALSE; +luaK_goiftrue(ls->fs,&v); +return v.f; +} +static void breakstat(LexState*ls){ +FuncState*fs=ls->fs; +BlockCnt*bl=fs->bl; +int upval=0; +while(bl&&!bl->isbreakable){ +upval|=bl->upval; +bl=bl->previous; +} +if(!bl) +luaX_syntaxerror(ls,"no loop to break"); +if(upval) +luaK_codeABC(fs,OP_CLOSE,bl->nactvar,0,0); +luaK_concat(fs,&bl->breaklist,luaK_jump(fs)); +} +static void whilestat(LexState*ls,int line){ +FuncState*fs=ls->fs; +int whileinit; +int condexit; +BlockCnt bl; +luaX_next(ls); +whileinit=luaK_getlabel(fs); +condexit=cond(ls); +enterblock(fs,&bl,1); +checknext(ls,TK_DO); +block(ls); +luaK_patchlist(fs,luaK_jump(fs),whileinit); +check_match(ls,TK_END,TK_WHILE,line); 
+leaveblock(fs); +luaK_patchtohere(fs,condexit); +} +static void repeatstat(LexState*ls,int line){ +int condexit; +FuncState*fs=ls->fs; +int repeat_init=luaK_getlabel(fs); +BlockCnt bl1,bl2; +enterblock(fs,&bl1,1); +enterblock(fs,&bl2,0); +luaX_next(ls); +chunk(ls); +check_match(ls,TK_UNTIL,TK_REPEAT,line); +condexit=cond(ls); +if(!bl2.upval){ +leaveblock(fs); +luaK_patchlist(ls->fs,condexit,repeat_init); +} +else{ +breakstat(ls); +luaK_patchtohere(ls->fs,condexit); +leaveblock(fs); +luaK_patchlist(ls->fs,luaK_jump(fs),repeat_init); +} +leaveblock(fs); +} +static int exp1(LexState*ls){ +expdesc e; +int k; +expr(ls,&e); +k=e.k; +luaK_exp2nextreg(ls->fs,&e); +return k; +} +static void forbody(LexState*ls,int base,int line,int nvars,int isnum){ +BlockCnt bl; +FuncState*fs=ls->fs; +int prep,endfor; +adjustlocalvars(ls,3); +checknext(ls,TK_DO); +prep=isnum?luaK_codeAsBx(fs,OP_FORPREP,base,(-1)):luaK_jump(fs); +enterblock(fs,&bl,0); +adjustlocalvars(ls,nvars); +luaK_reserveregs(fs,nvars); +block(ls); +leaveblock(fs); +luaK_patchtohere(fs,prep); +endfor=(isnum)?luaK_codeAsBx(fs,OP_FORLOOP,base,(-1)): +luaK_codeABC(fs,OP_TFORLOOP,base,0,nvars); +luaK_fixline(fs,line); +luaK_patchlist(fs,(isnum?endfor:luaK_jump(fs)),prep+1); +} +static void fornum(LexState*ls,TString*varname,int line){ +FuncState*fs=ls->fs; +int base=fs->freereg; +new_localvarliteral(ls,"(for index)",0); +new_localvarliteral(ls,"(for limit)",1); +new_localvarliteral(ls,"(for step)",2); +new_localvar(ls,varname,3); +checknext(ls,'='); +exp1(ls); +checknext(ls,','); +exp1(ls); +if(testnext(ls,',')) +exp1(ls); +else{ +luaK_codeABx(fs,OP_LOADK,fs->freereg,luaK_numberK(fs,1)); +luaK_reserveregs(fs,1); +} +forbody(ls,base,line,1,1); +} +static void forlist(LexState*ls,TString*indexname){ +FuncState*fs=ls->fs; +expdesc e; +int nvars=0; +int line; +int base=fs->freereg; +new_localvarliteral(ls,"(for generator)",nvars++); +new_localvarliteral(ls,"(for state)",nvars++); +new_localvarliteral(ls,"(for control)",nvars++); +new_localvar(ls,indexname,nvars++); +while(testnext(ls,',')) +new_localvar(ls,str_checkname(ls),nvars++); +checknext(ls,TK_IN); +line=ls->linenumber; +adjust_assign(ls,3,explist1(ls,&e),&e); +luaK_checkstack(fs,3); +forbody(ls,base,line,nvars-3,0); +} +static void forstat(LexState*ls,int line){ +FuncState*fs=ls->fs; +TString*varname; +BlockCnt bl; +enterblock(fs,&bl,1); +luaX_next(ls); +varname=str_checkname(ls); +switch(ls->t.token){ +case'=':fornum(ls,varname,line);break; +case',':case TK_IN:forlist(ls,varname);break; +default:luaX_syntaxerror(ls,LUA_QL("=")" or "LUA_QL("in")" expected"); +} +check_match(ls,TK_END,TK_FOR,line); +leaveblock(fs); +} +static int test_then_block(LexState*ls){ +int condexit; +luaX_next(ls); +condexit=cond(ls); +checknext(ls,TK_THEN); +block(ls); +return condexit; +} +static void ifstat(LexState*ls,int line){ +FuncState*fs=ls->fs; +int flist; +int escapelist=(-1); +flist=test_then_block(ls); +while(ls->t.token==TK_ELSEIF){ +luaK_concat(fs,&escapelist,luaK_jump(fs)); +luaK_patchtohere(fs,flist); +flist=test_then_block(ls); +} +if(ls->t.token==TK_ELSE){ +luaK_concat(fs,&escapelist,luaK_jump(fs)); +luaK_patchtohere(fs,flist); +luaX_next(ls); +block(ls); +} +else +luaK_concat(fs,&escapelist,flist); +luaK_patchtohere(fs,escapelist); +check_match(ls,TK_END,TK_IF,line); +} +static void localfunc(LexState*ls){ +expdesc v,b; +FuncState*fs=ls->fs; +new_localvar(ls,str_checkname(ls),0); +init_exp(&v,VLOCAL,fs->freereg); +luaK_reserveregs(fs,1); +adjustlocalvars(ls,1); +body(ls,&b,0,ls->linenumber); 
+luaK_storevar(fs,&v,&b); +getlocvar(fs,fs->nactvar-1).startpc=fs->pc; +} +static void localstat(LexState*ls){ +int nvars=0; +int nexps; +expdesc e; +do{ +new_localvar(ls,str_checkname(ls),nvars++); +}while(testnext(ls,',')); +if(testnext(ls,'=')) +nexps=explist1(ls,&e); +else{ +e.k=VVOID; +nexps=0; +} +adjust_assign(ls,nvars,nexps,&e); +adjustlocalvars(ls,nvars); +} +static int funcname(LexState*ls,expdesc*v){ +int needself=0; +singlevar(ls,v); +while(ls->t.token=='.') +field(ls,v); +if(ls->t.token==':'){ +needself=1; +field(ls,v); +} +return needself; +} +static void funcstat(LexState*ls,int line){ +int needself; +expdesc v,b; +luaX_next(ls); +needself=funcname(ls,&v); +body(ls,&b,needself,line); +luaK_storevar(ls->fs,&v,&b); +luaK_fixline(ls->fs,line); +} +static void exprstat(LexState*ls){ +FuncState*fs=ls->fs; +struct LHS_assign v; +primaryexp(ls,&v.v); +if(v.v.k==VCALL) +SETARG_C(getcode(fs,&v.v),1); +else{ +v.prev=NULL; +assignment(ls,&v,1); +} +} +static void retstat(LexState*ls){ +FuncState*fs=ls->fs; +expdesc e; +int first,nret; +luaX_next(ls); +if(block_follow(ls->t.token)||ls->t.token==';') +first=nret=0; +else{ +nret=explist1(ls,&e); +if(hasmultret(e.k)){ +luaK_setmultret(fs,&e); +if(e.k==VCALL&&nret==1){ +SET_OPCODE(getcode(fs,&e),OP_TAILCALL); +} +first=fs->nactvar; +nret=(-1); +} +else{ +if(nret==1) +first=luaK_exp2anyreg(fs,&e); +else{ +luaK_exp2nextreg(fs,&e); +first=fs->nactvar; +} +} +} +luaK_ret(fs,first,nret); +} +static int statement(LexState*ls){ +int line=ls->linenumber; +switch(ls->t.token){ +case TK_IF:{ +ifstat(ls,line); +return 0; +} +case TK_WHILE:{ +whilestat(ls,line); +return 0; +} +case TK_DO:{ +luaX_next(ls); +block(ls); +check_match(ls,TK_END,TK_DO,line); +return 0; +} +case TK_FOR:{ +forstat(ls,line); +return 0; +} +case TK_REPEAT:{ +repeatstat(ls,line); +return 0; +} +case TK_FUNCTION:{ +funcstat(ls,line); +return 0; +} +case TK_LOCAL:{ +luaX_next(ls); +if(testnext(ls,TK_FUNCTION)) +localfunc(ls); +else +localstat(ls); +return 0; +} +case TK_RETURN:{ +retstat(ls); +return 1; +} +case TK_BREAK:{ +luaX_next(ls); +breakstat(ls); +return 1; +} +default:{ +exprstat(ls); +return 0; +} +} +} +static void chunk(LexState*ls){ +int islast=0; +enterlevel(ls); +while(!islast&&!block_follow(ls->t.token)){ +islast=statement(ls); +testnext(ls,';'); +ls->fs->freereg=ls->fs->nactvar; +} +leavelevel(ls); +} +static const TValue*luaV_tonumber(const TValue*obj,TValue*n){ +lua_Number num; +if(ttisnumber(obj))return obj; +if(ttisstring(obj)&&luaO_str2d(svalue(obj),&num)){ +setnvalue(n,num); +return n; +} +else +return NULL; +} +static int luaV_tostring(lua_State*L,StkId obj){ +if(!ttisnumber(obj)) +return 0; +else{ +char s[32]; +lua_Number n=nvalue(obj); +lua_number2str(s,n); +setsvalue(L,obj,luaS_new(L,s)); +return 1; +} +} +static void callTMres(lua_State*L,StkId res,const TValue*f, +const TValue*p1,const TValue*p2){ +ptrdiff_t result=savestack(L,res); +setobj(L,L->top,f); +setobj(L,L->top+1,p1); +setobj(L,L->top+2,p2); +luaD_checkstack(L,3); +L->top+=3; +luaD_call(L,L->top-3,1); +res=restorestack(L,result); +L->top--; +setobj(L,res,L->top); +} +static void callTM(lua_State*L,const TValue*f,const TValue*p1, +const TValue*p2,const TValue*p3){ +setobj(L,L->top,f); +setobj(L,L->top+1,p1); +setobj(L,L->top+2,p2); +setobj(L,L->top+3,p3); +luaD_checkstack(L,4); +L->top+=4; +luaD_call(L,L->top-4,0); +} +static void luaV_gettable(lua_State*L,const TValue*t,TValue*key,StkId val){ +int loop; +for(loop=0;loop<100;loop++){ +const TValue*tm; +if(ttistable(t)){ +Table*h=hvalue(t); 
+const TValue*res=luaH_get(h,key); +if(!ttisnil(res)|| +(tm=fasttm(L,h->metatable,TM_INDEX))==NULL){ +setobj(L,val,res); +return; +} +} +else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_INDEX))) +luaG_typeerror(L,t,"index"); +if(ttisfunction(tm)){ +callTMres(L,val,tm,t,key); +return; +} +t=tm; +} +luaG_runerror(L,"loop in gettable"); +} +static void luaV_settable(lua_State*L,const TValue*t,TValue*key,StkId val){ +int loop; +TValue temp; +for(loop=0;loop<100;loop++){ +const TValue*tm; +if(ttistable(t)){ +Table*h=hvalue(t); +TValue*oldval=luaH_set(L,h,key); +if(!ttisnil(oldval)|| +(tm=fasttm(L,h->metatable,TM_NEWINDEX))==NULL){ +setobj(L,oldval,val); +h->flags=0; +luaC_barriert(L,h,val); +return; +} +} +else if(ttisnil(tm=luaT_gettmbyobj(L,t,TM_NEWINDEX))) +luaG_typeerror(L,t,"index"); +if(ttisfunction(tm)){ +callTM(L,tm,t,key,val); +return; +} +setobj(L,&temp,tm); +t=&temp; +} +luaG_runerror(L,"loop in settable"); +} +static int call_binTM(lua_State*L,const TValue*p1,const TValue*p2, +StkId res,TMS event){ +const TValue*tm=luaT_gettmbyobj(L,p1,event); +if(ttisnil(tm)) +tm=luaT_gettmbyobj(L,p2,event); +if(ttisnil(tm))return 0; +callTMres(L,res,tm,p1,p2); +return 1; +} +static const TValue*get_compTM(lua_State*L,Table*mt1,Table*mt2, +TMS event){ +const TValue*tm1=fasttm(L,mt1,event); +const TValue*tm2; +if(tm1==NULL)return NULL; +if(mt1==mt2)return tm1; +tm2=fasttm(L,mt2,event); +if(tm2==NULL)return NULL; +if(luaO_rawequalObj(tm1,tm2)) +return tm1; +return NULL; +} +static int call_orderTM(lua_State*L,const TValue*p1,const TValue*p2, +TMS event){ +const TValue*tm1=luaT_gettmbyobj(L,p1,event); +const TValue*tm2; +if(ttisnil(tm1))return-1; +tm2=luaT_gettmbyobj(L,p2,event); +if(!luaO_rawequalObj(tm1,tm2)) +return-1; +callTMres(L,L->top,tm1,p1,p2); +return!l_isfalse(L->top); +} +static int l_strcmp(const TString*ls,const TString*rs){ +const char*l=getstr(ls); +size_t ll=ls->tsv.len; +const char*r=getstr(rs); +size_t lr=rs->tsv.len; +for(;;){ +int temp=strcoll(l,r); +if(temp!=0)return temp; +else{ +size_t len=strlen(l); +if(len==lr) +return(len==ll)?0:1; +else if(len==ll) +return-1; +len++; +l+=len;ll-=len;r+=len;lr-=len; +} +} +} +static int luaV_lessthan(lua_State*L,const TValue*l,const TValue*r){ +int res; +if(ttype(l)!=ttype(r)) +return luaG_ordererror(L,l,r); +else if(ttisnumber(l)) +return luai_numlt(nvalue(l),nvalue(r)); +else if(ttisstring(l)) +return l_strcmp(rawtsvalue(l),rawtsvalue(r))<0; +else if((res=call_orderTM(L,l,r,TM_LT))!=-1) +return res; +return luaG_ordererror(L,l,r); +} +static int lessequal(lua_State*L,const TValue*l,const TValue*r){ +int res; +if(ttype(l)!=ttype(r)) +return luaG_ordererror(L,l,r); +else if(ttisnumber(l)) +return luai_numle(nvalue(l),nvalue(r)); +else if(ttisstring(l)) +return l_strcmp(rawtsvalue(l),rawtsvalue(r))<=0; +else if((res=call_orderTM(L,l,r,TM_LE))!=-1) +return res; +else if((res=call_orderTM(L,r,l,TM_LT))!=-1) +return!res; +return luaG_ordererror(L,l,r); +} +static int luaV_equalval(lua_State*L,const TValue*t1,const TValue*t2){ +const TValue*tm; +switch(ttype(t1)){ +case 0:return 1; +case 3:return luai_numeq(nvalue(t1),nvalue(t2)); +case 1:return bvalue(t1)==bvalue(t2); +case 2:return pvalue(t1)==pvalue(t2); +case 7:{ +if(uvalue(t1)==uvalue(t2))return 1; +tm=get_compTM(L,uvalue(t1)->metatable,uvalue(t2)->metatable, +TM_EQ); +break; +} +case 5:{ +if(hvalue(t1)==hvalue(t2))return 1; +tm=get_compTM(L,hvalue(t1)->metatable,hvalue(t2)->metatable,TM_EQ); +break; +} +default:return gcvalue(t1)==gcvalue(t2); +} +if(tm==NULL)return 0; 
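/*
 * luaV_gettable()/luaV_settable() above implement the __index/__newindex
 * metamethod chains: a raw table hit (or a table without the tag method)
 * is handled inline; a function handler is invoked through callTMres()/
 * callTM(); a table handler is itself re-indexed; and the loop is capped
 * at 100 steps so cyclic chains are diagnosed ("loop in gettable") rather
 * than hanging. Driven through the C API defined later in this file, the
 * fallback behaves roughly like this (illustrative use of the public
 * Lua 5.1 API only):
 *
 *   lua_newtable(L);                  // t
 *   lua_newtable(L);                  // mt
 *   lua_newtable(L);                  // fallback
 *   lua_setfield(L, -2, "__index");   // mt.__index = fallback
 *   lua_setmetatable(L, -2);          // setmetatable(t, mt)
 *   lua_getfield(L, -1, "missing");   // raw miss on t, then fallback probed
 */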
+callTMres(L,L->top,tm,t1,t2); +return!l_isfalse(L->top); +} +static void luaV_concat(lua_State*L,int total,int last){ +do{ +StkId top=L->base+last+1; +int n=2; +if(!(ttisstring(top-2)||ttisnumber(top-2))||!tostring(L,top-1)){ +if(!call_binTM(L,top-2,top-1,top-2,TM_CONCAT)) +luaG_concaterror(L,top-2,top-1); +}else if(tsvalue(top-1)->len==0) +(void)tostring(L,top-2); +else{ +size_t tl=tsvalue(top-1)->len; +char*buffer; +int i; +for(n=1;n<total&&tostring(L,top-n-1);n++){ +size_t l=tsvalue(top-n-1)->len; +if(l>=((size_t)(~(size_t)0)-2)-tl)luaG_runerror(L,"string length overflow"); +tl+=l; +} +buffer=luaZ_openspace(L,&G(L)->buff,tl); +tl=0; +for(i=n;i>0;i--){ +size_t l=tsvalue(top-i)->len; +memcpy(buffer+tl,svalue(top-i),l); +tl+=l; +} +setsvalue(L,top-n,luaS_newlstr(L,buffer,tl)); +} +total-=n-1; +last-=n-1; +}while(total>1); +} +static void Arith(lua_State*L,StkId ra,const TValue*rb, +const TValue*rc,TMS op){ +TValue tempb,tempc; +const TValue*b,*c; +if((b=luaV_tonumber(rb,&tempb))!=NULL&& +(c=luaV_tonumber(rc,&tempc))!=NULL){ +lua_Number nb=nvalue(b),nc=nvalue(c); +switch(op){ +case TM_ADD:setnvalue(ra,luai_numadd(nb,nc));break; +case TM_SUB:setnvalue(ra,luai_numsub(nb,nc));break; +case TM_MUL:setnvalue(ra,luai_nummul(nb,nc));break; +case TM_DIV:setnvalue(ra,luai_numdiv(nb,nc));break; +case TM_MOD:setnvalue(ra,luai_nummod(nb,nc));break; +case TM_POW:setnvalue(ra,luai_numpow(nb,nc));break; +case TM_UNM:setnvalue(ra,luai_numunm(nb));break; +default:break; +} +} +else if(!call_binTM(L,rb,rc,ra,op)) +luaG_aritherror(L,rb,rc); +} +#define runtime_check(L,c){if(!(c))break;} +#define RA(i)(base+GETARG_A(i)) +#define RB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgR,base+GETARG_B(i)) +#define RKB(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_B(i))?k+INDEXK(GETARG_B(i)):base+GETARG_B(i)) +#define RKC(i)check_exp(getCMode(GET_OPCODE(i))==OpArgK,ISK(GETARG_C(i))?k+INDEXK(GETARG_C(i)):base+GETARG_C(i)) +#define KBx(i)check_exp(getBMode(GET_OPCODE(i))==OpArgK,k+GETARG_Bx(i)) +#define dojump(L,pc,i){(pc)+=(i);} +#define Protect(x){L->savedpc=pc;{x;};base=L->base;} +#define arith_op(op,tm){TValue*rb=RKB(i);TValue*rc=RKC(i);if(ttisnumber(rb)&&ttisnumber(rc)){lua_Number nb=nvalue(rb),nc=nvalue(rc);setnvalue(ra,op(nb,nc));}else Protect(Arith(L,ra,rb,rc,tm));} +static void luaV_execute(lua_State*L,int nexeccalls){ +LClosure*cl; +StkId base; +TValue*k; +const Instruction*pc; +reentry: +pc=L->savedpc; +cl=&clvalue(L->ci->func)->l; +base=L->base; +k=cl->p->k; +for(;;){ +const Instruction i=*pc++; +StkId ra; +ra=RA(i); +switch(GET_OPCODE(i)){ +case OP_MOVE:{ +setobj(L,ra,RB(i)); +continue; +} +case OP_LOADK:{ +setobj(L,ra,KBx(i)); +continue; +} +case OP_LOADBOOL:{ +setbvalue(ra,GETARG_B(i)); +if(GETARG_C(i))pc++; +continue; +} +case OP_LOADNIL:{ +TValue*rb=RB(i); +do{ +setnilvalue(rb--); +}while(rb>=ra); +continue; +} +case OP_GETUPVAL:{ +int b=GETARG_B(i); +setobj(L,ra,cl->upvals[b]->v); +continue; +} +case OP_GETGLOBAL:{ +TValue g; +TValue*rb=KBx(i); +sethvalue(L,&g,cl->env); +Protect(luaV_gettable(L,&g,rb,ra)); +continue; +} +case OP_GETTABLE:{ +Protect(luaV_gettable(L,RB(i),RKC(i),ra)); +continue; +} +case OP_SETGLOBAL:{ +TValue g; +sethvalue(L,&g,cl->env); +Protect(luaV_settable(L,&g,KBx(i),ra)); +continue; +} +case OP_SETUPVAL:{ +UpVal*uv=cl->upvals[GETARG_B(i)]; +setobj(L,uv->v,ra); +luaC_barrier(L,uv,ra); +continue; +} +case OP_SETTABLE:{ +Protect(luaV_settable(L,ra,RKB(i),RKC(i))); +continue; +} +case OP_NEWTABLE:{ +int b=GETARG_B(i); +int c=GETARG_C(i); +sethvalue(L,ra,luaH_new(L,luaO_fb2int(b),luaO_fb2int(c))); +Protect(luaC_checkGC(L)); +continue; +}
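/*
 * The Protect() macro above is the interpreter's reentrancy guard: any
 * operation that may call a metamethod, trigger the GC, or reallocate the
 * Lua stack runs with L->savedpc synchronized first, and base is reloaded
 * afterwards because the stack may have moved. arith_op() expands to the
 * fast path for two numeric operands and falls back to Arith() (and from
 * there to metamethods) otherwise.
 */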
+case OP_SELF:{ +StkId rb=RB(i); +setobj(L,ra+1,rb); +Protect(luaV_gettable(L,rb,RKC(i),ra)); +continue; +} +case OP_ADD:{ +arith_op(luai_numadd,TM_ADD); +continue; +} +case OP_SUB:{ +arith_op(luai_numsub,TM_SUB); +continue; +} +case OP_MUL:{ +arith_op(luai_nummul,TM_MUL); +continue; +} +case OP_DIV:{ +arith_op(luai_numdiv,TM_DIV); +continue; +} +case OP_MOD:{ +arith_op(luai_nummod,TM_MOD); +continue; +} +case OP_POW:{ +arith_op(luai_numpow,TM_POW); +continue; +} +case OP_UNM:{ +TValue*rb=RB(i); +if(ttisnumber(rb)){ +lua_Number nb=nvalue(rb); +setnvalue(ra,luai_numunm(nb)); +} +else{ +Protect(Arith(L,ra,rb,rb,TM_UNM)); +} +continue; +} +case OP_NOT:{ +int res=l_isfalse(RB(i)); +setbvalue(ra,res); +continue; +} +case OP_LEN:{ +const TValue*rb=RB(i); +switch(ttype(rb)){ +case 5:{ +setnvalue(ra,cast_num(luaH_getn(hvalue(rb)))); +break; +} +case 4:{ +setnvalue(ra,cast_num(tsvalue(rb)->len)); +break; +} +default:{ +Protect( +if(!call_binTM(L,rb,(&luaO_nilobject_),ra,TM_LEN)) +luaG_typeerror(L,rb,"get length of"); +) +} +} +continue; +} +case OP_CONCAT:{ +int b=GETARG_B(i); +int c=GETARG_C(i); +Protect(luaV_concat(L,c-b+1,c);luaC_checkGC(L)); +setobj(L,RA(i),base+b); +continue; +} +case OP_JMP:{ +dojump(L,pc,GETARG_sBx(i)); +continue; +} +case OP_EQ:{ +TValue*rb=RKB(i); +TValue*rc=RKC(i); +Protect( +if(equalobj(L,rb,rc)==GETARG_A(i)) +dojump(L,pc,GETARG_sBx(*pc)); +) +pc++; +continue; +} +case OP_LT:{ +Protect( +if(luaV_lessthan(L,RKB(i),RKC(i))==GETARG_A(i)) +dojump(L,pc,GETARG_sBx(*pc)); +) +pc++; +continue; +} +case OP_LE:{ +Protect( +if(lessequal(L,RKB(i),RKC(i))==GETARG_A(i)) +dojump(L,pc,GETARG_sBx(*pc)); +) +pc++; +continue; +} +case OP_TEST:{ +if(l_isfalse(ra)!=GETARG_C(i)) +dojump(L,pc,GETARG_sBx(*pc)); +pc++; +continue; +} +case OP_TESTSET:{ +TValue*rb=RB(i); +if(l_isfalse(rb)!=GETARG_C(i)){ +setobj(L,ra,rb); +dojump(L,pc,GETARG_sBx(*pc)); +} +pc++; +continue; +} +case OP_CALL:{ +int b=GETARG_B(i); +int nresults=GETARG_C(i)-1; +if(b!=0)L->top=ra+b; +L->savedpc=pc; +switch(luaD_precall(L,ra,nresults)){ +case 0:{ +nexeccalls++; +goto reentry; +} +case 1:{ +if(nresults>=0)L->top=L->ci->top; +base=L->base; +continue; +} +default:{ +return; +} +} +} +case OP_TAILCALL:{ +int b=GETARG_B(i); +if(b!=0)L->top=ra+b; +L->savedpc=pc; +switch(luaD_precall(L,ra,(-1))){ +case 0:{ +CallInfo*ci=L->ci-1; +int aux; +StkId func=ci->func; +StkId pfunc=(ci+1)->func; +if(L->openupval)luaF_close(L,ci->base); +L->base=ci->base=ci->func+((ci+1)->base-pfunc); +for(aux=0;pfunc+aux<L->top;aux++) +setobj(L,func+aux,pfunc+aux); +ci->top=L->top=func+aux; +ci->savedpc=L->savedpc; +ci->tailcalls++; +L->ci--; +goto reentry; +} +case 1:{ +base=L->base; +continue; +} +default:{ +return; +} +} +} +case OP_RETURN:{ +int b=GETARG_B(i); +if(b!=0)L->top=ra+b-1; +if(L->openupval)luaF_close(L,base); +L->savedpc=pc; +b=luaD_poscall(L,ra); +if(--nexeccalls==0) +return; +else{ +if(b)L->top=L->ci->top; +goto reentry; +} +} +case OP_FORLOOP:{ +lua_Number step=nvalue(ra+2); +lua_Number idx=luai_numadd(nvalue(ra),step); +lua_Number limit=nvalue(ra+1); +if(luai_numlt(0,step)?luai_numle(idx,limit) +:luai_numle(limit,idx)){ +dojump(L,pc,GETARG_sBx(i)); +setnvalue(ra,idx); +setnvalue(ra+3,idx); +} +continue; +}
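/*
 * Numeric 'for' support: OP_FORPREP (below) coerces the init/limit/step
 * slots to numbers and pre-subtracts the step from the index, so the
 * OP_FORLOOP above always adds the step first, keeps looping while the
 * index is within the limit in the direction given by the step's sign,
 * and copies the internal index to the user-visible variable at ra+3.
 */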
+luaG_runerror(L,LUA_QL("for")" step must be a number"); +setnvalue(ra,luai_numsub(nvalue(ra),nvalue(pstep))); +dojump(L,pc,GETARG_sBx(i)); +continue; +} +case OP_TFORLOOP:{ +StkId cb=ra+3; +setobj(L,cb+2,ra+2); +setobj(L,cb+1,ra+1); +setobj(L,cb,ra); +L->top=cb+3; +Protect(luaD_call(L,cb,GETARG_C(i))); +L->top=L->ci->top; +cb=RA(i)+3; +if(!ttisnil(cb)){ +setobj(L,cb-1,cb); +dojump(L,pc,GETARG_sBx(*pc)); +} +pc++; +continue; +} +case OP_SETLIST:{ +int n=GETARG_B(i); +int c=GETARG_C(i); +int last; +Table*h; +if(n==0){ +n=cast_int(L->top-ra)-1; +L->top=L->ci->top; +} +if(c==0)c=cast_int(*pc++); +runtime_check(L,ttistable(ra)); +h=hvalue(ra); +last=((c-1)*50)+n; +if(last>h->sizearray) +luaH_resizearray(L,h,last); +for(;n>0;n--){ +TValue*val=ra+n; +setobj(L,luaH_setnum(L,h,last--),val); +luaC_barriert(L,h,val); +} +continue; +} +case OP_CLOSE:{ +luaF_close(L,ra); +continue; +} +case OP_CLOSURE:{ +Proto*p; +Closure*ncl; +int nup,j; +p=cl->p->p[GETARG_Bx(i)]; +nup=p->nups; +ncl=luaF_newLclosure(L,nup,cl->env); +ncl->l.p=p; +for(j=0;jl.upvals[j]=cl->upvals[GETARG_B(*pc)]; +else{ +ncl->l.upvals[j]=luaF_findupval(L,base+GETARG_B(*pc)); +} +} +setclvalue(L,ra,ncl); +Protect(luaC_checkGC(L)); +continue; +} +case OP_VARARG:{ +int b=GETARG_B(i)-1; +int j; +CallInfo*ci=L->ci; +int n=cast_int(ci->base-ci->func)-cl->p->numparams-1; +if(b==(-1)){ +Protect(luaD_checkstack(L,n)); +ra=RA(i); +b=n; +L->top=ra+n; +} +for(j=0;jbase-n+j); +} +else{ +setnilvalue(ra+j); +} +} +continue; +} +} +} +} +#define api_checknelems(L,n)luai_apicheck(L,(n)<=(L->top-L->base)) +#define api_checkvalidindex(L,i)luai_apicheck(L,(i)!=(&luaO_nilobject_)) +#define api_incr_top(L){luai_apicheck(L,L->topci->top);L->top++;} +static TValue*index2adr(lua_State*L,int idx){ +if(idx>0){ +TValue*o=L->base+(idx-1); +luai_apicheck(L,idx<=L->ci->top-L->base); +if(o>=L->top)return cast(TValue*,(&luaO_nilobject_)); +else return o; +} +else if(idx>(-10000)){ +luai_apicheck(L,idx!=0&&-idx<=L->top-L->base); +return L->top+idx; +} +else switch(idx){ +case(-10000):return registry(L); +case(-10001):{ +Closure*func=curr_func(L); +sethvalue(L,&L->env,func->c.env); +return&L->env; +} +case(-10002):return gt(L); +default:{ +Closure*func=curr_func(L); +idx=(-10002)-idx; +return(idx<=func->c.nupvalues) +?&func->c.upvalue[idx-1] +:cast(TValue*,(&luaO_nilobject_)); +} +} +} +static Table*getcurrenv(lua_State*L){ +if(L->ci==L->base_ci) +return hvalue(gt(L)); +else{ +Closure*func=curr_func(L); +return func->c.env; +} +} +static int lua_checkstack(lua_State*L,int size){ +int res=1; +if(size>8000||(L->top-L->base+size)>8000) +res=0; +else if(size>0){ +luaD_checkstack(L,size); +if(L->ci->toptop+size) +L->ci->top=L->top+size; +} +return res; +} +static lua_CFunction lua_atpanic(lua_State*L,lua_CFunction panicf){ +lua_CFunction old; +old=G(L)->panic; +G(L)->panic=panicf; +return old; +} +static int lua_gettop(lua_State*L){ +return cast_int(L->top-L->base); +} +static void lua_settop(lua_State*L,int idx){ +if(idx>=0){ +luai_apicheck(L,idx<=L->stack_last-L->base); +while(L->topbase+idx) +setnilvalue(L->top++); +L->top=L->base+idx; +} +else{ +luai_apicheck(L,-(idx+1)<=(L->top-L->base)); +L->top+=idx+1; +} +} +static void lua_remove(lua_State*L,int idx){ +StkId p; +p=index2adr(L,idx); +api_checkvalidindex(L,p); +while(++ptop)setobj(L,p-1,p); +L->top--; +} +static void lua_insert(lua_State*L,int idx){ +StkId p; +StkId q; +p=index2adr(L,idx); +api_checkvalidindex(L,p); +for(q=L->top;q>p;q--)setobj(L,q,q-1); +setobj(L,p,L->top); +} +static void lua_replace(lua_State*L,int 
idx){ +StkId o; +if(idx==(-10001)&&L->ci==L->base_ci) +luaG_runerror(L,"no calling environment"); +api_checknelems(L,1); +o=index2adr(L,idx); +api_checkvalidindex(L,o); +if(idx==(-10001)){ +Closure*func=curr_func(L); +luai_apicheck(L,ttistable(L->top-1)); +func->c.env=hvalue(L->top-1); +luaC_barrier(L,func,L->top-1); +} +else{ +setobj(L,o,L->top-1); +if(idx<(-10002)) +luaC_barrier(L,curr_func(L),L->top-1); +} +L->top--; +} +static void lua_pushvalue(lua_State*L,int idx){ +setobj(L,L->top,index2adr(L,idx)); +api_incr_top(L); +} +static int lua_type(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return(o==(&luaO_nilobject_))?(-1):ttype(o); +} +static const char*lua_typename(lua_State*L,int t){ +UNUSED(L); +return(t==(-1))?"no value":luaT_typenames[t]; +} +static int lua_iscfunction(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return iscfunction(o); +} +static int lua_isnumber(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +return tonumber(o,&n); +} +static int lua_isstring(lua_State*L,int idx){ +int t=lua_type(L,idx); +return(t==4||t==3); +} +static int lua_rawequal(lua_State*L,int index1,int index2){ +StkId o1=index2adr(L,index1); +StkId o2=index2adr(L,index2); +return(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0 +:luaO_rawequalObj(o1,o2); +} +static int lua_lessthan(lua_State*L,int index1,int index2){ +StkId o1,o2; +int i; +o1=index2adr(L,index1); +o2=index2adr(L,index2); +i=(o1==(&luaO_nilobject_)||o2==(&luaO_nilobject_))?0 +:luaV_lessthan(L,o1,o2); +return i; +} +static lua_Number lua_tonumber(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +if(tonumber(o,&n)) +return nvalue(o); +else +return 0; +} +static lua_Integer lua_tointeger(lua_State*L,int idx){ +TValue n; +const TValue*o=index2adr(L,idx); +if(tonumber(o,&n)){ +lua_Integer res; +lua_Number num=nvalue(o); +lua_number2integer(res,num); +return res; +} +else +return 0; +} +static int lua_toboolean(lua_State*L,int idx){ +const TValue*o=index2adr(L,idx); +return!l_isfalse(o); +} +static const char*lua_tolstring(lua_State*L,int idx,size_t*len){ +StkId o=index2adr(L,idx); +if(!ttisstring(o)){ +if(!luaV_tostring(L,o)){ +if(len!=NULL)*len=0; +return NULL; +} +luaC_checkGC(L); +o=index2adr(L,idx); +} +if(len!=NULL)*len=tsvalue(o)->len; +return svalue(o); +} +static size_t lua_objlen(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +switch(ttype(o)){ +case 4:return tsvalue(o)->len; +case 7:return uvalue(o)->len; +case 5:return luaH_getn(hvalue(o)); +case 3:{ +size_t l; +l=(luaV_tostring(L,o)?tsvalue(o)->len:0); +return l; +} +default:return 0; +} +} +static lua_CFunction lua_tocfunction(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +return(!iscfunction(o))?NULL:clvalue(o)->c.f; +} +static void*lua_touserdata(lua_State*L,int idx){ +StkId o=index2adr(L,idx); +switch(ttype(o)){ +case 7:return(rawuvalue(o)+1); +case 2:return pvalue(o); +default:return NULL; +} +} +static void lua_pushnil(lua_State*L){ +setnilvalue(L->top); +api_incr_top(L); +} +static void lua_pushnumber(lua_State*L,lua_Number n){ +setnvalue(L->top,n); +api_incr_top(L); +} +static void lua_pushinteger(lua_State*L,lua_Integer n){ +setnvalue(L->top,cast_num(n)); +api_incr_top(L); +} +static void lua_pushlstring(lua_State*L,const char*s,size_t len){ +luaC_checkGC(L); +setsvalue(L,L->top,luaS_newlstr(L,s,len)); +api_incr_top(L); +} +static void lua_pushstring(lua_State*L,const char*s){ +if(s==NULL) +lua_pushnil(L); +else +lua_pushlstring(L,s,strlen(s)); +} +static const char*lua_pushvfstring(lua_State*L,const char*fmt, +va_list 
argp){ +const char*ret; +luaC_checkGC(L); +ret=luaO_pushvfstring(L,fmt,argp); +return ret; +} +static const char*lua_pushfstring(lua_State*L,const char*fmt,...){ +const char*ret; +va_list argp; +luaC_checkGC(L); +va_start(argp,fmt); +ret=luaO_pushvfstring(L,fmt,argp); +va_end(argp); +return ret; +} +static void lua_pushcclosure(lua_State*L,lua_CFunction fn,int n){ +Closure*cl; +luaC_checkGC(L); +api_checknelems(L,n); +cl=luaF_newCclosure(L,n,getcurrenv(L)); +cl->c.f=fn; +L->top-=n; +while(n--) +setobj(L,&cl->c.upvalue[n],L->top+n); +setclvalue(L,L->top,cl); +api_incr_top(L); +} +static void lua_pushboolean(lua_State*L,int b){ +setbvalue(L->top,(b!=0)); +api_incr_top(L); +} +static int lua_pushthread(lua_State*L){ +setthvalue(L,L->top,L); +api_incr_top(L); +return(G(L)->mainthread==L); +} +static void lua_gettable(lua_State*L,int idx){ +StkId t; +t=index2adr(L,idx); +api_checkvalidindex(L,t); +luaV_gettable(L,t,L->top-1,L->top-1); +} +static void lua_getfield(lua_State*L,int idx,const char*k){ +StkId t; +TValue key; +t=index2adr(L,idx); +api_checkvalidindex(L,t); +setsvalue(L,&key,luaS_new(L,k)); +luaV_gettable(L,t,&key,L->top); +api_incr_top(L); +} +static void lua_rawget(lua_State*L,int idx){ +StkId t; +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +setobj(L,L->top-1,luaH_get(hvalue(t),L->top-1)); +} +static void lua_rawgeti(lua_State*L,int idx,int n){ +StkId o; +o=index2adr(L,idx); +luai_apicheck(L,ttistable(o)); +setobj(L,L->top,luaH_getnum(hvalue(o),n)); +api_incr_top(L); +} +static void lua_createtable(lua_State*L,int narray,int nrec){ +luaC_checkGC(L); +sethvalue(L,L->top,luaH_new(L,narray,nrec)); +api_incr_top(L); +} +static int lua_getmetatable(lua_State*L,int objindex){ +const TValue*obj; +Table*mt=NULL; +int res; +obj=index2adr(L,objindex); +switch(ttype(obj)){ +case 5: +mt=hvalue(obj)->metatable; +break; +case 7: +mt=uvalue(obj)->metatable; +break; +default: +mt=G(L)->mt[ttype(obj)]; +break; +} +if(mt==NULL) +res=0; +else{ +sethvalue(L,L->top,mt); +api_incr_top(L); +res=1; +} +return res; +} +static void lua_getfenv(lua_State*L,int idx){ +StkId o; +o=index2adr(L,idx); +api_checkvalidindex(L,o); +switch(ttype(o)){ +case 6: +sethvalue(L,L->top,clvalue(o)->c.env); +break; +case 7: +sethvalue(L,L->top,uvalue(o)->env); +break; +case 8: +setobj(L,L->top,gt(thvalue(o))); +break; +default: +setnilvalue(L->top); +break; +} +api_incr_top(L); +} +static void lua_settable(lua_State*L,int idx){ +StkId t; +api_checknelems(L,2); +t=index2adr(L,idx); +api_checkvalidindex(L,t); +luaV_settable(L,t,L->top-2,L->top-1); +L->top-=2; +} +static void lua_setfield(lua_State*L,int idx,const char*k){ +StkId t; +TValue key; +api_checknelems(L,1); +t=index2adr(L,idx); +api_checkvalidindex(L,t); +setsvalue(L,&key,luaS_new(L,k)); +luaV_settable(L,t,&key,L->top-1); +L->top--; +} +static void lua_rawset(lua_State*L,int idx){ +StkId t; +api_checknelems(L,2); +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +setobj(L,luaH_set(L,hvalue(t),L->top-2),L->top-1); +luaC_barriert(L,hvalue(t),L->top-1); +L->top-=2; +} +static void lua_rawseti(lua_State*L,int idx,int n){ +StkId o; +api_checknelems(L,1); +o=index2adr(L,idx); +luai_apicheck(L,ttistable(o)); +setobj(L,luaH_setnum(L,hvalue(o),n),L->top-1); +luaC_barriert(L,hvalue(o),L->top-1); +L->top--; +} +static int lua_setmetatable(lua_State*L,int objindex){ +TValue*obj; +Table*mt; +api_checknelems(L,1); +obj=index2adr(L,objindex); +api_checkvalidindex(L,obj); +if(ttisnil(L->top-1)) +mt=NULL; +else{ +luai_apicheck(L,ttistable(L->top-1)); +mt=hvalue(L->top-1); +} 
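/*
 * The magic numbers threaded through index2adr(), lua_replace() and the
 * luaL_* helpers are Lua 5.1's pseudo-indices with their luaconf.h names
 * stripped by minification:
 *   -10000 == LUA_REGISTRYINDEX
 *   -10001 == LUA_ENVIRONINDEX
 *   -10002 == LUA_GLOBALSINDEX
 *   lua_upvalueindex(i) == LUA_GLOBALSINDEX - i == -10002 - i
 * e.g. lua_getfield(L, -10000, tname) reads the registry slot that
 * luaL_newmetatable() populates for a userdata type.
 */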
+switch(ttype(obj)){ +case 5:{ +hvalue(obj)->metatable=mt; +if(mt) +luaC_objbarriert(L,hvalue(obj),mt); +break; +} +case 7:{ +uvalue(obj)->metatable=mt; +if(mt) +luaC_objbarrier(L,rawuvalue(obj),mt); +break; +} +default:{ +G(L)->mt[ttype(obj)]=mt; +break; +} +} +L->top--; +return 1; +} +static int lua_setfenv(lua_State*L,int idx){ +StkId o; +int res=1; +api_checknelems(L,1); +o=index2adr(L,idx); +api_checkvalidindex(L,o); +luai_apicheck(L,ttistable(L->top-1)); +switch(ttype(o)){ +case 6: +clvalue(o)->c.env=hvalue(L->top-1); +break; +case 7: +uvalue(o)->env=hvalue(L->top-1); +break; +case 8: +sethvalue(L,gt(thvalue(o)),hvalue(L->top-1)); +break; +default: +res=0; +break; +} +if(res)luaC_objbarrier(L,gcvalue(o),hvalue(L->top-1)); +L->top--; +return res; +} +#define adjustresults(L,nres){if(nres==(-1)&&L->top>=L->ci->top)L->ci->top=L->top;} +#define checkresults(L,na,nr)luai_apicheck(L,(nr)==(-1)||(L->ci->top-L->top>=(nr)-(na))) +static void lua_call(lua_State*L,int nargs,int nresults){ +StkId func; +api_checknelems(L,nargs+1); +checkresults(L,nargs,nresults); +func=L->top-(nargs+1); +luaD_call(L,func,nresults); +adjustresults(L,nresults); +} +struct CallS{ +StkId func; +int nresults; +}; +static void f_call(lua_State*L,void*ud){ +struct CallS*c=cast(struct CallS*,ud); +luaD_call(L,c->func,c->nresults); +} +static int lua_pcall(lua_State*L,int nargs,int nresults,int errfunc){ +struct CallS c; +int status; +ptrdiff_t func; +api_checknelems(L,nargs+1); +checkresults(L,nargs,nresults); +if(errfunc==0) +func=0; +else{ +StkId o=index2adr(L,errfunc); +api_checkvalidindex(L,o); +func=savestack(L,o); +} +c.func=L->top-(nargs+1); +c.nresults=nresults; +status=luaD_pcall(L,f_call,&c,savestack(L,c.func),func); +adjustresults(L,nresults); +return status; +} +static int lua_load(lua_State*L,lua_Reader reader,void*data, +const char*chunkname){ +ZIO z; +int status; +if(!chunkname)chunkname="?"; +luaZ_init(L,&z,reader,data); +status=luaD_protectedparser(L,&z,chunkname); +return status; +} +static int lua_error(lua_State*L){ +api_checknelems(L,1); +luaG_errormsg(L); +return 0; +} +static int lua_next(lua_State*L,int idx){ +StkId t; +int more; +t=index2adr(L,idx); +luai_apicheck(L,ttistable(t)); +more=luaH_next(L,hvalue(t),L->top-1); +if(more){ +api_incr_top(L); +} +else +L->top-=1; +return more; +} +static void lua_concat(lua_State*L,int n){ +api_checknelems(L,n); +if(n>=2){ +luaC_checkGC(L); +luaV_concat(L,n,cast_int(L->top-L->base)-1); +L->top-=(n-1); +} +else if(n==0){ +setsvalue(L,L->top,luaS_newlstr(L,"",0)); +api_incr_top(L); +} +} +static void*lua_newuserdata(lua_State*L,size_t size){ +Udata*u; +luaC_checkGC(L); +u=luaS_newudata(L,size,getcurrenv(L)); +setuvalue(L,L->top,u); +api_incr_top(L); +return u+1; +} +#define luaL_getn(L,i)((int)lua_objlen(L,i)) +#define luaL_setn(L,i,j)((void)0) +typedef struct luaL_Reg{ +const char*name; +lua_CFunction func; +}luaL_Reg; +static void luaI_openlib(lua_State*L,const char*libname, +const luaL_Reg*l,int nup); +static int luaL_argerror(lua_State*L,int numarg,const char*extramsg); +static const char* luaL_checklstring(lua_State*L,int numArg, +size_t*l); +static const char* luaL_optlstring(lua_State*L,int numArg, +const char*def,size_t*l); +static lua_Integer luaL_checkinteger(lua_State*L,int numArg); +static lua_Integer luaL_optinteger(lua_State*L,int nArg, +lua_Integer def); +static int luaL_error(lua_State*L,const char*fmt,...); +static const char* luaL_findtable(lua_State*L,int idx, +const char*fname,int szhint); +#define 
luaL_argcheck(L,cond,numarg,extramsg)((void)((cond)||luaL_argerror(L,(numarg),(extramsg)))) +#define luaL_checkstring(L,n)(luaL_checklstring(L,(n),NULL)) +#define luaL_optstring(L,n,d)(luaL_optlstring(L,(n),(d),NULL)) +#define luaL_checkint(L,n)((int)luaL_checkinteger(L,(n))) +#define luaL_optint(L,n,d)((int)luaL_optinteger(L,(n),(d))) +#define luaL_typename(L,i)lua_typename(L,lua_type(L,(i))) +#define luaL_getmetatable(L,n)(lua_getfield(L,(-10000),(n))) +#define luaL_opt(L,f,n,d)(lua_isnoneornil(L,(n))?(d):f(L,(n))) +typedef struct luaL_Buffer{ +char*p; +int lvl; +lua_State*L; +char buffer[BUFSIZ]; +}luaL_Buffer; +#define luaL_addchar(B,c)((void)((B)->p<((B)->buffer+BUFSIZ)||luaL_prepbuffer(B)),(*(B)->p++=(char)(c))) +#define luaL_addsize(B,n)((B)->p+=(n)) +static char* luaL_prepbuffer(luaL_Buffer*B); +static int luaL_argerror(lua_State*L,int narg,const char*extramsg){ +lua_Debug ar; +if(!lua_getstack(L,0,&ar)) +return luaL_error(L,"bad argument #%d (%s)",narg,extramsg); +lua_getinfo(L,"n",&ar); +if(strcmp(ar.namewhat,"method")==0){ +narg--; +if(narg==0) +return luaL_error(L,"calling "LUA_QL("%s")" on bad self (%s)", +ar.name,extramsg); +} +if(ar.name==NULL) +ar.name="?"; +return luaL_error(L,"bad argument #%d to "LUA_QL("%s")" (%s)", +narg,ar.name,extramsg); +} +static int luaL_typerror(lua_State*L,int narg,const char*tname){ +const char*msg=lua_pushfstring(L,"%s expected, got %s", +tname,luaL_typename(L,narg)); +return luaL_argerror(L,narg,msg); +} +static void tag_error(lua_State*L,int narg,int tag){ +luaL_typerror(L,narg,lua_typename(L,tag)); +} +static void luaL_where(lua_State*L,int level){ +lua_Debug ar; +if(lua_getstack(L,level,&ar)){ +lua_getinfo(L,"Sl",&ar); +if(ar.currentline>0){ +lua_pushfstring(L,"%s:%d: ",ar.short_src,ar.currentline); +return; +} +} +lua_pushliteral(L,""); +} +static int luaL_error(lua_State*L,const char*fmt,...){ +va_list argp; +va_start(argp,fmt); +luaL_where(L,1); +lua_pushvfstring(L,fmt,argp); +va_end(argp); +lua_concat(L,2); +return lua_error(L); +} +static int luaL_newmetatable(lua_State*L,const char*tname){ +lua_getfield(L,(-10000),tname); +if(!lua_isnil(L,-1)) +return 0; +lua_pop(L,1); +lua_newtable(L); +lua_pushvalue(L,-1); +lua_setfield(L,(-10000),tname); +return 1; +} +static void*luaL_checkudata(lua_State*L,int ud,const char*tname){ +void*p=lua_touserdata(L,ud); +if(p!=NULL){ +if(lua_getmetatable(L,ud)){ +lua_getfield(L,(-10000),tname); +if(lua_rawequal(L,-1,-2)){ +lua_pop(L,2); +return p; +} +} +} +luaL_typerror(L,ud,tname); +return NULL; +} +static void luaL_checkstack(lua_State*L,int space,const char*mes){ +if(!lua_checkstack(L,space)) +luaL_error(L,"stack overflow (%s)",mes); +} +static void luaL_checktype(lua_State*L,int narg,int t){ +if(lua_type(L,narg)!=t) +tag_error(L,narg,t); +} +static void luaL_checkany(lua_State*L,int narg){ +if(lua_type(L,narg)==(-1)) +luaL_argerror(L,narg,"value expected"); +} +static const char*luaL_checklstring(lua_State*L,int narg,size_t*len){ +const char*s=lua_tolstring(L,narg,len); +if(!s)tag_error(L,narg,4); +return s; +} +static const char*luaL_optlstring(lua_State*L,int narg, +const char*def,size_t*len){ +if(lua_isnoneornil(L,narg)){ +if(len) +*len=(def?strlen(def):0); +return def; +} +else return luaL_checklstring(L,narg,len); +} +static lua_Number luaL_checknumber(lua_State*L,int narg){ +lua_Number d=lua_tonumber(L,narg); +if(d==0&&!lua_isnumber(L,narg)) +tag_error(L,narg,3); +return d; +} +static lua_Integer luaL_checkinteger(lua_State*L,int narg){ +lua_Integer d=lua_tointeger(L,narg); 
+if(d==0&&!lua_isnumber(L,narg)) +tag_error(L,narg,3); +return d; +} +static lua_Integer luaL_optinteger(lua_State*L,int narg, +lua_Integer def){ +return luaL_opt(L,luaL_checkinteger,narg,def); +} +static int luaL_getmetafield(lua_State*L,int obj,const char*event){ +if(!lua_getmetatable(L,obj)) +return 0; +lua_pushstring(L,event); +lua_rawget(L,-2); +if(lua_isnil(L,-1)){ +lua_pop(L,2); +return 0; +} +else{ +lua_remove(L,-2); +return 1; +} +} +static void luaL_register(lua_State*L,const char*libname, +const luaL_Reg*l){ +luaI_openlib(L,libname,l,0); +} +static int libsize(const luaL_Reg*l){ +int size=0; +for(;l->name;l++)size++; +return size; +} +static void luaI_openlib(lua_State*L,const char*libname, +const luaL_Reg*l,int nup){ +if(libname){ +int size=libsize(l); +luaL_findtable(L,(-10000),"_LOADED",1); +lua_getfield(L,-1,libname); +if(!lua_istable(L,-1)){ +lua_pop(L,1); +if(luaL_findtable(L,(-10002),libname,size)!=NULL) +luaL_error(L,"name conflict for module "LUA_QL("%s"),libname); +lua_pushvalue(L,-1); +lua_setfield(L,-3,libname); +} +lua_remove(L,-2); +lua_insert(L,-(nup+1)); +} +for(;l->name;l++){ +int i; +for(i=0;ifunc,nup); +lua_setfield(L,-(nup+2),l->name); +} +lua_pop(L,nup); +} +static const char*luaL_findtable(lua_State*L,int idx, +const char*fname,int szhint){ +const char*e; +lua_pushvalue(L,idx); +do{ +e=strchr(fname,'.'); +if(e==NULL)e=fname+strlen(fname); +lua_pushlstring(L,fname,e-fname); +lua_rawget(L,-2); +if(lua_isnil(L,-1)){ +lua_pop(L,1); +lua_createtable(L,0,(*e=='.'?1:szhint)); +lua_pushlstring(L,fname,e-fname); +lua_pushvalue(L,-2); +lua_settable(L,-4); +} +else if(!lua_istable(L,-1)){ +lua_pop(L,2); +return fname; +} +lua_remove(L,-2); +fname=e+1; +}while(*e=='.'); +return NULL; +} +#define bufflen(B)((B)->p-(B)->buffer) +#define bufffree(B)((size_t)(BUFSIZ-bufflen(B))) +static int emptybuffer(luaL_Buffer*B){ +size_t l=bufflen(B); +if(l==0)return 0; +else{ +lua_pushlstring(B->L,B->buffer,l); +B->p=B->buffer; +B->lvl++; +return 1; +} +} +static void adjuststack(luaL_Buffer*B){ +if(B->lvl>1){ +lua_State*L=B->L; +int toget=1; +size_t toplen=lua_strlen(L,-1); +do{ +size_t l=lua_strlen(L,-(toget+1)); +if(B->lvl-toget+1>=(20/2)||toplen>l){ +toplen+=l; +toget++; +} +else break; +}while(togetlvl); +lua_concat(L,toget); +B->lvl=B->lvl-toget+1; +} +} +static char*luaL_prepbuffer(luaL_Buffer*B){ +if(emptybuffer(B)) +adjuststack(B); +return B->buffer; +} +static void luaL_addlstring(luaL_Buffer*B,const char*s,size_t l){ +while(l--) +luaL_addchar(B,*s++); +} +static void luaL_pushresult(luaL_Buffer*B){ +emptybuffer(B); +lua_concat(B->L,B->lvl); +B->lvl=1; +} +static void luaL_addvalue(luaL_Buffer*B){ +lua_State*L=B->L; +size_t vl; +const char*s=lua_tolstring(L,-1,&vl); +if(vl<=bufffree(B)){ +memcpy(B->p,s,vl); +B->p+=vl; +lua_pop(L,1); +} +else{ +if(emptybuffer(B)) +lua_insert(L,-2); +B->lvl++; +adjuststack(B); +} +} +static void luaL_buffinit(lua_State*L,luaL_Buffer*B){ +B->L=L; +B->p=B->buffer; +B->lvl=0; +} +typedef struct LoadF{ +int extraline; +FILE*f; +char buff[BUFSIZ]; +}LoadF; +static const char*getF(lua_State*L,void*ud,size_t*size){ +LoadF*lf=(LoadF*)ud; +(void)L; +if(lf->extraline){ +lf->extraline=0; +*size=1; +return"\n"; +} +if(feof(lf->f))return NULL; +*size=fread(lf->buff,1,sizeof(lf->buff),lf->f); +return(*size>0)?lf->buff:NULL; +} +static int errfile(lua_State*L,const char*what,int fnameindex){ +const char*serr=strerror(errno); +const char*filename=lua_tostring(L,fnameindex)+1; +lua_pushfstring(L,"cannot %s %s: %s",what,filename,serr); 
+lua_remove(L,fnameindex); +return(5+1); +} +static int luaL_loadfile(lua_State*L,const char*filename){ +LoadF lf; +int status,readstatus; +int c; +int fnameindex=lua_gettop(L)+1; +lf.extraline=0; +if(filename==NULL){ +lua_pushliteral(L,"=stdin"); +lf.f=stdin; +} +else{ +lua_pushfstring(L,"@%s",filename); +lf.f=fopen(filename,"r"); +if(lf.f==NULL)return errfile(L,"open",fnameindex); +} +c=getc(lf.f); +if(c=='#'){ +lf.extraline=1; +while((c=getc(lf.f))!=EOF&&c!='\n'); +if(c=='\n')c=getc(lf.f); +} +if(c=="\033Lua"[0]&&filename){ +lf.f=freopen(filename,"rb",lf.f); +if(lf.f==NULL)return errfile(L,"reopen",fnameindex); +while((c=getc(lf.f))!=EOF&&c!="\033Lua"[0]); +lf.extraline=0; +} +ungetc(c,lf.f); +status=lua_load(L,getF,&lf,lua_tostring(L,-1)); +readstatus=ferror(lf.f); +if(filename)fclose(lf.f); +if(readstatus){ +lua_settop(L,fnameindex); +return errfile(L,"read",fnameindex); +} +lua_remove(L,fnameindex); +return status; +} +typedef struct LoadS{ +const char*s; +size_t size; +}LoadS; +static const char*getS(lua_State*L,void*ud,size_t*size){ +LoadS*ls=(LoadS*)ud; +(void)L; +if(ls->size==0)return NULL; +*size=ls->size; +ls->size=0; +return ls->s; +} +static int luaL_loadbuffer(lua_State*L,const char*buff,size_t size, +const char*name){ +LoadS ls; +ls.s=buff; +ls.size=size; +return lua_load(L,getS,&ls,name); +} +static void*l_alloc(void*ud,void*ptr,size_t osize,size_t nsize){ +(void)ud; +(void)osize; +if(nsize==0){ +free(ptr); +return NULL; +} +else +return realloc(ptr,nsize); +} +static int panic(lua_State*L){ +(void)L; +fprintf(stderr,"PANIC: unprotected error in call to Lua API (%s)\n", +lua_tostring(L,-1)); +return 0; +} +static lua_State*luaL_newstate(void){ +lua_State*L=lua_newstate(l_alloc,NULL); +if(L)lua_atpanic(L,&panic); +return L; +} +static int luaB_tonumber(lua_State*L){ +int base=luaL_optint(L,2,10); +if(base==10){ +luaL_checkany(L,1); +if(lua_isnumber(L,1)){ +lua_pushnumber(L,lua_tonumber(L,1)); +return 1; +} +} +else{ +const char*s1=luaL_checkstring(L,1); +char*s2; +unsigned long n; +luaL_argcheck(L,2<=base&&base<=36,2,"base out of range"); +n=strtoul(s1,&s2,base); +if(s1!=s2){ +while(isspace((unsigned char)(*s2)))s2++; +if(*s2=='\0'){ +lua_pushnumber(L,(lua_Number)n); +return 1; +} +} +} +lua_pushnil(L); +return 1; +} +static int luaB_error(lua_State*L){ +int level=luaL_optint(L,2,1); +lua_settop(L,1); +if(lua_isstring(L,1)&&level>0){ +luaL_where(L,level); +lua_pushvalue(L,1); +lua_concat(L,2); +} +return lua_error(L); +} +static int luaB_setmetatable(lua_State*L){ +int t=lua_type(L,2); +luaL_checktype(L,1,5); +luaL_argcheck(L,t==0||t==5,2, +"nil or table expected"); +if(luaL_getmetafield(L,1,"__metatable")) +luaL_error(L,"cannot change a protected metatable"); +lua_settop(L,2); +lua_setmetatable(L,1); +return 1; +} +static void getfunc(lua_State*L,int opt){ +if(lua_isfunction(L,1))lua_pushvalue(L,1); +else{ +lua_Debug ar; +int level=opt?luaL_optint(L,1,1):luaL_checkint(L,1); +luaL_argcheck(L,level>=0,1,"level must be non-negative"); +if(lua_getstack(L,level,&ar)==0) +luaL_argerror(L,1,"invalid level"); +lua_getinfo(L,"f",&ar); +if(lua_isnil(L,-1)) +luaL_error(L,"no function environment for tail call at level %d", +level); +} +} +static int luaB_setfenv(lua_State*L){ +luaL_checktype(L,2,5); +getfunc(L,0); +lua_pushvalue(L,2); +if(lua_isnumber(L,1)&&lua_tonumber(L,1)==0){ +lua_pushthread(L); +lua_insert(L,-2); +lua_setfenv(L,-2); +return 0; +} +else if(lua_iscfunction(L,-2)||lua_setfenv(L,-2)==0) +luaL_error(L, +LUA_QL("setfenv")" cannot change environment of given object"); 
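/*
 * In luaL_loadfile() above, "\033Lua"[0] is LUA_SIGNATURE[0] (ESC, 0x1B),
 * the first byte of a precompiled Lua 5.1 chunk: the loader skips an
 * optional "#!" shebang line and, if the first real byte is ESC, reopens
 * the file in binary mode so luac bytecode is read unmangled.
 */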
+return 1; +} +static int luaB_rawget(lua_State*L){ +luaL_checktype(L,1,5); +luaL_checkany(L,2); +lua_settop(L,2); +lua_rawget(L,1); +return 1; +} +static int luaB_type(lua_State*L){ +luaL_checkany(L,1); +lua_pushstring(L,luaL_typename(L,1)); +return 1; +} +static int luaB_next(lua_State*L){ +luaL_checktype(L,1,5); +lua_settop(L,2); +if(lua_next(L,1)) +return 2; +else{ +lua_pushnil(L); +return 1; +} +} +static int luaB_pairs(lua_State*L){ +luaL_checktype(L,1,5); +lua_pushvalue(L,lua_upvalueindex(1)); +lua_pushvalue(L,1); +lua_pushnil(L); +return 3; +} +static int ipairsaux(lua_State*L){ +int i=luaL_checkint(L,2); +luaL_checktype(L,1,5); +i++; +lua_pushinteger(L,i); +lua_rawgeti(L,1,i); +return(lua_isnil(L,-1))?0:2; +} +static int luaB_ipairs(lua_State*L){ +luaL_checktype(L,1,5); +lua_pushvalue(L,lua_upvalueindex(1)); +lua_pushvalue(L,1); +lua_pushinteger(L,0); +return 3; +} +static int load_aux(lua_State*L,int status){ +if(status==0) +return 1; +else{ +lua_pushnil(L); +lua_insert(L,-2); +return 2; +} +} +static int luaB_loadstring(lua_State*L){ +size_t l; +const char*s=luaL_checklstring(L,1,&l); +const char*chunkname=luaL_optstring(L,2,s); +return load_aux(L,luaL_loadbuffer(L,s,l,chunkname)); +} +static int luaB_loadfile(lua_State*L){ +const char*fname=luaL_optstring(L,1,NULL); +return load_aux(L,luaL_loadfile(L,fname)); +} +static int luaB_assert(lua_State*L){ +luaL_checkany(L,1); +if(!lua_toboolean(L,1)) +return luaL_error(L,"%s",luaL_optstring(L,2,"assertion failed!")); +return lua_gettop(L); +} +static int luaB_unpack(lua_State*L){ +int i,e,n; +luaL_checktype(L,1,5); +i=luaL_optint(L,2,1); +e=luaL_opt(L,luaL_checkint,3,luaL_getn(L,1)); +if(i>e)return 0; +n=e-i+1; +if(n<=0||!lua_checkstack(L,n)) +return luaL_error(L,"too many results to unpack"); +lua_rawgeti(L,1,i); +while(i++e)e=pos; +for(i=e;i>pos;i--){ +lua_rawgeti(L,1,i-1); +lua_rawseti(L,1,i); +} +break; +} +default:{ +return luaL_error(L,"wrong number of arguments to "LUA_QL("insert")); +} +} +luaL_setn(L,1,e); +lua_rawseti(L,1,pos); +return 0; +} +static int tremove(lua_State*L){ +int e=aux_getn(L,1); +int pos=luaL_optint(L,2,e); +if(!(1<=pos&&pos<=e)) +return 0; +luaL_setn(L,1,e-1); +lua_rawgeti(L,1,pos); +for(;posu)luaL_error(L,"invalid order function for sorting"); +lua_pop(L,1); +} +while(lua_rawgeti(L,1,--j),sort_comp(L,-3,-1)){ +if(j0); +} +l=strlen(p); +if(l==0||p[l-1]!='\n') +luaL_addsize(&b,l); +else{ +luaL_addsize(&b,l-1); +luaL_pushresult(&b); +return 1; +} +} +} +static int read_chars(lua_State*L,FILE*f,size_t n){ +size_t rlen; +size_t nr; +luaL_Buffer b; +luaL_buffinit(L,&b); +rlen=BUFSIZ; +do{ +char*p=luaL_prepbuffer(&b); +if(rlen>n)rlen=n; +nr=fread(p,sizeof(char),rlen,f); +luaL_addsize(&b,nr); +n-=nr; +}while(n>0&&nr==rlen); +luaL_pushresult(&b); +return(n==0||lua_objlen(L,-1)>0); +} +static int g_read(lua_State*L,FILE*f,int first){ +int nargs=lua_gettop(L)-1; +int success; +int n; +clearerr(f); +if(nargs==0){ +success=read_line(L,f); +n=first+1; +} +else{ +luaL_checkstack(L,nargs+20,"too many arguments"); +success=1; +for(n=first;nargs--&&success;n++){ +if(lua_type(L,n)==3){ +size_t l=(size_t)lua_tointeger(L,n); +success=(l==0)?test_eof(L,f):read_chars(L,f,l); +} +else{ +const char*p=lua_tostring(L,n); +luaL_argcheck(L,p&&p[0]=='*',n,"invalid option"); +switch(p[1]){ +case'n': +success=read_number(L,f); +break; +case'l': +success=read_line(L,f); +break; +case'a': +read_chars(L,f,~((size_t)0)); +success=1; +break; +default: +return luaL_argerror(L,n,"invalid format"); +} +} +} +} +if(ferror(f)) +return 
pushresult(L,0,NULL); +if(!success){ +lua_pop(L,1); +lua_pushnil(L); +} +return n-first; +} +static int io_read(lua_State*L){ +return g_read(L,getiofile(L,1),1); +} +static int f_read(lua_State*L){ +return g_read(L,tofile(L),2); +} +static int io_readline(lua_State*L){ +FILE*f=*(FILE**)lua_touserdata(L,lua_upvalueindex(1)); +int success; +if(f==NULL) +luaL_error(L,"file is already closed"); +success=read_line(L,f); +if(ferror(f)) +return luaL_error(L,"%s",strerror(errno)); +if(success)return 1; +else{ +if(lua_toboolean(L,lua_upvalueindex(2))){ +lua_settop(L,0); +lua_pushvalue(L,lua_upvalueindex(1)); +aux_close(L); +} +return 0; +} +} +static int g_write(lua_State*L,FILE*f,int arg){ +int nargs=lua_gettop(L)-1; +int status=1; +for(;nargs--;arg++){ +if(lua_type(L,arg)==3){ +status=status&& +fprintf(f,"%.14g",lua_tonumber(L,arg))>0; +} +else{ +size_t l; +const char*s=luaL_checklstring(L,arg,&l); +status=status&&(fwrite(s,sizeof(char),l,f)==l); +} +} +return pushresult(L,status,NULL); +} +static int io_write(lua_State*L){ +return g_write(L,getiofile(L,2),1); +} +static int f_write(lua_State*L){ +return g_write(L,tofile(L),2); +} +static int io_flush(lua_State*L){ +return pushresult(L,fflush(getiofile(L,2))==0,NULL); +} +static int f_flush(lua_State*L){ +return pushresult(L,fflush(tofile(L))==0,NULL); +} +static const luaL_Reg iolib[]={ +{"close",io_close}, +{"flush",io_flush}, +{"input",io_input}, +{"lines",io_lines}, +{"open",io_open}, +{"output",io_output}, +{"read",io_read}, +{"type",io_type}, +{"write",io_write}, +{NULL,NULL} +}; +static const luaL_Reg flib[]={ +{"close",io_close}, +{"flush",f_flush}, +{"lines",f_lines}, +{"read",f_read}, +{"write",f_write}, +{"__gc",io_gc}, +{NULL,NULL} +}; +static void createmeta(lua_State*L){ +luaL_newmetatable(L,"FILE*"); +lua_pushvalue(L,-1); +lua_setfield(L,-2,"__index"); +luaL_register(L,NULL,flib); +} +static void createstdfile(lua_State*L,FILE*f,int k,const char*fname){ +*newfile(L)=f; +if(k>0){ +lua_pushvalue(L,-1); +lua_rawseti(L,(-10001),k); +} +lua_pushvalue(L,-2); +lua_setfenv(L,-2); +lua_setfield(L,-3,fname); +} +static void newfenv(lua_State*L,lua_CFunction cls){ +lua_createtable(L,0,1); +lua_pushcfunction(L,cls); +lua_setfield(L,-2,"__close"); +} +static int luaopen_io(lua_State*L){ +createmeta(L); +newfenv(L,io_fclose); +lua_replace(L,(-10001)); +luaL_register(L,"io",iolib); +newfenv(L,io_noclose); +createstdfile(L,stdin,1,"stdin"); +createstdfile(L,stdout,2,"stdout"); +createstdfile(L,stderr,0,"stderr"); +lua_pop(L,1); +lua_getfield(L,-1,"popen"); +newfenv(L,io_pclose); +lua_setfenv(L,-2); +lua_pop(L,1); +return 1; +} +static int os_pushresult(lua_State*L,int i,const char*filename){ +int en=errno; +if(i){ +lua_pushboolean(L,1); +return 1; +} +else{ +lua_pushnil(L); +lua_pushfstring(L,"%s: %s",filename,strerror(en)); +lua_pushinteger(L,en); +return 3; +} +} +static int os_remove(lua_State*L){ +const char*filename=luaL_checkstring(L,1); +return os_pushresult(L,remove(filename)==0,filename); +} +static int os_exit(lua_State*L){ +exit(luaL_optint(L,1,EXIT_SUCCESS)); +} +static const luaL_Reg syslib[]={ +{"exit",os_exit}, +{"remove",os_remove}, +{NULL,NULL} +}; +static int luaopen_os(lua_State*L){ +luaL_register(L,"os",syslib); +return 1; +} +#define uchar(c)((unsigned char)(c)) +static ptrdiff_t posrelat(ptrdiff_t pos,size_t len){ +if(pos<0)pos+=(ptrdiff_t)len+1; +return(pos>=0)?pos:0; +} +static int str_sub(lua_State*L){ +size_t l; +const char*s=luaL_checklstring(L,1,&l); +ptrdiff_t start=posrelat(luaL_checkinteger(L,2),l); +ptrdiff_t 
end=posrelat(luaL_optinteger(L,3,-1),l); +if(start<1)start=1; +if(end>(ptrdiff_t)l)end=(ptrdiff_t)l; +if(start<=end) +lua_pushlstring(L,s+start-1,end-start+1); +else lua_pushliteral(L,""); +return 1; +} +static int str_lower(lua_State*L){ +size_t l; +size_t i; +luaL_Buffer b; +const char*s=luaL_checklstring(L,1,&l); +luaL_buffinit(L,&b); +for(i=0;i0) +luaL_addlstring(&b,s,l); +luaL_pushresult(&b); +return 1; +} +static int str_byte(lua_State*L){ +size_t l; +const char*s=luaL_checklstring(L,1,&l); +ptrdiff_t posi=posrelat(luaL_optinteger(L,2,1),l); +ptrdiff_t pose=posrelat(luaL_optinteger(L,3,posi),l); +int n,i; +if(posi<=0)posi=1; +if((size_t)pose>l)pose=l; +if(posi>pose)return 0; +n=(int)(pose-posi+1); +if(posi+n<=pose) +luaL_error(L,"string slice too long"); +luaL_checkstack(L,n,"string slice too long"); +for(i=0;i=ms->level||ms->capture[l].len==(-1)) +return luaL_error(ms->L,"invalid capture index"); +return l; +} +static int capture_to_close(MatchState*ms){ +int level=ms->level; +for(level--;level>=0;level--) +if(ms->capture[level].len==(-1))return level; +return luaL_error(ms->L,"invalid pattern capture"); +} +static const char*classend(MatchState*ms,const char*p){ +switch(*p++){ +case'%':{ +if(*p=='\0') +luaL_error(ms->L,"malformed pattern (ends with "LUA_QL("%%")")"); +return p+1; +} +case'[':{ +if(*p=='^')p++; +do{ +if(*p=='\0') +luaL_error(ms->L,"malformed pattern (missing "LUA_QL("]")")"); +if(*(p++)=='%'&&*p!='\0') +p++; +}while(*p!=']'); +return p+1; +} +default:{ +return p; +} +} +} +static int match_class(int c,int cl){ +int res; +switch(tolower(cl)){ +case'a':res=isalpha(c);break; +case'c':res=iscntrl(c);break; +case'd':res=isdigit(c);break; +case'l':res=islower(c);break; +case'p':res=ispunct(c);break; +case's':res=isspace(c);break; +case'u':res=isupper(c);break; +case'w':res=isalnum(c);break; +case'x':res=isxdigit(c);break; +case'z':res=(c==0);break; +default:return(cl==c); +} +return(islower(cl)?res:!res); +} +static int matchbracketclass(int c,const char*p,const char*ec){ +int sig=1; +if(*(p+1)=='^'){ +sig=0; +p++; +} +while(++pL,"unbalanced pattern"); +if(*s!=*p)return NULL; +else{ +int b=*p; +int e=*(p+1); +int cont=1; +while(++ssrc_end){ +if(*s==e){ +if(--cont==0)return s+1; +} +else if(*s==b)cont++; +} +} +return NULL; +} +static const char*max_expand(MatchState*ms,const char*s, +const char*p,const char*ep){ +ptrdiff_t i=0; +while((s+i)src_end&&singlematch(uchar(*(s+i)),p,ep)) +i++; +while(i>=0){ +const char*res=match(ms,(s+i),ep+1); +if(res)return res; +i--; +} +return NULL; +} +static const char*min_expand(MatchState*ms,const char*s, +const char*p,const char*ep){ +for(;;){ +const char*res=match(ms,s,ep+1); +if(res!=NULL) +return res; +else if(ssrc_end&&singlematch(uchar(*s),p,ep)) +s++; +else return NULL; +} +} +static const char*start_capture(MatchState*ms,const char*s, +const char*p,int what){ +const char*res; +int level=ms->level; +if(level>=32)luaL_error(ms->L,"too many captures"); +ms->capture[level].init=s; +ms->capture[level].len=what; +ms->level=level+1; +if((res=match(ms,s,p))==NULL) +ms->level--; +return res; +} +static const char*end_capture(MatchState*ms,const char*s, +const char*p){ +int l=capture_to_close(ms); +const char*res; +ms->capture[l].len=s-ms->capture[l].init; +if((res=match(ms,s,p))==NULL) +ms->capture[l].len=(-1); +return res; +} +static const char*match_capture(MatchState*ms,const char*s,int l){ +size_t len; +l=check_capture(ms,l); +len=ms->capture[l].len; +if((size_t)(ms->src_end-s)>=len&& +memcmp(ms->capture[l].init,s,len)==0) +return 
s+len; +else return NULL; +} +static const char*match(MatchState*ms,const char*s,const char*p){ +init: +switch(*p){ +case'(':{ +if(*(p+1)==')') +return start_capture(ms,s,p+2,(-2)); +else +return start_capture(ms,s,p+1,(-1)); +} +case')':{ +return end_capture(ms,s,p+1); +} +case'%':{ +switch(*(p+1)){ +case'b':{ +s=matchbalance(ms,s,p+2); +if(s==NULL)return NULL; +p+=4;goto init; +} +case'f':{ +const char*ep;char previous; +p+=2; +if(*p!='[') +luaL_error(ms->L,"missing "LUA_QL("[")" after " +LUA_QL("%%f")" in pattern"); +ep=classend(ms,p); +previous=(s==ms->src_init)?'\0':*(s-1); +if(matchbracketclass(uchar(previous),p,ep-1)|| +!matchbracketclass(uchar(*s),p,ep-1))return NULL; +p=ep;goto init; +} +default:{ +if(isdigit(uchar(*(p+1)))){ +s=match_capture(ms,s,uchar(*(p+1))); +if(s==NULL)return NULL; +p+=2;goto init; +} +goto dflt; +} +} +} +case'\0':{ +return s; +} +case'$':{ +if(*(p+1)=='\0') +return(s==ms->src_end)?s:NULL; +else goto dflt; +} +default:dflt:{ +const char*ep=classend(ms,p); +int m=ssrc_end&&singlematch(uchar(*s),p,ep); +switch(*ep){ +case'?':{ +const char*res; +if(m&&((res=match(ms,s+1,ep+1))!=NULL)) +return res; +p=ep+1;goto init; +} +case'*':{ +return max_expand(ms,s,p,ep); +} +case'+':{ +return(m?max_expand(ms,s+1,p,ep):NULL); +} +case'-':{ +return min_expand(ms,s,p,ep); +} +default:{ +if(!m)return NULL; +s++;p=ep;goto init; +} +} +} +} +} +static const char*lmemfind(const char*s1,size_t l1, +const char*s2,size_t l2){ +if(l2==0)return s1; +else if(l2>l1)return NULL; +else{ +const char*init; +l2--; +l1=l1-l2; +while(l1>0&&(init=(const char*)memchr(s1,*s2,l1))!=NULL){ +init++; +if(memcmp(init,s2+1,l2)==0) +return init-1; +else{ +l1-=init-s1; +s1=init; +} +} +return NULL; +} +} +static void push_onecapture(MatchState*ms,int i,const char*s, +const char*e){ +if(i>=ms->level){ +if(i==0) +lua_pushlstring(ms->L,s,e-s); +else +luaL_error(ms->L,"invalid capture index"); +} +else{ +ptrdiff_t l=ms->capture[i].len; +if(l==(-1))luaL_error(ms->L,"unfinished capture"); +if(l==(-2)) +lua_pushinteger(ms->L,ms->capture[i].init-ms->src_init+1); +else +lua_pushlstring(ms->L,ms->capture[i].init,l); +} +} +static int push_captures(MatchState*ms,const char*s,const char*e){ +int i; +int nlevels=(ms->level==0&&s)?1:ms->level; +luaL_checkstack(ms->L,nlevels,"too many captures"); +for(i=0;il1)init=(ptrdiff_t)l1; +if(find&&(lua_toboolean(L,4)|| +strpbrk(p,"^$*+?.([%-")==NULL)){ +const char*s2=lmemfind(s+init,l1-init,p,l2); +if(s2){ +lua_pushinteger(L,s2-s+1); +lua_pushinteger(L,s2-s+l2); +return 2; +} +} +else{ +MatchState ms; +int anchor=(*p=='^')?(p++,1):0; +const char*s1=s+init; +ms.L=L; +ms.src_init=s; +ms.src_end=s+l1; +do{ +const char*res; +ms.level=0; +if((res=match(&ms,s1,p))!=NULL){ +if(find){ +lua_pushinteger(L,s1-s+1); +lua_pushinteger(L,res-s); +return push_captures(&ms,NULL,0)+2; +} +else +return push_captures(&ms,s1,res); +} +}while(s1++L,3,&l); +for(i=0;iL; +switch(lua_type(L,3)){ +case 3: +case 4:{ +add_s(ms,b,s,e); +return; +} +case 6:{ +int n; +lua_pushvalue(L,3); +n=push_captures(ms,s,e); +lua_call(L,n,1); +break; +} +case 5:{ +push_onecapture(ms,0,s,e); +lua_gettable(L,3); +break; +} +} +if(!lua_toboolean(L,-1)){ +lua_pop(L,1); +lua_pushlstring(L,s,e-s); +} +else if(!lua_isstring(L,-1)) +luaL_error(L,"invalid replacement value (a %s)",luaL_typename(L,-1)); +luaL_addvalue(b); +} +static int str_gsub(lua_State*L){ +size_t srcl; +const char*src=luaL_checklstring(L,1,&srcl); +const char*p=luaL_checkstring(L,2); +int tr=lua_type(L,3); +int max_s=luaL_optint(L,4,srcl+1); +int 
anchor=(*p=='^')?(p++,1):0; +int n=0; +MatchState ms; +luaL_Buffer b; +luaL_argcheck(L,tr==3||tr==4|| +tr==6||tr==5,3, +"string/function/table expected"); +luaL_buffinit(L,&b); +ms.L=L; +ms.src_init=src; +ms.src_end=src+srcl; +while(nsrc) +src=e; +else if(src=sizeof("-+ #0")) +luaL_error(L,"invalid format (repeated flags)"); +if(isdigit(uchar(*p)))p++; +if(isdigit(uchar(*p)))p++; +if(*p=='.'){ +p++; +if(isdigit(uchar(*p)))p++; +if(isdigit(uchar(*p)))p++; +} +if(isdigit(uchar(*p))) +luaL_error(L,"invalid format (width or precision too long)"); +*(form++)='%'; +strncpy(form,strfrmt,p-strfrmt+1); +form+=p-strfrmt+1; +*form='\0'; +return p; +} +static void addintlen(char*form){ +size_t l=strlen(form); +char spec=form[l-1]; +strcpy(form+l-1,"l"); +form[l+sizeof("l")-2]=spec; +form[l+sizeof("l")-1]='\0'; +} +static int str_format(lua_State*L){ +int top=lua_gettop(L); +int arg=1; +size_t sfl; +const char*strfrmt=luaL_checklstring(L,arg,&sfl); +const char*strfrmt_end=strfrmt+sfl; +luaL_Buffer b; +luaL_buffinit(L,&b); +while(strfrmttop) +luaL_argerror(L,arg,"no value"); +strfrmt=scanformat(L,strfrmt,form); +switch(*strfrmt++){ +case'c':{ +sprintf(buff,form,(int)luaL_checknumber(L,arg)); +break; +} +case'd':case'i':{ +addintlen(form); +sprintf(buff,form,(long)luaL_checknumber(L,arg)); +break; +} +case'o':case'u':case'x':case'X':{ +addintlen(form); +sprintf(buff,form,(unsigned long)luaL_checknumber(L,arg)); +break; +} +case'e':case'E':case'f': +case'g':case'G':{ +sprintf(buff,form,(double)luaL_checknumber(L,arg)); +break; +} +case'q':{ +addquoted(L,&b,arg); +continue; +} +case's':{ +size_t l; +const char*s=luaL_checklstring(L,arg,&l); +if(!strchr(form,'.')&&l>=100){ +lua_pushvalue(L,arg); +luaL_addvalue(&b); +continue; +} +else{ +sprintf(buff,form,s); +break; +} +} +default:{ +return luaL_error(L,"invalid option "LUA_QL("%%%c")" to " +LUA_QL("format"),*(strfrmt-1)); +} +} +luaL_addlstring(&b,buff,strlen(buff)); +} +} +luaL_pushresult(&b); +return 1; +} +static const luaL_Reg strlib[]={ +{"byte",str_byte}, +{"char",str_char}, +{"find",str_find}, +{"format",str_format}, +{"gmatch",gmatch}, +{"gsub",str_gsub}, +{"lower",str_lower}, +{"match",str_match}, +{"rep",str_rep}, +{"sub",str_sub}, +{"upper",str_upper}, +{NULL,NULL} +}; +static void createmetatable(lua_State*L){ +lua_createtable(L,0,1); +lua_pushliteral(L,""); +lua_pushvalue(L,-2); +lua_setmetatable(L,-2); +lua_pop(L,1); +lua_pushvalue(L,-2); +lua_setfield(L,-2,"__index"); +lua_pop(L,1); +} +static int luaopen_string(lua_State*L){ +luaL_register(L,"string",strlib); +createmetatable(L); +return 1; +} +static const luaL_Reg lualibs[]={ +{"",luaopen_base}, +{"table",luaopen_table}, +{"io",luaopen_io}, +{"os",luaopen_os}, +{"string",luaopen_string}, +{NULL,NULL} +}; +static void luaL_openlibs(lua_State*L){ +const luaL_Reg*lib=lualibs; +for(;lib->func;lib++){ +lua_pushcfunction(L,lib->func); +lua_pushstring(L,lib->name); +lua_call(L,1,0); +} +} +typedef unsigned int UB; +static UB barg(lua_State*L,int idx){ +union{lua_Number n;U64 b;}bn; +bn.n=lua_tonumber(L,idx)+6755399441055744.0; +if(bn.n==0.0&&!lua_isnumber(L,idx))luaL_typerror(L,idx,"number"); +return(UB)bn.b; +} +#define BRET(b)lua_pushnumber(L,(lua_Number)(int)(b));return 1; +static int tobit(lua_State*L){ +BRET(barg(L,1))} +static int bnot(lua_State*L){ +BRET(~barg(L,1))} +static int band(lua_State*L){ +int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b&=barg(L,i);BRET(b)} +static int bor(lua_State*L){ +int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b|=barg(L,i);BRET(b)} +static int 
bxor(lua_State*L){ +int i;UB b=barg(L,1);for(i=lua_gettop(L);i>1;i--)b^=barg(L,i);BRET(b)} +static int lshift(lua_State*L){ +UB b=barg(L,1),n=barg(L,2)&31;BRET(b<<n)} +static int rshift(lua_State*L){ +UB b=barg(L,1),n=barg(L,2)&31;BRET(b>>n)} +static int arshift(lua_State*L){ +UB b=barg(L,1),n=barg(L,2)&31;BRET((int)b>>n)} +static int rol(lua_State*L){ +UB b=barg(L,1),n=barg(L,2)&31;BRET((b<<n)|(b>>(32-n)))} +static int ror(lua_State*L){ +UB b=barg(L,1),n=barg(L,2)&31;BRET((b>>n)|(b<<(32-n)))} +static int bswap(lua_State*L){ +UB b=barg(L,1);b=(b>>24)|((b>>8)&0xff00)|((b&0xff00)<<8)|(b<<24);BRET(b)} +static int tohex(lua_State*L){ +UB b=barg(L,1); +int n=lua_isnone(L,2)?8:(int)barg(L,2); +const char*hexdigits="0123456789abcdef"; +char buf[8]; +int i; +if(n<0){n=-n;hexdigits="0123456789ABCDEF";} +if(n>8)n=8; +for(i=(int)n;--i>=0;){buf[i]=hexdigits[b&15];b>>=4;} +lua_pushlstring(L,buf,(size_t)n); +return 1; +} +static const struct luaL_Reg bitlib[]={ +{"tobit",tobit}, +{"bnot",bnot}, +{"band",band}, +{"bor",bor}, +{"bxor",bxor}, +{"lshift",lshift}, +{"rshift",rshift}, +{"arshift",arshift}, +{"rol",rol}, +{"ror",ror}, +{"bswap",bswap}, +{"tohex",tohex}, +{NULL,NULL} +}; +int main(int argc,char**argv){ +lua_State*L=luaL_newstate(); +int i; +luaL_openlibs(L); +luaL_register(L,"bit",bitlib); +if(argc<2)return sizeof(void*); +lua_createtable(L,0,1); +lua_pushstring(L,argv[1]); +lua_rawseti(L,-2,0); +lua_setglobal(L,"arg"); +if(luaL_loadfile(L,argv[1])) +goto err; +for(i=2;i<argc;i++) +lua_pushstring(L,argv[i]); +if(lua_pcall(L,argc-2,0,0)){ +err: +fprintf(stderr,"Error: %s\n",lua_tostring(L,-1)); +return 1; +} +lua_close(L); +return 0; +} diff --git a/ext/opcache/jit/ir/gen_ir_fold_hash.c b/ext/opcache/jit/ir/gen_ir_fold_hash.c new file mode 100644 --- /dev/null +++ b/ext/opcache/jit/ir/gen_ir_fold_hash.c +/* + * IR - Lightweight JIT Compilation Framework + * (Folding engine generator) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov <dmitry@php.net> + * + * Based on Mike Pall's implementation for LuaJIT. + */ + +#include "ir.h" +#include <stdio.h> + +#include "ir_strtab.c" + +#define MAX_RULES 2048 +#define MAX_SLOTS (MAX_RULES * 4) + +static ir_strtab strtab; + +void print_hash(uint32_t *mask, uint32_t count) +{ + uint32_t i; + + printf("static const uint32_t _ir_fold_hash[%d] = {\n", count); + for (i = 0; i < count; i++) { + printf("\t0x%08x,\n", mask[i]); + } + printf("};\n\n"); +} + +static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2) +{ + return ((mask << r1) - mask) << r2; +} + +#if 0 +#define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) +#define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n))) + +static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2) +{ + return ir_rol((ir_rol(mask, r1) - mask), r2); +} +#endif + +/* Find a perfect hash function */ +int find_hash(uint32_t *mask, uint32_t count) +{ + uint32_t hash[MAX_SLOTS]; + uint32_t n, r1, r2, i, h; + + for (n = (count | 1); n < MAX_SLOTS; n += 2) { + for (r1 = 0; r1 < 31; r1++) { + for (r2 = 0; r2 < 32; r2++) { + memset(hash, 0, n * sizeof(uint32_t)); + for (i = 0; i < count; i++) { + h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n; + if (hash[h]) break; /* collision */ + hash[h] = mask[i]; + } + if (i == count) { + print_hash(hash, n); + printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n); + return 1; + } +#if 0 + memset(hash, 0, n * sizeof(uint32_t)); + for (i = 0; i < count; i++) { + h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n; + if (hash[h]) break; /* collision */ + hash[h] = mask[i]; + } + if (i == count) { + print_hash(hash, n); + printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn 0; /*rol2(%u,%u,%u)*/\n}\n", r1, r2, n); + return 1; + } +#endif + } + } + } + + hash[0] = 0; + print_hash(hash, 1); + printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n"); + return 0; +} + +static int find_op(const char *s, size_t len) +{ + return ir_strtab_find(&strtab, s, (uint8_t)len) - 1; +} +
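/*
 * A note on find_hash() above: it brute-forces a perfect (collision-free)
 * hash for the rule masks. For each odd candidate table size n and each
 * shift pair (r1, r2) it computes
 *
 *     h = (((key << r1) - key) << r2) % n      where key = mask & 0x1fffff
 *
 * over all rules (the shift-and-subtract is a cheap multiply by 2^r1 - 1);
 * the first parameter set that places every rule in a distinct slot wins,
 * and the generator emits the populated _ir_fold_hash[] table plus a
 * matching _ir_fold_hashkey(), so the JIT's fold engine can map an opcode
 * triple to its candidate rules in constant time. If no parameters work,
 * it falls back to a degenerate single-slot table.
 */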
sizeof("IR_FOLD(") - 1; + const char *q; + int op, mask; + + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p < 'A' || *p > 'Z') { + return 0; + } + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask = op; + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q == ')') { + return mask; /* unused operands */ + } else if (*q != '(') { + return 0; + } + + p = q + 1; + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p == '_') { + q = p + 1; + } else if (*p >= 'A' && *p <= 'Z') { + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask |= op << 7; + } else { + return 0; + } + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q == ')') { + return mask; /* unused op2 */ + } else if (*q != ',') { + return 0; + } + + p = q + 1; + while (*p == ' ' || *p == '\t') { + p++; + } + if (*p == '_') { + q = p + 1; + } else if (*p >= 'A' && *p <= 'Z') { + q = p + 1; + while ((*q >= 'A' && *q <= 'Z') + || (*q >= '0' && *q <= '9') + || *q == '_') { + q++; + } + op = find_op(p, q - p); + if (op < 0) { + return 0; + } + mask |= op << 14; + } else { + return 0; + } + + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q != ')') { + return 0; + } + + q++; + while (*q == ' ' || *q == '\t') { + q++; + } + if (*q != ')') { + return 0; + } + + return mask; +} + +int main() +{ + char buf[4096]; + FILE *f = stdin; + int line = 0; + int rules = 0; + int i; + uint32_t mask[MAX_RULES]; + uint32_t rule[MAX_RULES]; + + ir_strtab_init(&strtab, IR_LAST_OP, 0); + +#define IR_OP_ADD(name, flags, op1, op2, op3) \ + ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1); + + IR_OPS(IR_OP_ADD) + + while (fgets(buf, sizeof(buf) - 1, f)) { + size_t len = strlen(buf); + if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) { + buf[len - 1] = 0; + len--; + line++; + } + if (len >= sizeof("IR_FOLD(")-1 + && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) { + if (rules >= MAX_RULES) { + fprintf(stderr, "ERROR: Too many rules\n"); + return 1; + } + i = parse_rule(buf); + if (!i) { + fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line); + return 1; + } + // TODO: few masks may share the same rule ??? + rule[rules] = line; + mask[rules] = i | (rules << 21); + rules++; + } + } + ir_strtab_free(&strtab); + +#if 0 + for (i = 0; i < rules; i++) { + printf("0x%08x\n", mask[i]); + } +#endif + + printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n"); + printf("typedef enum _ir_fold_rule_id {\n"); + for (i = 0; i < rules; i++) { + printf("\tIR_RULE_%d,\n", rule[i]); + } + printf("\t_IR_RULE_LAST\n"); + printf("} ir_fold_rule_id;\n\n"); + + if (!find_hash(mask, rules)) { + fprintf(stderr, "ERROR: Cannot find a good hash function\n"); + return 1; + } + + return 0; +} diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c new file mode 100644 index 0000000000000..d41c3803e5166 --- /dev/null +++ b/ext/opcache/jit/ir/ir.c @@ -0,0 +1,2322 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR construction, folding, utilities) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * The logical IR representation is based on Cliff Click's Sea of Nodes. + * See: C. Click, M. Paleczny. 
"A Simple Graph-Based Intermediate + * Representation" In ACM SIGPLAN Workshop on Intermediate Representations + * (IR '95), pages 35-49, Jan. 1995. + * + * The physical IR representation is based on Mike Pall's LuaJIT IR. + * See: M. Pall. "LuaJIT 2.0 intellectual property disclosure and research + * opportunities" November 2009 http://lua-users.org/lists/lua-l/2009-11/msg00089.html + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#ifndef _WIN32 +# include +#else +# define WIN32_LEAN_AND_MEAN +# include +#endif + +#include "ir.h" +#include "ir_private.h" + +#include + +#ifdef HAVE_VALGRIND +# include +#endif + +#define IR_TYPE_FLAGS(name, type, field, flags) ((flags)|sizeof(type)), +#define IR_TYPE_NAME(name, type, field, flags) #name, +#define IR_TYPE_CNAME(name, type, field, flags) #type, +#define IR_TYPE_SIZE(name, type, field, flags) sizeof(type), +#define IR_OP_NAME(name, flags, op1, op2, op3) #name, + +const uint8_t ir_type_flags[IR_LAST_TYPE] = { + 0, + IR_TYPES(IR_TYPE_FLAGS) +}; + +const char *ir_type_name[IR_LAST_TYPE] = { + "void", + IR_TYPES(IR_TYPE_NAME) +}; + +const uint8_t ir_type_size[IR_LAST_TYPE] = { + 0, + IR_TYPES(IR_TYPE_SIZE) +}; + +const char *ir_type_cname[IR_LAST_TYPE] = { + "void", + IR_TYPES(IR_TYPE_CNAME) +}; + +const char *ir_op_name[IR_LAST_OP] = { + IR_OPS(IR_OP_NAME) +#ifdef IR_PHP + IR_PHP_OPS(IR_OP_NAME) +#endif +}; + +void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted) +{ + if (insn->op == IR_FUNC || insn->op == IR_SYM) { + fprintf(f, "%s", ir_get_str(ctx, insn->val.i32)); + return; + } else if (insn->op == IR_STR) { + if (quoted) { + fprintf(f, "\"%s\"", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "%s", ir_get_str(ctx, insn->val.i32)); + } + return; + } + IR_ASSERT(IR_IS_CONST_OP(insn->op) || insn->op == IR_FUNC_ADDR); + switch (insn->type) { + case IR_BOOL: + fprintf(f, "%u", insn->val.b); + break; + case IR_U8: + fprintf(f, "%u", insn->val.u8); + break; + case IR_U16: + fprintf(f, "%u", insn->val.u16); + break; + case IR_U32: + fprintf(f, "%u", insn->val.u32); + break; + case IR_U64: + fprintf(f, "%" PRIu64, insn->val.u64); + break; + case IR_ADDR: + if (insn->val.addr) { + fprintf(f, "0x%" PRIxPTR, insn->val.addr); + } else { + fprintf(f, "0"); + } + break; + case IR_CHAR: + if (insn->val.c == '\\') { + fprintf(f, "'\\\\'"); + } else if (insn->val.c >= ' ') { + fprintf(f, "'%c'", insn->val.c); + } else if (insn->val.c == '\t') { + fprintf(f, "'\\t'"); + } else if (insn->val.c == '\r') { + fprintf(f, "'\\r'"); + } else if (insn->val.c == '\n') { + fprintf(f, "'\\n'"); + } else if (insn->val.c == '\0') { + fprintf(f, "'\\0'"); + } else { + fprintf(f, "%u", insn->val.c); + } + break; + case IR_I8: + fprintf(f, "%d", insn->val.i8); + break; + case IR_I16: + fprintf(f, "%d", insn->val.i16); + break; + case IR_I32: + fprintf(f, "%d", insn->val.i32); + break; + case IR_I64: + fprintf(f, "%" PRIi64, insn->val.i64); + break; + case IR_DOUBLE: + if (isnan(insn->val.d)) { + fprintf(f, "nan"); + } else { + fprintf(f, "%g", insn->val.d); + } + break; + case IR_FLOAT: + if (isnan(insn->val.f)) { + fprintf(f, "nan"); + } else { + fprintf(f, "%g", insn->val.f); + } + break; + default: + IR_ASSERT(0); + break; + } +} + +#define ir_op_flag_v 0 +#define ir_op_flag_v0X3 (0 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d IR_OP_FLAG_DATA +#define ir_op_flag_d0 ir_op_flag_d +#define ir_op_flag_d1 (ir_op_flag_d | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d1X1 (ir_op_flag_d | 1 | (2 << 
IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d2 (ir_op_flag_d | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d2C (ir_op_flag_d | IR_OP_FLAG_COMMUTATIVE | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d3 (ir_op_flag_d | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_r IR_OP_FLAG_DATA // "d" and "r" are the same now +#define ir_op_flag_r0 ir_op_flag_r +#define ir_op_flag_p (IR_OP_FLAG_DATA | IR_OP_FLAG_PINNED) +#define ir_op_flag_p1 (ir_op_flag_p | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p1X1 (ir_op_flag_p | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p1X2 (ir_op_flag_p | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_p2 (ir_op_flag_p | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_pN (ir_op_flag_p | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_c IR_OP_FLAG_CONTROL +#define ir_op_flag_c1X2 (ir_op_flag_c | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_c3 (ir_op_flag_c | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_START) +#define ir_op_flag_S0X1 (ir_op_flag_S | 0 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S1 (ir_op_flag_S | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S1X1 (ir_op_flag_S | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S2 (ir_op_flag_S | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_S2X1 (ir_op_flag_S | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_SN (ir_op_flag_S | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_E (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END) +#define ir_op_flag_E1 (ir_op_flag_E | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_E2 (ir_op_flag_E | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_T (IR_OP_FLAG_CONTROL|IR_OP_FLAG_BB_END|IR_OP_FLAG_TERMINATOR) +#define ir_op_flag_T2X1 (ir_op_flag_T | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_T1X2 (ir_op_flag_T | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) +#define ir_op_flag_l0 ir_op_flag_l +#define ir_op_flag_l1 (ir_op_flag_l | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l1X1 (ir_op_flag_l | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l1X2 (ir_op_flag_l | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l2 (ir_op_flag_l | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_l3 (ir_op_flag_l | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_STORE) +#define ir_op_flag_s0 ir_op_flag_s +#define ir_op_flag_s1 (ir_op_flag_s | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s2 (ir_op_flag_s | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s2X1 (ir_op_flag_s | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_s3 (ir_op_flag_s | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS) +#define ir_op_flag_a2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) + +#define ir_op_kind____ IR_OPND_UNUSED +#define ir_op_kind_def IR_OPND_DATA +#define ir_op_kind_ref IR_OPND_DATA +#define ir_op_kind_src IR_OPND_CONTROL +#define 
ir_op_kind_reg IR_OPND_CONTROL_DEP +#define ir_op_kind_ret IR_OPND_CONTROL_REF +#define ir_op_kind_str IR_OPND_STR +#define ir_op_kind_num IR_OPND_NUM +#define ir_op_kind_fld IR_OPND_STR +#define ir_op_kind_var IR_OPND_DATA +#define ir_op_kind_prb IR_OPND_PROB +#define ir_op_kind_opt IR_OPND_PROB + +#define _IR_OP_FLAGS(name, flags, op1, op2, op3) \ + IR_OP_FLAGS(ir_op_flag_ ## flags, ir_op_kind_ ## op1, ir_op_kind_ ## op2, ir_op_kind_ ## op3), + +const uint32_t ir_op_flags[IR_LAST_OP] = { + IR_OPS(_IR_OP_FLAGS) +#ifdef IR_PHP + IR_PHP_OPS(_IR_OP_FLAGS) +#endif +}; + +static void ir_grow_bottom(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + ir_ref old_consts_limit = ctx->consts_limit; + + if (ctx->consts_limit < 1024 * 4) { + ctx->consts_limit *= 2; + } else if (ctx->consts_limit < 1024 * 4 * 2) { + ctx->consts_limit = 1024 * 4 * 2; + } else { + ctx->consts_limit += 1024 * 4; + } + buf = ir_mem_realloc(buf, (ctx->consts_limit + ctx->insns_limit) * sizeof(ir_insn)); + memmove(buf + (ctx->consts_limit - old_consts_limit), + buf, + (old_consts_limit + ctx->insns_count) * sizeof(ir_insn)); + ctx->ir_base = buf + ctx->consts_limit; +} + +static ir_ref ir_next_const(ir_ctx *ctx) +{ + ir_ref ref = ctx->consts_count; + + if (UNEXPECTED(ref >= ctx->consts_limit)) { + ir_grow_bottom(ctx); + } + ctx->consts_count = ref + 1; + return -ref; +} + +static void ir_grow_top(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + + if (ctx->insns_limit < 1024 * 4) { + ctx->insns_limit *= 2; + } else if (ctx->insns_limit < 1024 * 4 * 2) { + ctx->insns_limit = 1024 * 4 * 2; + } else { + ctx->insns_limit += 1024 * 4; + } + buf = ir_mem_realloc(buf, (ctx->consts_limit + ctx->insns_limit) * sizeof(ir_insn)); + ctx->ir_base = buf + ctx->consts_limit; +} + +static ir_ref ir_next_insn(ir_ctx *ctx) +{ + ir_ref ref = ctx->insns_count; + + if (UNEXPECTED(ref >= ctx->insns_limit)) { + ir_grow_top(ctx); + } + ctx->insns_count = ref + 1; + return ref; +} + +void ir_truncate(ir_ctx *ctx) +{ + ir_insn *buf = ir_mem_malloc((ctx->consts_count + ctx->insns_count) * sizeof(ir_insn)); + + memcpy(buf, ctx->ir_base - ctx->consts_count, (ctx->consts_count + ctx->insns_count) * sizeof(ir_insn)); + ir_mem_free(ctx->ir_base - ctx->consts_limit); + ctx->insns_limit = ctx->insns_count; + ctx->consts_limit = ctx->consts_count; + ctx->ir_base = buf + ctx->consts_limit; +} + +void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit) +{ + ir_insn *buf; + + IR_ASSERT(consts_limit >= IR_CONSTS_LIMIT_MIN); + IR_ASSERT(insns_limit >= IR_INSNS_LIMIT_MIN); + + memset(ctx, 0, sizeof(ir_ctx)); + + ctx->insns_count = IR_UNUSED + 1; + ctx->insns_limit = insns_limit; + ctx->consts_count = -(IR_TRUE - 1); + ctx->consts_limit = consts_limit; + ctx->fold_cse_limit = IR_UNUSED + 1; + ctx->flags = flags; + + ctx->spill_base = -1; + ctx->fixed_stack_frame_size = -1; + + buf = ir_mem_malloc((consts_limit + insns_limit) * sizeof(ir_insn)); + ctx->ir_base = buf + consts_limit; + + ctx->ir_base[IR_UNUSED].optx = IR_NOP; + ctx->ir_base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR); + ctx->ir_base[IR_NULL].val.u64 = 0; + ctx->ir_base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); + ctx->ir_base[IR_FALSE].val.u64 = 0; + ctx->ir_base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); + ctx->ir_base[IR_TRUE].val.u64 = 1; +} + +void ir_free(ir_ctx *ctx) +{ + ir_insn *buf = ctx->ir_base - ctx->consts_limit; + ir_mem_free(buf); + if (ctx->strtab.data) { + ir_strtab_free(&ctx->strtab); + } + if (ctx->binding) { + 
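+/*
+ * Editorial sketch of the layout behind ir_grow_bottom()/ir_grow_top() above:
+ * one buffer holds both constants and instructions, with ir_base pointing
+ * between them, so a single signed ir_ref indexes either side (note that
+ * ir_next_const() returns -ref):
+ *
+ *     buf:  [ constants, growing down ][ instructions, growing up ]
+ *                                      ^ ctx->ir_base
+ *
+ *     ir_insn *insn = &ctx->ir_base[ref];  // ref < 0: constant, ref > 0: insn
+ */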
ir_hashtab_free(ctx->binding); + ir_mem_free(ctx->binding); + } + if (ctx->use_lists) { + ir_mem_free(ctx->use_lists); + } + if (ctx->use_edges) { + ir_mem_free(ctx->use_edges); + } + if (ctx->cfg_blocks) { + ir_mem_free(ctx->cfg_blocks); + } + if (ctx->cfg_edges) { + ir_mem_free(ctx->cfg_edges); + } + if (ctx->cfg_map) { + ir_mem_free(ctx->cfg_map); + } + if (ctx->rules) { + ir_mem_free(ctx->rules); + } + if (ctx->vregs) { + ir_mem_free(ctx->vregs); + } + if (ctx->live_intervals) { + ir_mem_free(ctx->live_intervals); + } + if (ctx->arena) { + ir_arena_free(ctx->arena); + } + if (ctx->regs) { + ir_mem_free(ctx->regs); + } + if (ctx->prev_ref) { + ir_mem_free(ctx->prev_ref); + } + if (ctx->entries) { + ir_mem_free(ctx->entries); + } + if (ctx->osr_entry_loads) { + ir_list_free((ir_list*)ctx->osr_entry_loads); + ir_mem_free(ctx->osr_entry_loads); + } +} + +ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr) +{ + ir_ref ref = ir_next_const(ctx); + ir_insn *insn = &ctx->ir_base[ref]; + + insn->optx = IR_OPT(IR_ADDR, IR_ADDR); + insn->val.u64 = addr; + /* don't insert into constants chain */ + insn->prev_const = IR_UNUSED; +#if 0 + insn->prev_const = ctx->prev_const_chain[IR_ADDR]; + ctx->prev_const_chain[IR_ADDR] = ref; +#endif +#if 0 + ir_insn *prev_insn, *next_insn; + ir_ref next; + + prev_insn = NULL; + next = ctx->prev_const_chain[IR_ADDR]; + while (next) { + next_insn = &ctx->ir_base[next]; + if (UNEXPECTED(next_insn->val.u64 >= addr)) { + break; + } + prev_insn = next_insn; + next = next_insn->prev_const; + } + + if (prev_insn) { + insn->prev_const = prev_insn->prev_const; + prev_insn->prev_const = ref; + } else { + insn->prev_const = ctx->prev_const_chain[IR_ADDR]; + ctx->prev_const_chain[IR_ADDR] = ref; + } +#endif + + return ref; +} + +static IR_NEVER_INLINE ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx) +{ + ir_insn *insn, *prev_insn; + ir_ref ref, prev; + + if (type == IR_BOOL) { + return val.u64 ? 
IR_TRUE : IR_FALSE; + } else if (type == IR_ADDR && val.u64 == 0) { + return IR_NULL; + } + prev_insn = NULL; + ref = ctx->prev_const_chain[type]; + while (ref) { + insn = &ctx->ir_base[ref]; + if (UNEXPECTED(insn->val.u64 >= val.u64)) { + if (insn->val.u64 == val.u64 && insn->optx == optx) { + return ref; + } else { + break; + } + } + prev_insn = insn; + ref = insn->prev_const; + } + + if (prev_insn) { + prev = prev_insn->prev_const; + prev_insn->prev_const = -ctx->consts_count; + } else { + prev = ctx->prev_const_chain[type]; + ctx->prev_const_chain[type] = -ctx->consts_count; + } + + ref = ir_next_const(ctx); + insn = &ctx->ir_base[ref]; + insn->prev_const = prev; + + insn->optx = optx; + insn->val.u64 = val.u64; + + return ref; +} + +ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type) +{ + return ir_const_ex(ctx, val, type, IR_OPT(type, type)); +} + +ir_ref ir_const_i8(ir_ctx *ctx, int8_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I8); +} + +ir_ref ir_const_i16(ir_ctx *ctx, int16_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I16); +} + +ir_ref ir_const_i32(ir_ctx *ctx, int32_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I32); +} + +ir_ref ir_const_i64(ir_ctx *ctx, int64_t c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_I64); +} + +ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U8); +} + +ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U16); +} + +ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U32); +} + +ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c) +{ + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_U64); +} + +ir_ref ir_const_bool(ir_ctx *ctx, bool c) +{ + return (c) ? 
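+/*
+ * Editorial note: ir_const_ex() above keeps a per-type chain of constants
+ * sorted by value (prev_const), so constants are interned. A sketch:
+ *
+ *     ir_ref a = ir_const_i32(ctx, 42);
+ *     ir_ref b = ir_const_i32(ctx, 42);  // chain walk finds the existing 42
+ *     // a == b, a negative ref into ctx->ir_base
+ */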
IR_TRUE : IR_FALSE; +} + +ir_ref ir_const_char(ir_ctx *ctx, char c) +{ + ir_val val; + val.i64 = c; + return ir_const(ctx, val, IR_CHAR); +} + +ir_ref ir_const_float(ir_ctx *ctx, float c) +{ + ir_val val; + val.u32_hi = 0; + val.f = c; + return ir_const(ctx, val, IR_FLOAT); +} + +ir_ref ir_const_double(ir_ctx *ctx, double c) +{ + ir_val val; + val.d = c; + return ir_const(ctx, val, IR_DOUBLE); +} + +ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c) +{ + if (c == 0) { + return IR_NULL; + } + ir_val val; + val.u64 = c; + return ir_const(ctx, val, IR_ADDR); +} + +ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, uint16_t flags) +{ + if (c == 0) { + return IR_NULL; + } + ir_val val; + val.u64 = c; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_FUNC_ADDR, IR_ADDR, flags)); +} + +ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, uint16_t flags) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_FUNC, IR_ADDR, flags)); +} + +ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_SYM, IR_ADDR, 0)); +} + +ir_ref ir_const_str(ir_ctx *ctx, ir_ref str) +{ + ir_val val; + val.addr = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_STR, IR_ADDR, 0)); +} + +ir_ref ir_str(ir_ctx *ctx, const char *s) +{ + size_t len; + + if (!ctx->strtab.data) { + ir_strtab_init(&ctx->strtab, 64, 4096); + } + len = strlen(s); + IR_ASSERT(len <= 0xffffffff); + return ir_strtab_lookup(&ctx->strtab, s, (uint32_t)len, ir_strtab_count(&ctx->strtab) + 1); +} + +ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len) +{ + if (!ctx->strtab.data) { + ir_strtab_init(&ctx->strtab, 64, 4096); + } + IR_ASSERT(len <= 0xffffffff); + return ir_strtab_lookup(&ctx->strtab, s, (uint32_t)len, ir_strtab_count(&ctx->strtab) + 1); +} + +const char *ir_get_str(const ir_ctx *ctx, ir_ref idx) +{ + IR_ASSERT(ctx->strtab.data); + return ir_strtab_str(&ctx->strtab, idx - 1); +} + +/* IR construction */ +ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_ref ref = ir_next_insn(ctx); + ir_insn *insn = &ctx->ir_base[ref]; + + insn->optx = opt; + insn->op1 = op1; + insn->op2 = op2; + insn->op3 = op3; + + return ref; +} + +ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt) +{ + return ir_emit(ctx, opt, IR_UNUSED, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1) +{ + return ir_emit(ctx, opt, op1, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2) +{ + return ir_emit(ctx, opt, op1, op2, IR_UNUSED); +} + +ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + return ir_emit(ctx, opt, op1, op2, op3); +} + +static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_ref ref = ctx->prev_insn_chain[opt & IR_OPT_OP_MASK]; + ir_insn *insn; + + if (ref) { + ir_ref limit = ctx->fold_cse_limit; + + if (op1 > limit) { + limit = op1; + } + if (op2 > limit) { + limit = op2; + } + if (op3 > limit) { + limit = op3; + } + while (ref >= limit) { + insn = &ctx->ir_base[ref]; + if (insn->opt == opt && insn->op1 == op1 && insn->op2 == op2 && insn->op3 == op3) { + return ref; + } + if (!insn->prev_insn_offset) { + break; + } + ref = ref - (ir_ref)(uint32_t)insn->prev_insn_offset; + } + } + + return IR_UNUSED; +} + +#define IR_FOLD(X) IR_FOLD1(X, __LINE__) +#define IR_FOLD1(X, Y) IR_FOLD2(X, Y) +#define IR_FOLD2(X, Y) case IR_RULE_ ## Y: + +#define IR_FOLD_ERROR(msg) do { \ + IR_ASSERT(0 && 
(msg)); \ + goto ir_fold_emit; \ + } while (0) + +#define IR_FOLD_CONST_U(_val) do { \ + val.u64 = (_val); \ + goto ir_fold_const; \ + } while (0) + +#define IR_FOLD_CONST_I(_val) do { \ + val.i64 = (_val); \ + goto ir_fold_const; \ + } while (0) + +#define IR_FOLD_CONST_D(_val) do { \ + val.d = (_val); \ + goto ir_fold_const; \ + } while (0) + +#define IR_FOLD_CONST_F(_val) do { \ + val.f = (_val); \ + goto ir_fold_const; \ + } while (0) + +#define IR_FOLD_COPY(op) do { \ + ref = (op); \ + goto ir_fold_copy; \ + } while (0) + +#define IR_FOLD_BOOL(cond) \ + IR_FOLD_COPY((cond) ? IR_TRUE : IR_FALSE) + +#define IR_FOLD_NAMED(name) ir_fold_ ## name: +#define IR_FOLD_DO_NAMED(name) goto ir_fold_ ## name +#define IR_FOLD_RESTART goto ir_fold_restart +#define IR_FOLD_CSE goto ir_fold_cse +#define IR_FOLD_EMIT goto ir_fold_emit +#define IR_FOLD_NEXT break + +#include "ir_fold_hash.h" + +#define IR_FOLD_RULE(x) ((x) >> 21) +#define IR_FOLD_KEY(x) ((x) & 0x1fffff) + +/* + * key = insn->op | (insn->op1->op << 7) | (insn->op2->op << 14) + * + * ANY and UNUSED ops are represented by 0 + */ + +ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn) +{ + uint8_t op; + ir_ref ref; + ir_val val; + uint32_t key, any; + (void) op3_insn; + +restart: + key = (opt & IR_OPT_OP_MASK) + ((uint32_t)op1_insn->op << 7) + ((uint32_t)op2_insn->op << 14); + any = 0x1fffff; + do { + uint32_t k = key & any; + uint32_t h = _ir_fold_hashkey(k); + uint32_t fh = _ir_fold_hash[h]; + if (IR_FOLD_KEY(fh) == k /*|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)*/) { + switch (IR_FOLD_RULE(fh)) { +#include "ir_fold.h" + default: + break; + } + } + if (any == 0x7f) { + /* All patterns are checked. Pass on to CSE. 
*/ + goto ir_fold_cse; + } + /* op2/op1/op op2/_/op _/op1/op _/_/op + * 0x1fffff -> 0x1fc07f -> 0x003fff -> 0x00007f + * from masks to bits: 11 -> 10 -> 01 -> 00 + * + * a b => x y + * 1 1 1 0 + * 1 0 0 1 + * 0 1 0 0 + * + * x = a & b; y = !b + */ + any = ((any & (any << 7)) & 0x1fc000) | (~any & 0x3f80) | 0x7f; + } while (1); + +ir_fold_restart: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + goto restart; + } else { + ctx->fold_insn.optx = opt; + ctx->fold_insn.op1 = op1; + ctx->fold_insn.op2 = op2; + ctx->fold_insn.op3 = op3; + return IR_FOLD_DO_RESTART; + } +ir_fold_cse: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + /* Local CSE */ + ref = _ir_fold_cse(ctx, opt, op1, op2, op3); + if (ref) { + return ref; + } + + ref = ir_emit(ctx, opt, op1, op2, op3); + + /* Update local CSE chain */ + op = opt & IR_OPT_OP_MASK; + ir_ref prev = ctx->prev_insn_chain[op]; + ir_insn *insn = ctx->ir_base + ref; + if (!prev || ref - prev > 0xffff) { + /* can't fit into 16-bit */ + insn->prev_insn_offset = 0; + } else { + insn->prev_insn_offset = ref - prev; + } + ctx->prev_insn_chain[op] = ref; + + return ref; + } +ir_fold_emit: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ir_emit(ctx, opt, op1, op2, op3); + } else { + ctx->fold_insn.optx = opt; + ctx->fold_insn.op1 = op1; + ctx->fold_insn.op2 = op2; + ctx->fold_insn.op3 = op3; + return IR_FOLD_DO_EMIT; + } +ir_fold_copy: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ref; + } else { + ctx->fold_insn.op1 = ref; + return IR_FOLD_DO_COPY; + } +ir_fold_const: + if (!(ctx->flags & IR_OPT_IN_SCCP)) { + return ir_const(ctx, val, IR_OPT_TYPE(opt)); + } else { + ctx->fold_insn.type = IR_OPT_TYPE(opt); + ctx->fold_insn.val.u64 = val.u64; + return IR_FOLD_DO_CONST; + } +} + +ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + if (UNEXPECTED(!(ctx->flags & IR_OPT_FOLDING))) { + if ((opt & IR_OPT_OP_MASK) == IR_PHI) { + opt |= (3 << IR_OPT_INPUTS_SHIFT); + } + return ir_emit(ctx, opt, op1, op2, op3); + } + return ir_folding(ctx, opt, op1, op2, op3, ctx->ir_base + op1, ctx->ir_base + op2, ctx->ir_base + op3); +} + +ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt) +{ + return ir_fold(ctx, opt, IR_UNUSED, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1) +{ + return ir_fold(ctx, opt, op1, IR_UNUSED, IR_UNUSED); +} + +ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2) +{ + return ir_fold(ctx, opt, op1, op2, IR_UNUSED); +} + +ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + return ir_fold(ctx, opt, op1, op2, op3); +} + +ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count) +{ + int i; + ir_ref *p, ref = ctx->insns_count; + ir_insn *insn; + + IR_ASSERT(count >= 0); + while (UNEXPECTED(ref + count/4 >= ctx->insns_limit)) { + ir_grow_top(ctx); + } + ctx->insns_count = ref + 1 + count/4; + + insn = &ctx->ir_base[ref]; + insn->optx = opt | (count << IR_OPT_INPUTS_SHIFT); + for (i = 1, p = insn->ops + i; i <= (count|3); i++, p++) { + *p = IR_UNUSED; + } + + return ref; +} + +void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val) +{ + ir_insn *insn = &ctx->ir_base[ref]; + +#ifdef IR_DEBUG + if (n > 3) { + int32_t count; + + IR_ASSERT(IR_OP_HAS_VAR_INPUTS(ir_op_flags[insn->op])); + count = insn->inputs_count; + IR_ASSERT(n <= count); + } +#endif + ir_insn_set_op(insn, n, val); +} + +ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos) +{ 
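+/*
+ * Worked example (editorial) for the fold dispatch above: for an ADD of two
+ * I32 constants the first probe uses the exact key
+ *
+ *     key = IR_ADD | (IR_C_I32 << 7) | (IR_C_I32 << 14);  // op2/op1/op
+ *
+ * (IR_C_I32 is assumed by analogy with IR_C_BOOL/IR_C_ADDR used in ir_init()).
+ * On a miss the 'any' mask generalizes the key to op2/_/op, then _/op1/op,
+ * then _/_/op before falling through to CSE, so a rule written as
+ * IR_FOLD(ADD(_, _)) still matches.
+ */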
+ return ir_emit(ctx, IR_OPT(IR_PARAM, type), region, ir_str(ctx, name), pos); +} + +ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name) +{ + return ir_emit(ctx, IR_OPT(IR_VAR, type), region, ir_str(ctx, name), IR_UNUSED); +} + +ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def) +{ + if (IR_IS_CONST_REF(def)) { + return def; + } + if (!ctx->binding) { + ctx->binding = ir_mem_malloc(sizeof(ir_hashtab)); + ir_hashtab_init(ctx->binding, 16); + } + /* Node may be bound to some special spill slot (using negative "var") */ + IR_ASSERT(var < 0); + if (!ir_hashtab_add(ctx->binding, def, var)) { + /* Add a copy with different binding */ + def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, 1); + ir_hashtab_add(ctx->binding, def, var); + } + return def; +} + +/* Batch construction of def->use edges */ +#if 0 +void ir_build_def_use_lists(ir_ctx *ctx) +{ + ir_ref n, i, j, *p, def; + ir_insn *insn; + uint32_t edges_count; + ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list)); + ir_ref *edges; + ir_use_list *use_list; + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + uint32_t flags = ir_op_flags[insn->op]; + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + n = insn->inputs_count; + } else { + n = insn->inputs_count = IR_INPUT_EDGES_COUNT(flags); + } + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + lists[def].count++; + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + edges_count = 0; + for (i = IR_UNUSED + 1, use_list = &lists[i]; i < ctx->insns_count; i++, use_list++) { + use_list->refs = edges_count; + edges_count += use_list->count; + use_list->count = 0; + } + + edges = ir_mem_malloc(edges_count * sizeof(ir_ref)); + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + n = insn->inputs_count; + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + use_list = &lists[def]; + edges[use_list->refs + use_list->count++] = i; + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + ctx->use_edges = edges; + ctx->use_edges_count = edges_count; + ctx->use_lists = lists; +} +#else +void ir_build_def_use_lists(ir_ctx *ctx) +{ + ir_ref n, i, j, *p, def; + ir_insn *insn; + size_t linked_lists_size, linked_lists_top = 0, edges_count = 0; + ir_use_list *lists = ir_mem_calloc(ctx->insns_count, sizeof(ir_use_list)); + ir_ref *edges; + ir_use_list *use_list; + ir_ref *linked_lists; + + linked_lists_size = IR_ALIGNED_SIZE(ctx->insns_count, 1024); + linked_lists = ir_mem_malloc(linked_lists_size * sizeof(ir_ref)); + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + uint32_t flags = ir_op_flags[insn->op]; + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + n = insn->inputs_count; + } else { + n = insn->inputs_count = IR_INPUT_EDGES_COUNT(flags); + } + for (j = n, p = insn->ops + 1; j > 0; j--, p++) { + def = *p; + if (def > 0) { + use_list = &lists[def]; + edges_count++; + if (!use_list->refs) { + /* store a single "use" directly in "refs" using a positive number */ + use_list->refs = i; + use_list->count = 1; + } else { + if (UNEXPECTED(linked_lists_top >= linked_lists_size)) { + linked_lists_size += 1024; + linked_lists = ir_mem_realloc(linked_lists, linked_lists_size * sizeof(ir_ref)); + } + /* form a linked list of "uses" (like in binsort) */ + linked_lists[linked_lists_top] = i; /* store the "use" */ + linked_lists[linked_lists_top + 1] = use_list->refs; /* store list next */ + 
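+/*
+ * Editorial illustration of this encoding: 'refs' temporarily holds either a
+ * single use directly (positive), or minus the position of a (use, next)
+ * pair in linked_lists. E.g. after insn 7 and then insn 9 both use insn 3:
+ *
+ *     lists[3].refs == -(top + 1);      // head of the chain
+ *     linked_lists[top] == 9;           // most recent use
+ *     linked_lists[top + 1] == 7;       // previous refs value (next link)
+ *
+ * The second pass below unthreads each chain into the flat use_edges array.
+ */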
use_list->refs = -(linked_lists_top + 1); /* store a head of the list using a negative number */ + linked_lists_top += 2; + use_list->count++; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + ctx->use_edges_count = edges_count; + edges = ir_mem_malloc(edges_count * sizeof(ir_ref)); + for (use_list = lists + ctx->insns_count - 1; use_list != lists; use_list--) { + n = use_list->refs; + if (n) { + /* transform linked list to plain array */ + while (n < 0) { + n = -n; + edges[--edges_count] = linked_lists[n - 1]; + n = linked_lists[n]; + } + IR_ASSERT(n > 0); + edges[--edges_count] = n; + use_list->refs = edges_count; + } + } + + ctx->use_edges = edges; + ctx->use_lists = lists; + ir_mem_free(linked_lists); +} +#endif + +/* Helper Data Types */ +void ir_array_grow(ir_array *a, uint32_t size) +{ + IR_ASSERT(size > a->size); + a->refs = ir_mem_realloc(a->refs, size * sizeof(ir_ref)); + a->size = size; +} + +void ir_array_insert(ir_array *a, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < a->size); + if (a->refs[a->size - 1]) { + ir_array_grow(a, a->size + 1); + } + memmove(a->refs + i + 1, a->refs + i, (a->size - i - 1) * sizeof(ir_ref)); + a->refs[i] = val; +} + +void ir_array_remove(ir_array *a, uint32_t i) +{ + IR_ASSERT(i < a->size); + memmove(a->refs + i, a->refs + i + 1, (a->size - i - 1) * sizeof(ir_ref)); + a->refs[a->size - 1] = IR_UNUSED; +} + +void ir_list_insert(ir_list *l, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < l->len); + if (l->len >= l->a.size) { + ir_array_grow(&l->a, l->a.size + 1); + } + memmove(l->a.refs + i + 1, l->a.refs + i, (l->len - i) * sizeof(ir_ref)); + l->a.refs[i] = val; + l->len++; +} + +void ir_list_remove(ir_list *l, uint32_t i) +{ + IR_ASSERT(i < l->len); + memmove(l->a.refs + i, l->a.refs + i + 1, (l->len - i) * sizeof(ir_ref)); + l->len--; +} + +bool ir_list_contains(const ir_list *l, ir_ref val) +{ + uint32_t i; + + for (i = 0; i < l->len; i++) { + if (ir_array_at(&l->a, i) == val) { + return 1; + } + } + return 0; +} + +static uint32_t ir_hashtab_hash_size(uint32_t size) +{ + size -= 1; + size |= (size >> 1); + size |= (size >> 2); + size |= (size >> 4); + size |= (size >> 8); + size |= (size >> 16); + return size + 1; +} + +static void ir_hashtab_resize(ir_hashtab *tab) +{ + uint32_t old_hash_size = (uint32_t)(-(int32_t)tab->mask); + char *old_data = tab->data; + uint32_t size = tab->size * 2; + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_hashtab_bucket)); + ir_hashtab_bucket *p; + uint32_t pos, i; + + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = data + (hash_size * sizeof(uint32_t)); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + + memcpy(tab->data, old_data, tab->count * sizeof(ir_hashtab_bucket)); + ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); + + i = tab->count; + pos = 0; + p = (ir_hashtab_bucket*)tab->data; + do { + uint32_t key = p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_hashtab_bucket); + p++; + } while (--i); +} + +void ir_hashtab_init(ir_hashtab *tab, uint32_t size) +{ + IR_ASSERT(size > 0); + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_hashtab_bucket)); + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = (data + (hash_size * sizeof(uint32_t))); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = 
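+/*
+ * Editorial illustration of the layout set up here: one allocation holds
+ * hash_size uint32_t slots followed by the buckets, and tab->data points at
+ * the buckets. Since mask = -hash_size, (key | mask) is a small negative
+ * number that indexes backwards into the slot area:
+ *
+ *     hash_size = 8  =>  mask = 0xfffffff8
+ *     ((uint32_t*)tab->data)[(int32_t)(key | mask)]  // slots -8..-1
+ *
+ * Slots hold byte offsets of bucket chains; 0xffffffff (the memset of -1)
+ * means empty.
+ */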
size; + tab->count = 0; + tab->pos = 0; +} + +void ir_hashtab_free(ir_hashtab *tab) +{ + uint32_t hash_size = (uint32_t)(-(int32_t)tab->mask); + char *data = (char*)tab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + tab->data = NULL; +} + +ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key) +{ + const char *data = (const char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_hashtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_hashtab_bucket*)(data + pos); + if (p->key == key) { + return p->val; + } + pos = p->next; + } + return IR_INVALID_VAL; +} + +bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val) +{ + char *data = (char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_hashtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_hashtab_bucket*)(data + pos); + if (p->key == key) { + return p->val == val; + } + pos = p->next; + } + + if (UNEXPECTED(tab->count >= tab->size)) { + ir_hashtab_resize(tab); + data = tab->data; + } + + pos = tab->pos; + tab->pos += sizeof(ir_hashtab_bucket); + tab->count++; + p = (ir_hashtab_bucket*)(data + pos); + p->key = key; + p->val = val; + key |= tab->mask; + p->next = ((uint32_t*)data)[(int32_t)key]; + ((uint32_t*)data)[(int32_t)key] = pos; + return 1; +} + +static int ir_hashtab_key_cmp(const void *b1, const void *b2) +{ + return ((ir_hashtab_bucket*)b1)->key - ((ir_hashtab_bucket*)b2)->key; +} + +void ir_hashtab_key_sort(ir_hashtab *tab) +{ + ir_hashtab_bucket *p; + uint32_t hash_size, pos, i; + + if (!tab->count) { + return; + } + + qsort(tab->data, tab->count, sizeof(ir_hashtab_bucket), ir_hashtab_key_cmp); + + hash_size = ir_hashtab_hash_size(tab->size); + memset((char*)tab->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); + + i = tab->count; + pos = 0; + p = (ir_hashtab_bucket*)tab->data; + do { + uint32_t key = p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_hashtab_bucket); + p++; + } while (--i); +} + +static void ir_addrtab_resize(ir_hashtab *tab) +{ + uint32_t old_hash_size = (uint32_t)(-(int32_t)tab->mask); + char *old_data = tab->data; + uint32_t size = tab->size * 2; + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_addrtab_bucket)); + ir_addrtab_bucket *p; + uint32_t pos, i; + + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = data + (hash_size * sizeof(uint32_t)); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + + memcpy(tab->data, old_data, tab->count * sizeof(ir_addrtab_bucket)); + ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); + + i = tab->count; + pos = 0; + p = (ir_addrtab_bucket*)tab->data; + do { + uint32_t key = (uint32_t)p->key | tab->mask; + p->next = ((uint32_t*)tab->data)[(int32_t)key]; + ((uint32_t*)tab->data)[(int32_t)key] = pos; + pos += sizeof(ir_addrtab_bucket); + p++; + } while (--i); +} + +void ir_addrtab_init(ir_hashtab *tab, uint32_t size) +{ + IR_ASSERT(size > 0); + uint32_t hash_size = ir_hashtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_addrtab_bucket)); + memset(data, -1, hash_size * sizeof(uint32_t)); + tab->data = (data + (hash_size * sizeof(uint32_t))); + tab->mask = (uint32_t)(-(int32_t)hash_size); + tab->size = size; + tab->count = 0; + tab->pos = 0; +} + +void ir_addrtab_free(ir_hashtab *tab) +{ + uint32_t 
hash_size = (uint32_t)(-(int32_t)tab->mask); + char *data = (char*)tab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + tab->data = NULL; +} + +ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key) +{ + const char *data = (const char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_addrtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_addrtab_bucket*)(data + pos); + if (p->key == key) { + return p->val; + } + pos = p->next; + } + return IR_INVALID_VAL; +} + +bool ir_addrtab_add(ir_hashtab *tab, uint64_t key, ir_ref val) +{ + char *data = (char*)tab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(key | tab->mask)]; + ir_addrtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_addrtab_bucket*)(data + pos); + if (p->key == key) { + return p->val == val; + } + pos = p->next; + } + + if (UNEXPECTED(tab->count >= tab->size)) { + ir_addrtab_resize(tab); + data = tab->data; + } + + pos = tab->pos; + tab->pos += sizeof(ir_addrtab_bucket); + tab->count++; + p = (ir_addrtab_bucket*)(data + pos); + p->key = key; + p->val = val; + key |= tab->mask; + p->next = ((uint32_t*)data)[(int32_t)key]; + ((uint32_t*)data)[(int32_t)key] = pos; + return 1; +} + +/* Memory API */ +#ifdef _WIN32 +void *ir_mem_mmap(size_t size) +{ + void *ret; + +#ifdef _M_X64 + DWORD size_hi = size >> 32, size_lo = size & 0xffffffff; +#else + DWORD size_hi = 0, size_lo = size; +#endif + + HANDLE h = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, size_hi, size_lo, NULL); + + ret = MapViewOfFile(h, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, 0, size); + if (!ret) { + CloseHandle(h); + } + + return ret; +} + +int ir_mem_unmap(void *ptr, size_t size) +{ + /* XXX file handle is leaked. */ + UnmapViewOfFile(ptr); + return 1; +} + +int ir_mem_protect(void *ptr, size_t size) +{ + return 1; +} + +int ir_mem_unprotect(void *ptr, size_t size) +{ + return 1; +} + +int ir_mem_flush(void *ptr, size_t size) +{ + return 1; +} +#else +void *ir_mem_mmap(size_t size) +{ + void *ret = mmap(NULL, size, PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + ret = NULL; + } + return ret; +} + +int ir_mem_unmap(void *ptr, size_t size) +{ + munmap(ptr, size); + return 1; +} + +int ir_mem_protect(void *ptr, size_t size) +{ + mprotect(ptr, size, PROT_READ | PROT_EXEC); + return 1; +} + +int ir_mem_unprotect(void *ptr, size_t size) +{ + mprotect(ptr, size, PROT_READ | PROT_WRITE); + return 1; +} + +int ir_mem_flush(void *ptr, size_t size) +{ +#if ((defined(__GNUC__) && ZEND_GCC_VERSION >= 4003) || __has_builtin(__builtin___clear_cache)) + __builtin___clear_cache((char*)(ptr), (char*)(ptr) + size); +#endif +#ifdef HAVE_VALGRIND + VALGRIND_DISCARD_TRANSLATIONS(ptr, size); +#endif + return 1; +} +#endif + +/* Alias Analyses */ +typedef enum _ir_alias { + IR_MAY_ALIAS = -1, + IR_NO_ALIAS = 0, + IR_MUST_ALIAS = 1, +} ir_alias; + +#if 0 +static ir_alias ir_check_aliasing(ir_ctx *ctx, ir_ref addr1, ir_ref addr2) +{ + ir_insn *insn1, *insn2; + + if (addr1 == addr2) { + return IR_MUST_ALIAS; + } + + insn1 = &ctx->ir_base[addr1]; + insn2 = &ctx->ir_base[addr2]; + if (insn1->op == IR_ADD && IR_IS_CONST_REF(insn1->op2)) { + if (insn1->op1 == addr2) { + uintptr_t offset1 = ctx->ir_base[insn1->op2].val.u64; + return (offset1 != 0) ? 
IR_MUST_ALIAS : IR_NO_ALIAS; + } else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn1->op2) && insn1->op1 == insn2->op1) { + if (insn1->op2 == insn2->op2) { + return IR_MUST_ALIAS; + } else if (IR_IS_CONST_REF(insn1->op2) && IR_IS_CONST_REF(insn2->op2)) { + uintptr_t offset1 = ctx->ir_base[insn1->op2].val.u64; + uintptr_t offset2 = ctx->ir_base[insn2->op2].val.u64; + + return (offset1 == offset2) ? IR_MUST_ALIAS : IR_NO_ALIAS; + } + } + } else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn2->op2)) { + if (insn2->op1 == addr1) { + uintptr_t offset2 = ctx->ir_base[insn2->op2].val.u64; + + return (offset2 != 0) ? IR_MUST_ALIAS : IR_NO_ALIAS; + } + } + return IR_MAY_ALIAS; +} +#endif + +static ir_alias ir_check_partial_aliasing(const ir_ctx *ctx, ir_ref addr1, ir_ref addr2, ir_type type1, ir_type type2) +{ + ir_insn *insn1, *insn2; + + /* this must be already checked */ + IR_ASSERT(addr1 != addr2); + + insn1 = &ctx->ir_base[addr1]; + insn2 = &ctx->ir_base[addr2]; + if (insn1->op == IR_ADD && IR_IS_CONST_REF(insn1->op2)) { + if (insn1->op1 == addr2) { + uintptr_t offset1 = ctx->ir_base[insn1->op2].val.addr; + uintptr_t size2 = ir_type_size[type2]; + + return (offset1 < size2) ? IR_MUST_ALIAS : IR_NO_ALIAS; + } else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn1->op2) && insn1->op1 == insn2->op1) { + if (insn1->op2 == insn2->op2) { + return IR_MUST_ALIAS; + } else if (IR_IS_CONST_REF(insn1->op2) && IR_IS_CONST_REF(insn2->op2)) { + uintptr_t offset1 = ctx->ir_base[insn1->op2].val.addr; + uintptr_t offset2 = ctx->ir_base[insn2->op2].val.addr; + + if (offset1 == offset2) { + return IR_MUST_ALIAS; + } else if (type1 == type2) { + return IR_NO_ALIAS; + } else { + /* check for partial intersection */ + uintptr_t size1 = ir_type_size[type1]; + uintptr_t size2 = ir_type_size[type2]; + + if (offset1 > offset2) { + return offset1 < offset2 + size2 ? IR_MUST_ALIAS : IR_NO_ALIAS; + } else { + return offset2 < offset1 + size1 ? IR_MUST_ALIAS : IR_NO_ALIAS; + } + } + } + } + } else if (insn2->op == IR_ADD && IR_IS_CONST_REF(insn2->op2)) { + if (insn2->op1 == addr1) { + uintptr_t offset2 = ctx->ir_base[insn2->op2].val.addr; + uintptr_t size1 = ir_type_size[type1]; + + return (offset2 < size1) ? IR_MUST_ALIAS : IR_NO_ALIAS; + } + } + return IR_MAY_ALIAS; +} + +static ir_ref ir_find_aliasing_load(ir_ctx *ctx, ir_ref ref, ir_type type, ir_ref addr) +{ + ir_ref limit = (addr > 0) ? 
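+/*
+ * Worked example (editorial) for ir_check_partial_aliasing() above: with a
+ * common base, an I32 access at base+4 and an I16 access at base+6 give
+ * offset1 = 4, size1 = 4, offset2 = 6, size2 = 2; since offset2 (6) <
+ * offset1 + size1 (8), the byte ranges [4,8) and [6,8) intersect and the
+ * result is IR_MUST_ALIAS, while an I16 access at base+8 would not reach
+ * back into [4,8) and yields IR_NO_ALIAS.
+ */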
addr : 1; + ir_insn *insn; + uint32_t modified_regset = 0; + + while (ref > limit) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_LOAD) { + if (insn->type == type && insn->op2 == addr) { + return ref; /* load forwarding (L2L) */ + } + } else if (insn->op == IR_STORE) { + ir_type type2 = ctx->ir_base[insn->op3].type; + + if (insn->op2 == addr) { + if (type2 == type) { + ref = insn->op3; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_RLOAD && (modified_regset & (1 << insn->op2))) { + /* anti-dependency */ + return IR_UNUSED; + } + return ref; /* store forwarding (S2L) */ + } else if (IR_IS_TYPE_INT(type) && ir_type_size[type2] > ir_type_size[type]) { + return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), insn->op3); /* partial store forwarding (S2L) */ + } else { + return IR_UNUSED; + } + } else if (ir_check_partial_aliasing(ctx, addr, insn->op2, type, type2) != IR_NO_ALIAS) { + return IR_UNUSED; + } + } else if (insn->op == IR_RSTORE) { + modified_regset |= (1 << insn->op3); + } else if (insn->op >= IR_START || insn->op == IR_CALL) { + return IR_UNUSED; + } + ref = insn->op1; + } + return IR_UNUSED; +} + +/* IR Construction API */ + +ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num) +{ + IR_ASSERT(ctx->control); + IR_ASSERT(ctx->ir_base[ctx->control].op == IR_START); + IR_ASSERT(ctx->insns_count == num + 1); + return ir_param(ctx, type, ctx->control, name, num); +} + +ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name) +{ +// IR_ASSERT(ctx->control); +// IR_ASSERT(IR_IS_BB_START(ctx->ir_base[ctx->control].op)); +// TODO: VAR may be inserted after some "memory" instruction + ir_ref ref = ctx->control; + + while (1) { + IR_ASSERT(ctx->control); + if (IR_IS_BB_START(ctx->ir_base[ref].op)) { + break; + } + ref = ctx->ir_base[ref].op1; + } + return ir_var(ctx, type, ref, name); +} + +ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2) +{ + IR_ASSERT(ctx->control); + IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN); + if (src1 == src2 && src1 != IR_UNUSED) { + return src1; + } + return ir_emit3(ctx, IR_OPTX(IR_PHI, type, 3), ctx->control, src1, src2); +} + +ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs) +{ + IR_ASSERT(ctx->control); + IR_ASSERT(n > 0); + if (n == 1) { + return inputs[0]; + } else { + ir_ref i; + ir_ref ref = inputs[0]; + + IR_ASSERT(ctx->ir_base[ctx->control].op == IR_MERGE || ctx->ir_base[ctx->control].op == IR_LOOP_BEGIN); + if (ref != IR_UNUSED) { + for (i = 1; i < n; i++) { + if (inputs[i] != ref) { + break; + } + } + if (i == n) { + /* all the same */ + return ref; + } + } + + ref = ir_emit_N(ctx, IR_OPT(IR_PHI, type), n + 1); + ir_set_op(ctx, ref, 1, ctx->control); + for (i = 0; i < n; i++) { + ir_set_op(ctx, ref, i + 2, inputs[i]); + } + return ref; + } +} + +void _ir_PHI_SET_OP(ir_ctx *ctx, ir_ref phi, ir_ref pos, ir_ref src) +{ + ir_insn *insn = &ctx->ir_base[phi]; + ir_ref *ops = insn->ops; + + IR_ASSERT(insn->op == IR_PHI); + IR_ASSERT(ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN); + IR_ASSERT(pos > 0 && pos < insn->inputs_count); + pos++; /* op1 is used for control */ + ops[pos] = src; +} + +void _ir_START(ir_ctx *ctx) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(ctx->insns_count == 1); + ctx->control = ir_emit0(ctx, IR_START); +} + +void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num) +{ + IR_ASSERT(!ctx->control); + /* fake control edge */ + IR_ASSERT((ir_op_flags[ctx->ir_base[src].op] & 
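+/*
+ * Editorial sketch of what ir_find_aliasing_load() above buys the builder
+ * API (see _ir_LOAD() below): a load that follows a store to the same
+ * address is folded away, e.g.
+ *
+ *     _ir_STORE(ctx, addr, val);
+ *     ir_ref x = _ir_LOAD(ctx, IR_I32, addr);  // x == val, no LOAD emitted
+ *
+ * assuming val is an I32; a wider stored value is narrowed via TRUNC, and a
+ * potentially aliasing store in between disables the forwarding.
+ */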
IR_OP_FLAG_TERMINATOR) + || ctx->ir_base[src].op == IR_END + || ctx->ir_base[src].op == IR_LOOP_END); /* return from a recursive call */ + ctx->control = ir_emit2(ctx, IR_ENTRY, src, num); +} + +void _ir_BEGIN(ir_ctx *ctx, ir_ref src) +{ + IR_ASSERT(!ctx->control); + if (src + && src + 1 == ctx->insns_count + && ctx->ir_base[src].op == IR_END) { + /* merge with the last END */ + ctx->control = ctx->ir_base[src].op1; + ctx->insns_count--; + } else { + ctx->control = ir_emit1(ctx, IR_BEGIN, src); + } +} + +ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition) +{ + ir_ref if_ref; + + IR_ASSERT(ctx->control); + if_ref = ir_emit2(ctx, IR_IF, ctx->control, condition); + ctx->control = IR_UNUSED; + return if_ref; +} + +void _ir_IF_TRUE(ir_ctx *ctx, ir_ref if_ref) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(if_ref); + IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF); + ctx->control = ir_emit1(ctx, IR_IF_TRUE, if_ref); +} + +void _ir_IF_TRUE_cold(ir_ctx *ctx, ir_ref if_ref) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(if_ref); + IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF); + /* op2 is used as an indicator of low-probability branch */ + ctx->control = ir_emit2(ctx, IR_IF_TRUE, if_ref, 1); +} + +void _ir_IF_FALSE(ir_ctx *ctx, ir_ref if_ref) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(if_ref); + IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF); + ctx->control = ir_emit1(ctx, IR_IF_FALSE, if_ref); +} + +void _ir_IF_FALSE_cold(ir_ctx *ctx, ir_ref if_ref) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(if_ref); + IR_ASSERT(ctx->ir_base[if_ref].op == IR_IF); + /* op2 is used as an indicator of low-probability branch */ + ctx->control = ir_emit2(ctx, IR_IF_FALSE, if_ref, 1); +} + +ir_ref _ir_END(ir_ctx *ctx) +{ + ir_ref ref; + + IR_ASSERT(ctx->control); + ref = ir_emit1(ctx, IR_END, ctx->control); + ctx->control = IR_UNUSED; + return ref; +} + +void _ir_MERGE_2(ir_ctx *ctx, ir_ref src1, ir_ref src2) +{ + IR_ASSERT(!ctx->control); + ctx->control = ir_emit2(ctx, IR_OPTX(IR_MERGE, IR_VOID, 2), src1, src2); +} + +void _ir_MERGE_N(ir_ctx *ctx, ir_ref n, ir_ref *inputs) +{ + IR_ASSERT(!ctx->control); + IR_ASSERT(n > 0); + if (n == 1) { + _ir_BEGIN(ctx, inputs[0]); + } else { + ir_ref *ops; + + ctx->control = ir_emit_N(ctx, IR_MERGE, n); + ops = ctx->ir_base[ctx->control].ops; + while (n) { + n--; + ops[n + 1] = inputs[n]; + } + } +} + +void _ir_MERGE_SET_OP(ir_ctx *ctx, ir_ref merge, ir_ref pos, ir_ref src) +{ + ir_insn *insn = &ctx->ir_base[merge]; + ir_ref *ops = insn->ops; + + IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); + IR_ASSERT(pos > 0 && pos <= insn->inputs_count); + ops[pos] = src; +} + +ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list) +{ + ir_ref ref; + + IR_ASSERT(ctx->control); + IR_ASSERT(!list || ctx->ir_base[list].op == IR_END); + /* create a linked list of END nodes with the same destination through END.op2 */ + ref = ir_emit2(ctx, IR_END, ctx->control, list); + ctx->control = IR_UNUSED; + return ref; +} + +void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list) +{ + ir_ref ref = list; + + if (list != IR_UNUSED) { + uint32_t n = 0; + + IR_ASSERT(!ctx->control); + + /* count the inputs */ + do { + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->op == IR_END); + ref = insn->op2; + n++; + } while (ref != IR_UNUSED); + + + /* create MERGE node */ + IR_ASSERT(n > 0); + if (n == 1) { + ctx->ir_base[list].op2 = IR_UNUSED; + _ir_BEGIN(ctx, list); + } else { + ctx->control = ir_emit_N(ctx, IR_MERGE, n); + ref = list; + while (n) { + ir_insn *insn = &ctx->ir_base[ref]; + + ir_set_op(ctx, ctx->control, n, ref); + ref = 
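+/*
+ * Editorial usage sketch for _ir_END_LIST()/_ir_MERGE_LIST() above: forward
+ * edges to a common join point are chained through END.op2 and merged in
+ * one go, e.g. when several paths jump to the same label:
+ *
+ *     ir_ref list = IR_UNUSED;
+ *     // at the end of each incoming path:
+ *     list = _ir_END_LIST(ctx, list);
+ *     // once, at the join point:
+ *     _ir_MERGE_LIST(ctx, list);  // emits a BEGIN (n==1) or an n-input MERGE
+ */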
insn->op2; + insn->op2 = IR_UNUSED; + n--; + } + } + } +} + +ir_ref _ir_LOOP_BEGIN(ir_ctx *ctx, ir_ref src1) +{ + IR_ASSERT(!ctx->control); + ctx->control = ir_emit2(ctx, IR_OPTX(IR_LOOP_BEGIN, IR_VOID, 2), src1, IR_UNUSED); + return ctx->control; +} + +ir_ref _ir_LOOP_END(ir_ctx *ctx) +{ + ir_ref ref; + + IR_ASSERT(ctx->control); + ref = ir_emit1(ctx, IR_LOOP_END, ctx->control); + ctx->control = IR_UNUSED; + return ref; +} + +ir_ref _ir_CALL(ir_ctx *ctx, ir_type type, ir_ref func) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPTX(IR_CALL, type, 2), ctx->control, func); +} + +ir_ref _ir_CALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit3(ctx, IR_OPTX(IR_CALL, type, 3), ctx->control, func, arg1); +} + +ir_ref _ir_CALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 4); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ctx->control = call; + return call; +} + +ir_ref _ir_CALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 5); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ctx->control = call; + return call; +} + +ir_ref _ir_CALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 6); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ir_set_op(ctx, call, 6, arg4); + ctx->control = call; + return call; +} + +ir_ref _ir_CALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), 7); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ir_set_op(ctx, call, 6, arg4); + ir_set_op(ctx, call, 7, arg5); + ctx->control = call; + return call; +} + +ir_ref _ir_CALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args) +{ + ir_ref call; + uint32_t i; + + IR_ASSERT(ctx->control); + call = ir_emit_N(ctx, IR_OPT(IR_CALL, type), count + 2); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + for (i = 0; i < count; i++) { + ir_set_op(ctx, call, i + 3, args[i]); + } + ctx->control = call; + return call; +} + +void _ir_UNREACHABLE(ir_ctx *ctx) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_UNREACHABLE, ctx->control, IR_UNUSED, ctx->ir_base[1].op1); + ctx->ir_base[1].op1 = ctx->control; + ctx->control = IR_UNUSED; +} + +void _ir_TAILCALL(ir_ctx *ctx, ir_type type, ir_ref func) +{ + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + ctx->control = ir_emit2(ctx, IR_OPTX(IR_TAILCALL, type, 2), ctx->control, func); + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_1(ir_ctx *ctx, ir_type 
type, ir_ref func, ir_ref arg1) +{ + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + ctx->control = ir_emit3(ctx, IR_OPTX(IR_TAILCALL, type, 3), ctx->control, func, arg1); + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 4); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ctx->control = call; + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 5); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ctx->control = call; + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 6); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ir_set_op(ctx, call, 6, arg4); + ctx->control = call; + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5) +{ + ir_ref call; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), 7); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + ir_set_op(ctx, call, 3, arg1); + ir_set_op(ctx, call, 4, arg2); + ir_set_op(ctx, call, 5, arg3); + ir_set_op(ctx, call, 6, arg4); + ir_set_op(ctx, call, 7, arg5); + ctx->control = call; + _ir_UNREACHABLE(ctx); +} + +void _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args) +{ + ir_ref call; + uint32_t i; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + call = ir_emit_N(ctx, IR_OPT(IR_TAILCALL, type), count + 2); + ir_set_op(ctx, call, 1, ctx->control); + ir_set_op(ctx, call, 2, func); + for (i = 0; i < count; i++) { + ir_set_op(ctx, call, i + 3, args[i]); + } + ctx->control = call; + _ir_UNREACHABLE(ctx); +} + +ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val) +{ + ir_ref ref; + + IR_ASSERT(ctx->control); + ref = ir_emit2(ctx, IR_SWITCH, ctx->control, val); + ctx->control = IR_UNUSED; + return ref; +} + +void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val) +{ + IR_ASSERT(!ctx->control); + ctx->control = ir_emit2(ctx, IR_CASE_VAL, switch_ref, val); +} + +void 
_ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref) +{ + IR_ASSERT(!ctx->control); + ctx->control = ir_emit1(ctx, IR_CASE_DEFAULT, switch_ref); +} + +void _ir_RETURN(ir_ctx *ctx, ir_ref val) +{ + ir_type type = (val != IR_UNUSED) ? ctx->ir_base[val].type : IR_VOID; + + IR_ASSERT(ctx->control); + if (ctx->ret_type == (ir_type)-1) { + ctx->ret_type = type; + } + IR_ASSERT(ctx->ret_type == type && "conflicting return type"); + ctx->control = ir_emit3(ctx, IR_RETURN, ctx->control, val, ctx->ir_base[1].op1); + ctx->ir_base[1].op1 = ctx->control; + ctx->control = IR_UNUSED; +} + +void _ir_IJMP(ir_ctx *ctx, ir_ref addr) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_IJMP, ctx->control, addr, ctx->ir_base[1].op1); + ctx->ir_base[1].op1 = ctx->control; + ctx->control = IR_UNUSED; +} + +ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset) +{ + if (offset) { + addr = ir_fold2(ctx, IR_OPT(IR_ADD, IR_ADDR), addr, ir_const_addr(ctx, offset)); + } + return addr; +} + +void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr) +{ + IR_ASSERT(ctx->control); + if (condition == IR_TRUE) { + return; + } else { + ir_ref ref = ctx->control; + ir_insn *insn; + + while (ref > condition) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_GUARD) { + if (insn->op2 == condition) { + return; + } + } else if (insn->op == IR_GUARD_NOT) { + if (insn->op2 == condition) { + condition = IR_FALSE; + break; + } + } else if (insn->op >= IR_START) { + break; + } + ref = insn->op1; + } + } + if (ctx->snapshot_create) { + ctx->snapshot_create(ctx, addr); + } + ctx->control = ir_emit3(ctx, IR_GUARD, ctx->control, condition, addr); +} + +void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr) +{ + IR_ASSERT(ctx->control); + if (condition == IR_FALSE) { + return; + } else { + ir_ref ref = ctx->control; + ir_insn *insn; + + while (ref > condition) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_GUARD_NOT) { + if (insn->op2 == condition) { + return; + } + } else if (insn->op == IR_GUARD) { + if (insn->op2 == condition) { + condition = IR_TRUE; + break; + } + } else if (insn->op >= IR_START) { + break; + } + ref = insn->op1; + } + } + if (ctx->snapshot_create) { + ctx->snapshot_create(ctx, addr); + } + ctx->control = ir_emit3(ctx, IR_GUARD_NOT, ctx->control, condition, addr); +} + +ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n) +{ + ir_ref snapshot; + + IR_ASSERT(ctx->control); + snapshot = ir_emit_N(ctx, IR_SNAPSHOT, 1 + n); /* op1 is used for control */ + ctx->ir_base[snapshot].op1 = ctx->control; + ctx->control = snapshot; + return snapshot; +} + +void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val) +{ + ir_insn *insn = &ctx->ir_base[snapshot]; + ir_ref *ops = insn->ops; + + IR_ASSERT(val < snapshot); + IR_ASSERT(insn->op == IR_SNAPSHOT); + pos++; /* op1 is used for control */ + IR_ASSERT(pos > 1 && pos <= insn->inputs_count); + ops[pos] = val; +} + +ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_EXITCALL, IR_I32), ctx->control, func); +} + +ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_ALLOCA, IR_ADDR), ctx->control, size); +} + +void _ir_AFREE(ir_ctx *ctx, ir_ref size) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit2(ctx, IR_AFREE, ctx->control, size); +} + +ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD, type), 
ctx->control, var); +} + +void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val); +} + +ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit3(ctx, IR_OPT(IR_TLS, IR_ADDR), ctx->control, index, offset); +} + +ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_RLOAD, type), ctx->control, reg); +} + +void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_RSTORE, ctx->control, val, reg); +} + +ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr) +{ + ir_ref ref = ir_find_aliasing_load(ctx, ctx->control, type, addr); + + IR_ASSERT(ctx->control); + if (!ref) { + ctx->control = ref = ir_emit2(ctx, IR_OPT(IR_LOAD, type), ctx->control, addr); + } + return ref; +} + +void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val) +{ + ir_ref limit = (addr > 0) ? addr : 1; + ir_ref ref = ctx->control; + ir_ref prev = IR_UNUSED; + ir_insn *insn; + ir_type type = ctx->ir_base[val].type; + ir_type type2; + bool guarded = 0; + + IR_ASSERT(ctx->control); + while (ref > limit) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_STORE) { + if (insn->op2 == addr) { + if (ctx->ir_base[insn->op3].type == type) { + if (insn->op3 == val) { + return; + } else { + if (!guarded) { + if (prev) { + ctx->ir_base[prev].op1 = insn->op1; + } else { + ctx->control = insn->op1; + } + insn->optx = IR_NOP; + insn->op1 = IR_NOP; + insn->op2 = IR_NOP; + insn->op3 = IR_NOP; + } + break; + } + } else { + break; + } + } else { + type2 = ctx->ir_base[insn->op3].type; + goto check_aliasing; + } + } else if (insn->op == IR_LOAD) { + if (insn->op2 == addr) { + break; + } + type2 = insn->type; +check_aliasing: + if (ir_check_partial_aliasing(ctx, addr, insn->op2, type, type2) != IR_NO_ALIAS) { + break; + } + } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { + guarded = 1; + } else if (insn->op >= IR_START || insn->op == IR_CALL) { + break; + } + prev = ref; + ref = insn->op1; + } + ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val); +} diff --git a/ext/opcache/jit/ir/ir.h new file mode 100644 index 0000000000000..18ac9e7a33f6e --- /dev/null +++ b/ext/opcache/jit/ir/ir.h @@ -0,0 +1,924 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Public API) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_H +#define IR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +#define IR_VERSION "0.0.1" + +#ifdef _WIN32 +/* TODO Handle ARM, too. */ +# if defined(_M_X64) +# define __SIZEOF_SIZE_T__ 8 +# elif defined(_M_IX86) +# define __SIZEOF_SIZE_T__ 4 +# endif +/* Only little endian is supported for any arch on Windows, + so just fake the same for all. 
*/ +# define __ORDER_LITTLE_ENDIAN__ 1 +# define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ +# ifndef __has_builtin +# define __has_builtin(arg) (0) +# endif +#endif + +#if defined(IR_TARGET_X86) +# define IR_TARGET "x86" +#elif defined(IR_TARGET_X64) +# ifdef _WIN64 +# define IR_TARGET "Windows-x86_64" /* 64-bit Windows uses a different ABI and calling convention */ +# else +# define IR_TARGET "x86_64" +# endif +#elif defined(IR_TARGET_AARCH64) +# define IR_TARGET "aarch64" +#else +# error "Unknown IR target" +#endif + +#if defined(__SIZEOF_SIZE_T__) +# if __SIZEOF_SIZE_T__ == 8 +# define IR_64 1 +# elif __SIZEOF_SIZE_T__ != 4 +# error "Unknown addr size" +# endif +#else +# error "Unknown addr size" +#endif + +#if defined(__BYTE_ORDER__) +# if defined(__ORDER_LITTLE_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define IR_STRUCT_LOHI(lo, hi) struct {lo; hi;} +# endif +# endif +# if defined(__ORDER_BIG_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define IR_STRUCT_LOHI(lo, hi) struct {hi; lo;} +# endif +# endif +#endif +#ifndef IR_STRUCT_LOHI +# error "Unknown byte order" +#endif + +#ifdef __has_attribute +# if __has_attribute(always_inline) +# define IR_ALWAYS_INLINE static inline __attribute__((always_inline)) +# endif +# if __has_attribute(noinline) +# define IR_NEVER_INLINE __attribute__((noinline)) +# endif +#else +# define __has_attribute(x) 0 +#endif + +#ifndef IR_ALWAYS_INLINE +# define IR_ALWAYS_INLINE static inline +#endif +#ifndef IR_NEVER_INLINE +# define IR_NEVER_INLINE +#endif + +#ifdef IR_PHP +# include "ir_php.h" +#endif + +/* IR Type flags (low 4 bits are used for type size) */ +#define IR_TYPE_SIGNED (1<<4) +#define IR_TYPE_UNSIGNED (1<<5) +#define IR_TYPE_FP (1<<6) +#define IR_TYPE_SPECIAL (1<<7) +#define IR_TYPE_BOOL (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED) +#define IR_TYPE_ADDR (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED) +#define IR_TYPE_CHAR (IR_TYPE_SPECIAL|IR_TYPE_SIGNED) + +/* List of IR types */ +#define IR_TYPES(_) \ + _(BOOL, bool, b, IR_TYPE_BOOL) \ + _(U8, uint8_t, u8, IR_TYPE_UNSIGNED) \ + _(U16, uint16_t, u16, IR_TYPE_UNSIGNED) \ + _(U32, uint32_t, u32, IR_TYPE_UNSIGNED) \ + _(U64, uint64_t, u64, IR_TYPE_UNSIGNED) \ + _(ADDR, uintptr_t, addr, IR_TYPE_ADDR) \ + _(CHAR, char, c, IR_TYPE_CHAR) \ + _(I8, int8_t, i8, IR_TYPE_SIGNED) \ + _(I16, int16_t, i16, IR_TYPE_SIGNED) \ + _(I32, int32_t, i32, IR_TYPE_SIGNED) \ + _(I64, int64_t, i64, IR_TYPE_SIGNED) \ + _(DOUBLE, double, d, IR_TYPE_FP) \ + _(FLOAT, float, f, IR_TYPE_FP) \ + +#define IR_IS_TYPE_UNSIGNED(t) ((t) < IR_CHAR) +#define IR_IS_TYPE_SIGNED(t) ((t) >= IR_CHAR && (t) < IR_DOUBLE) +#define IR_IS_TYPE_INT(t) ((t) < IR_DOUBLE) +#define IR_IS_TYPE_FP(t) ((t) >= IR_DOUBLE) + +#define IR_TYPE_ENUM(name, type, field, flags) IR_ ## name, + +typedef enum _ir_type { + IR_VOID, + IR_TYPES(IR_TYPE_ENUM) + IR_LAST_TYPE +} ir_type; + +/* List of IR opcodes + * ================== + * + * Each instruction is described by a type (opcode, flags, op1_type, op2_type, op3_type) + * + * flags + * ----- + * v - void + * d - data IR_OP_FLAG_DATA + * r - ref IR_OP_FLAG_DATA alias + * p - pinned IR_OP_FLAG_DATA + IR_OP_FLAG_PINNED + * c - control IR_OP_FLAG_CONTROL + * S - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_START + * E - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + * T - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + IR_OP_FLAG_TERMINATOR + * l - load IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_LOAD + * s - store IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_STORE + * x - call IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_CALL + * a - alloc IR_OP_FLAG_MEM + 
IR_OP_FLAG_MEM_ALLOC + * 0-3 - number of input edges + * N - number of arguments is defined in the insn->inputs_count (MERGE, PHI, CALL) + * X1-X3 - number of extra data ops + * C - commutative operation ("d2C" => IR_OP_FLAG_DATA + IR_OP_FLAG_COMMUTATIVE) + * + * operand types + * ------------- + * ___ - unused + * def - reference to a definition op (data-flow use-def dependency edge) + * ref - memory reference (data-flow use-def dependency edge) + * var - variable reference (data-flow use-def dependency edge) + * arg - argument reference CALL/TAILCALL/CARG->CARG + * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN) + * reg - data-control dependency on region (PHI, VAR, PARAM) + * ret - reference to a previous RETURN instruction (RETURN) + * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL) + * num - number: argument number (PARAM) + * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT) + * opt - optional number + * + * The order of IR opcodes is carefully selected for efficient folding. + * - foldable instructions go first + * - NOP is never used (code 0 is used as ANY pattern) + * - CONST is the most often used instruction (encoded with 1 bit) + * - equality inversion: EQ <-> NE => op ^= 1 + * - comparison inversion: [U]LT <-> [U]GT, [U]LE <-> [U]GE => op ^= 3 + */ + +#define IR_OPS(_) \ + /* special op (must be the first !!!) */ \ + _(NOP, v, ___, ___, ___) /* empty instruction */ \ + \ + /* constants reference */ \ + _(C_BOOL, r0, ___, ___, ___) /* constant */ \ + _(C_U8, r0, ___, ___, ___) /* constant */ \ + _(C_U16, r0, ___, ___, ___) /* constant */ \ + _(C_U32, r0, ___, ___, ___) /* constant */ \ + _(C_U64, r0, ___, ___, ___) /* constant */ \ + _(C_ADDR, r0, ___, ___, ___) /* constant */ \ + _(C_CHAR, r0, ___, ___, ___) /* constant */ \ + _(C_I8, r0, ___, ___, ___) /* constant */ \ + _(C_I16, r0, ___, ___, ___) /* constant */ \ + _(C_I32, r0, ___, ___, ___) /* constant */ \ + _(C_I64, r0, ___, ___, ___) /* constant */ \ + _(C_DOUBLE, r0, ___, ___, ___) /* constant */ \ + _(C_FLOAT, r0, ___, ___, ___) /* constant */ \ + \ + /* equality ops */ \ + _(EQ, d2C, def, def, ___) /* equal */ \ + _(NE, d2C, def, def, ___) /* not equal */ \ + \ + /* comparison ops (order matters, LT must be a multiple of 4 !!!) 
*/ \ + _(LT, d2, def, def, ___) /* less */ \ + _(GE, d2, def, def, ___) /* greater or equal */ \ + _(LE, d2, def, def, ___) /* less or equal */ \ + _(GT, d2, def, def, ___) /* greater */ \ + _(ULT, d2, def, def, ___) /* unsigned less */ \ + _(UGE, d2, def, def, ___) /* unsigned greater or equal */ \ + _(ULE, d2, def, def, ___) /* unsigned less or equal */ \ + _(UGT, d2, def, def, ___) /* unsigned greater */ \ + \ + /* arithmetic ops */ \ + _(ADD, d2C, def, def, ___) /* addition */ \ + _(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \ + _(MUL, d2C, def, def, ___) /* multiplication */ \ + _(DIV, d2, def, def, ___) /* division */ \ + _(MOD, d2, def, def, ___) /* modulo */ \ + _(NEG, d1, def, ___, ___) /* change sign */ \ + _(ABS, d1, def, ___, ___) /* absolute value */ \ + /* (LDEXP, MIN, MAX, FPMATH) */ \ + \ + /* type conversion ops */ \ + _(SEXT, d1, def, ___, ___) /* sign extension */ \ + _(ZEXT, d1, def, ___, ___) /* zero extension */ \ + _(TRUNC, d1, def, ___, ___) /* truncates to int type */ \ + _(BITCAST, d1, def, ___, ___) /* binary representation */ \ + _(INT2FP, d1, def, ___, ___) /* int to float conversion */ \ + _(FP2INT, d1, def, ___, ___) /* float to int conversion */ \ + _(FP2FP, d1, def, ___, ___) /* float to float conversion */ \ + \ + /* overflow-check */ \ + _(ADD_OV, d2C, def, def, ___) /* addition */ \ + _(SUB_OV, d2, def, def, ___) /* subtraction */ \ + _(MUL_OV, d2C, def, def, ___) /* multiplication */ \ + _(OVERFLOW, d1, def, ___, ___) /* overflow check add/sub/mul */ \ + \ + /* bitwise and shift ops */ \ + _(NOT, d1, def, ___, ___) /* bitwise NOT */ \ + _(OR, d2C, def, def, ___) /* bitwise OR */ \ + _(AND, d2C, def, def, ___) /* bitwise AND */ \ + _(XOR, d2C, def, def, ___) /* bitwise XOR */ \ + _(SHL, d2, def, def, ___) /* logic shift left */ \ + _(SHR, d2, def, def, ___) /* logic shift right */ \ + _(SAR, d2, def, def, ___) /* arithmetic shift right */ \ + _(ROL, d2, def, def, ___) /* rotate left */ \ + _(ROR, d2, def, def, ___) /* rotate right */ \ + _(BSWAP, d1, def, ___, ___) /* byte swap */ \ + \ + /* branch-less conditional ops */ \ + _(MIN, d2C, def, def, ___) /* min(op1, op2) */ \ + _(MAX, d2C, def, def, ___) /* max(op1, op2) */ \ + _(COND, d3, def, def, def) /* op1 ? op2 : op3 */ \ + \ + /* data-flow and miscellaneous ops */ \ + _(PHI, pN, reg, def, def) /* SSA Phi function */ \ + _(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \ + _(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \ + /* (USE, RENAME) */ \ + \ + /* data ops */ \ + _(PARAM, p1X2, reg, str, num) /* incoming parameter proj. */ \ + _(VAR, p1X1, reg, str, ___) /* local variable */ \ + _(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \ + _(FUNC, r0, ___, ___, ___) /* constant func ref */ \ + _(SYM, r0, ___, ___, ___) /* constant symbol ref */ \ + _(STR, r0, ___, ___, ___) /* constant str ref */ \ + \ + /* call ops */ \ + _(CALL, xN, src, def, def) /* CALL(src, func, args...) 
*/ \ + _(TAILCALL, xN, src, def, def) /* CALL+RETURN */ \ + \ + /* memory reference and load/store ops */ \ + _(ALLOCA, a2, src, def, ___) /* alloca(def) */ \ + _(AFREE, a2, src, def, ___) /* revert alloca(def) */ \ + _(VADDR, d1, var, ___, ___) /* load address of local var */ \ + _(VLOAD, l2, src, var, ___) /* load value of local var */ \ + _(VSTORE, s3, src, var, def) /* store value to local var */ \ + _(RLOAD, l1X2, src, num, opt) /* load value from register */ \ + _(RSTORE, s2X1, src, def, num) /* store value into register */ \ + _(LOAD, l2, src, ref, ___) /* load from memory */ \ + _(STORE, s3, src, ref, def) /* store to memory */ \ + _(TLS, l1X2, src, num, num) /* thread local variable */ \ + _(TRAP, x1, src, ___, ___) /* DebugBreak */ \ + /* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \ + \ + /* guards */ \ + _(GUARD, c3, src, def, def) /* IF without second successor */ \ + _(GUARD_NOT , c3, src, def, def) /* IF without second successor */ \ + \ + /* deoptimization */ \ + _(SNAPSHOT, xN, src, def, def) /* SNAPSHOT(src, args...) */ \ + \ + /* control-flow nodes */ \ + _(START, S0X1, ret, ___, ___) /* function start */ \ + _(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \ + _(BEGIN, S1, src, ___, ___) /* block start */ \ + _(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \ + _(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \ + _(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \ + _(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. */ \ + _(MERGE, SN, src, src, src) /* control merge */ \ + _(LOOP_BEGIN, SN, src, src, src) /* loop start */ \ + _(END, E1, src, ___, ___) /* block end */ \ + _(LOOP_END, E1, src, ___, ___) /* loop end */ \ + _(IF, E2, src, def, ___) /* conditional control split */ \ + _(SWITCH, E2, src, def, ___) /* multi-way control split */ \ + _(RETURN, T2X1, src, def, ret) /* function return */ \ + _(IJMP, T2X1, src, def, ret) /* computed goto */ \ + _(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \ + \ + /* deoptimization helper */ \ + _(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \ + + +#define IR_OP_ENUM(name, flags, op1, op2, op3) IR_ ## name, + +typedef enum _ir_op { + IR_OPS(IR_OP_ENUM) +#ifdef IR_PHP + IR_PHP_OPS(IR_OP_ENUM) +#endif + IR_LAST_OP +} ir_op; + +/* IR Opcode and Type Union */ +#define IR_OPT_OP_MASK 0x00ff +#define IR_OPT_TYPE_MASK 0xff00 +#define IR_OPT_TYPE_SHIFT 8 +#define IR_OPT_INPUTS_SHIFT 16 + +#define IR_OPT(op, type) ((uint16_t)(op) | ((uint16_t)(type) << IR_OPT_TYPE_SHIFT)) +#define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT)) +#define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT) + +/* IR References */ +typedef int32_t ir_ref; + +#define IR_IS_CONST_REF(ref) ((ref) < 0) + +/* IR Constant Value */ +#define IR_UNUSED 0 +#define IR_NULL (-1) +#define IR_FALSE (-2) +#define IR_TRUE (-3) +#define IR_LAST_FOLDABLE_OP IR_COPY + +#define IR_CONSTS_LIMIT_MIN (-(IR_TRUE - 1)) +#define IR_INSNS_LIMIT_MIN (IR_UNUSED + 1) + + +#ifndef IR_64 +# define ADDR_MEMBER uintptr_t addr; +#else +# define ADDR_MEMBER +#endif +typedef union _ir_val { + double d; + uint64_t u64; + int64_t i64; +#ifdef IR_64 + uintptr_t addr; +#endif + IR_STRUCT_LOHI( + union { + uint32_t u32; + int32_t i32; + float f; + ADDR_MEMBER + IR_STRUCT_LOHI( + union { + uint16_t u16; + int16_t i16; + IR_STRUCT_LOHI( + union { + uint8_t u8; + int8_t i8; + bool b; + char c; + }, + uint8_t u8_hi + ); + }, + uint16_t 
u16_hi + ); + }, + uint32_t u32_hi + ); +} ir_val; +#undef ADDR_MEMBER + +/* IR constant flags */ +#define IR_CONST_EMIT (1<<0) +#define IR_CONST_FASTCALL_FUNC (1<<1) +#define IR_CONST_VARARG_FUNC (1<<2) + +/* IR Instruction */ +typedef struct _ir_insn { + IR_STRUCT_LOHI( + union { + IR_STRUCT_LOHI( + union { + IR_STRUCT_LOHI( + uint8_t op, + uint8_t type + ); + uint16_t opt; + }, + union { + uint16_t inputs_count; /* number of input control edges for MERGE, PHI, CALL, TAILCALL */ + uint16_t prev_insn_offset; /* 16-bit backward offset from current instruction for CSE */ + uint16_t const_flags; /* flag to emit constant in rodata section */ + } + ); + uint32_t optx; + ir_ref ops[1]; + }, + union { + ir_ref op1; + ir_ref prev_const; + } + ); + union { + IR_STRUCT_LOHI( + ir_ref op2, + ir_ref op3 + ); + ir_val val; + }; +} ir_insn; + +/* IR Hash Tables API (private) */ +typedef struct _ir_hashtab ir_hashtab; + +/* IR String Tables API (implementation in ir_strtab.c) */ +typedef struct _ir_strtab { + void *data; + uint32_t mask; + uint32_t size; + uint32_t count; + uint32_t pos; + char *buf; + uint32_t buf_size; + uint32_t buf_top; +} ir_strtab; + +#define ir_strtab_count(strtab) (strtab)->count + +typedef void (*ir_strtab_apply_t)(const char *str, uint32_t len, ir_ref val); + +void ir_strtab_init(ir_strtab *strtab, uint32_t count, uint32_t buf_size); +ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val); +ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len); +ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val); +const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx); +void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func); +void ir_strtab_free(ir_strtab *strtab); + +/* IR Context Flags */ +#define IR_FUNCTION (1<<0) /* Generate a function. */ +#define IR_FASTCALL_FUNC (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */ +#define IR_VARARG_FUNC (1<<2) +#define IR_STATIC (1<<3) +#define IR_EXTERN (1<<4) +#define IR_CONST (1<<5) + +#define IR_SKIP_PROLOGUE (1<<6) /* Don't generate function prologue. 
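+ The caller is then expected to set up the stack frame itself (e.g. a VM that runs JIT-ed code inside its own frame). 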
*/ +#define IR_USE_FRAME_POINTER (1<<7) +#define IR_PREALLOCATED_STACK (1<<8) +#define IR_HAS_ALLOCA (1<<9) +#define IR_HAS_CALLS (1<<10) +#define IR_NO_STACK_COMBINE (1<<11) +#define IR_START_BR_TARGET (1<<12) +#define IR_ENTRY_BR_TARGET (1<<13) +#define IR_GEN_ENDBR (1<<14) +#define IR_MERGE_EMPTY_ENTRIES (1<<15) + +#define IR_CFG_HAS_LOOPS (1<<16) +#define IR_IRREDUCIBLE_CFG (1<<17) + +#define IR_OPT_FOLDING (1<<18) +#define IR_OPT_CFG (1<<19) /* merge BBs by removing END->BEGIN nodes during CFG construction */ +#define IR_OPT_CODEGEN (1<<20) +#define IR_OPT_IN_SCCP (1<<21) +#define IR_LINEAR (1<<22) +#define IR_GEN_NATIVE (1<<23) +#define IR_GEN_CODE (1<<24) /* C or LLVM */ + +/* Temporary: SCCP -> CFG */ +#define IR_SCCP_DONE (1<<25) + +/* Temporary: Dominators -> Loops */ +#define IR_NO_LOOPS (1<<25) + +/* Temporary: Live Ranges */ +#define IR_LR_HAVE_DESSA_MOVES (1<<25) + +/* Temporary: Register Allocator */ +#define IR_RA_HAVE_SPLITS (1<<25) +#define IR_RA_HAVE_SPILLS (1<<26) + +/* debug related */ +#ifdef IR_DEBUG +# define IR_DEBUG_SCCP (1<<27) +# define IR_DEBUG_GCM (1<<28) +# define IR_DEBUG_SCHEDULE (1<<29) +# define IR_DEBUG_RA (1<<30) +#endif + +typedef struct _ir_ctx ir_ctx; +typedef struct _ir_use_list ir_use_list; +typedef struct _ir_block ir_block; +typedef struct _ir_arena ir_arena; +typedef struct _ir_live_interval ir_live_interval; +typedef struct _ir_live_range ir_live_range; +typedef struct _ir_loader ir_loader; +typedef int8_t ir_regs[4]; + +typedef void (*ir_snapshot_create_t)(ir_ctx *ctx, ir_ref addr); + +#if defined(IR_TARGET_AARCH64) +typedef const void *(*ir_get_exit_addr_t)(uint32_t exit_num); +typedef const void *(*ir_get_veneer_t)(ir_ctx *ctx, const void *addr); +typedef bool (*ir_set_veneer_t)(ir_ctx *ctx, const void *addr, const void *veneer); +#endif + +struct _ir_ctx { + ir_insn *ir_base; /* bidirectional array - instructions grow down, constants grow up */ + ir_ref insns_count; /* number of instructions stored in instructions buffer */ + ir_ref insns_limit; /* size of allocated instructions buffer (it's extended on overflow) */ + ir_ref consts_count; /* number of constants stored in constants buffer */ + ir_ref consts_limit; /* size of allocated constants buffer (it's extended on overflow) */ + uint32_t flags; /* IR context flags (see IR_* defines above) */ + ir_type ret_type; /* Function return type */ + uint32_t mflags; /* CPU specific flags (see IR_X86_... macros below) */ + int32_t status; /* non-zero error code (see IR_ERROR_... 
macros), app may use negative codes */ + ir_ref fold_cse_limit; /* CSE finds identical insns backward from "insns_count" to "fold_cse_limit" */ + ir_insn fold_insn; /* temporary storage for folding engine */ + ir_hashtab *binding; + ir_use_list *use_lists; /* def->use lists for each instruction */ + ir_ref *use_edges; /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */ + ir_ref use_edges_count; /* number of elements in use_edges[] array */ + uint32_t cfg_blocks_count; /* number of elements in cfg_blocks[] array */ + uint32_t cfg_edges_count; /* number of elements in cfg_edges[] array */ + ir_block *cfg_blocks; /* list of basic blocks (starts from 1) */ + uint32_t *cfg_edges; /* the actual basic block predecessor and successor edges */ + uint32_t *cfg_map; /* map of instructions to basic block number */ + uint32_t *rules; /* array of target specific code-generation rules (for each instruction) */ + uint32_t *vregs; + ir_ref vregs_count; + int32_t spill_base; /* base register for special spill area (e.g. PHP VM frame pointer) */ + uint64_t fixed_regset; /* fixed registers, excluded for regular register allocation */ + int32_t fixed_stack_red_zone; /* reusable stack allocated by caller (default 0) */ + int32_t fixed_stack_frame_size; /* fixed stack allocated by generated code for spills and registers save/restore */ + int32_t fixed_call_stack_size; /* fixed preallocated stack for parameter passing (default 0) */ + uint64_t fixed_save_regset; /* registers that are always saved/restored in prologue/epilogue */ + ir_live_interval **live_intervals; + ir_arena *arena; + ir_live_range *unused_ranges; + ir_regs *regs; + ir_ref *prev_ref; + union { + void *data; + ir_ref control; /* used by IR construction API (see ir_builder.h) */ + ir_ref bb_start; /* used by target CPU instruction matcher */ + ir_ref vars; /* list of VARs (used by register allocator) */ + }; + ir_snapshot_create_t snapshot_create; + int32_t stack_frame_alignment; + int32_t stack_frame_size; /* spill stack frame size (used by register allocator and code generator) */ + int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */ + uint64_t used_preserved_regs; +#ifdef IR_TARGET_X86 + int32_t param_stack_size; + int32_t ret_slot; +#endif + uint32_t rodata_offset; + uint32_t jmp_table_offset; + uint32_t entries_count; + uint32_t *entries; /* array of ENTRY blocks */ + void *osr_entry_loads; + void *code_buffer; + size_t code_buffer_size; +#if defined(IR_TARGET_AARCH64) + int32_t deoptimization_exits; + int32_t veneers_size; + uint32_t code_size; + ir_get_exit_addr_t get_exit_addr; + ir_get_veneer_t get_veneer; + ir_set_veneer_t set_veneer; +#endif + ir_loader *loader; + ir_strtab strtab; + ir_ref prev_insn_chain[IR_LAST_FOLDABLE_OP + 1]; + ir_ref prev_const_chain[IR_LAST_TYPE]; +}; + +/* Basic IR Construction API (implementation in ir.c) */ +void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit); +void ir_free(ir_ctx *ctx); +void ir_truncate(ir_ctx *ctx); + +ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type); +ir_ref ir_const_i8(ir_ctx *ctx, int8_t c); +ir_ref ir_const_i16(ir_ctx *ctx, int16_t c); +ir_ref ir_const_i32(ir_ctx *ctx, int32_t c); +ir_ref ir_const_i64(ir_ctx *ctx, int64_t c); +ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c); +ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c); +ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c); +ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c); +ir_ref ir_const_bool(ir_ctx *ctx, bool c); +ir_ref 
ir_const_char(ir_ctx *ctx, char c); +ir_ref ir_const_float(ir_ctx *ctx, float c); +ir_ref ir_const_double(ir_ctx *ctx, double c); +ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c); +ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, uint16_t flags); + +ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, uint16_t flags); +ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str); +ir_ref ir_const_str(ir_ctx *ctx, ir_ref str); + +ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c); + +void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted); + +ir_ref ir_str(ir_ctx *ctx, const char *s); +ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len); +const char *ir_get_str(const ir_ctx *ctx, ir_ref idx); + +ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt); +ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1); +ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2); +ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count); +void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val); + +IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op1 = val; +} + +IR_ALWAYS_INLINE void ir_set_op2(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op2 = val; +} + +IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val) +{ + ctx->ir_base[ref].op3 = val; +} + +IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n) +{ + const ir_ref *p = insn->ops + n; + return *p; +} + +IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val) +{ + ir_ref *p = insn->ops + n; + *p = val; +} + +ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt); +ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1); +ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2); +ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3); + +ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos); +ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name); +ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def); + +/* Def -> Use lists */ +void ir_build_def_use_lists(ir_ctx *ctx); + +/* CFG - Control Flow Graph (implementation in ir_cfg.c) */ +int ir_build_cfg(ir_ctx *ctx); +int ir_remove_unreachable_blocks(ir_ctx *ctx); +int ir_build_dominators_tree(ir_ctx *ctx); +int ir_find_loops(ir_ctx *ctx); +int ir_schedule_blocks(ir_ctx *ctx); +void ir_build_prev_refs(ir_ctx *ctx); + +/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */ +int ir_sccp(ir_ctx *ctx); + +/* GCM - Global Code Motion and scheduling (implementation in ir_gcm.c) */ +int ir_gcm(ir_ctx *ctx); +int ir_schedule(ir_ctx *ctx); + +/* Liveness & Register Allocation (implementation in ir_ra.c) */ +#define IR_REG_NONE -1 +#define IR_REG_SPILL_LOAD (1<<6) +#define IR_REG_SPILL_STORE (1<<6) +#define IR_REG_SPILL_SPECIAL (1<<7) +#define IR_REG_SPILLED(r) \ + ((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) +#define IR_REG_NUM(r) \ + ((int8_t)((r) == IR_REG_NONE ? 
IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)))) + +int ir_assign_virtual_registers(ir_ctx *ctx); +int ir_compute_live_ranges(ir_ctx *ctx); +int ir_coalesce(ir_ctx *ctx); +int ir_compute_dessa_moves(ir_ctx *ctx); +int ir_reg_alloc(ir_ctx *ctx); + +int ir_regs_number(void); +bool ir_reg_is_int(int32_t reg); +const char *ir_reg_name(int8_t reg, ir_type type); +int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref); + +/* Target CPU instruction selection and code generation (see ir_x86.c) */ +int ir_match(ir_ctx *ctx); +void *ir_emit_code(ir_ctx *ctx, size_t *size); + +/* Target address resolution (implementation in ir_emit.c) */ +void *ir_resolve_sym_name(const char *name); + +/* Target CPU disassembler (implementation in ir_disasm.c) */ +int ir_disasm_init(void); +void ir_disasm_free(void); +void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size); +const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset); +int ir_disasm(const char *name, + const void *start, + size_t size, + bool asm_addr, + ir_ctx *ctx, + FILE *f); + +/* Linux perf interface (implementation in ir_perf.c) */ +int ir_perf_jitdump_open(void); +int ir_perf_jitdump_close(void); +int ir_perf_jitdump_register(const char *name, const void *start, size_t size); +void ir_perf_map_register(const char *name, const void *start, size_t size); + +/* GDB JIT interface (implementation in ir_gdb.c) */ +int ir_gdb_register(const char *name, + const void *start, + size_t size, + uint32_t sp_offset, + uint32_t sp_adjustment); +void ir_gdb_unregister_all(void); +bool ir_gdb_present(void); + +/* IR load API (implementation in ir_load.c) */ +struct _ir_loader { + uint32_t default_func_flags; + bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target); + bool (*external_sym_dcl) (ir_loader *loader, const char *name, uint32_t flags); + bool (*external_func_dcl) (ir_loader *loader, const char *name, + uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types); + bool (*forward_func_dcl) (ir_loader *loader, const char *name, + uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types); + bool (*sym_dcl) (ir_loader *loader, const char *name, uint32_t flags, size_t size, bool has_data); + bool (*sym_data) (ir_loader *loader, ir_type type, uint32_t count, const void *data); + bool (*sym_data_end) (ir_loader *loader); + bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name); + bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name); + void*(*resolve_sym_name) (ir_loader *loader, const char *name); +}; + +void ir_loader_init(void); +void ir_loader_free(void); +int ir_load(ir_loader *loader, FILE *f); + +/* IR LLVM load API (implementation in ir_load_llvm.c) */ +int ir_load_llvm_bitcode(ir_loader *loader, const char *filename); +int ir_load_llvm_asm(ir_loader *loader, const char *filename); + +/* IR save API (implementation in ir_save.c) */ +void ir_save(const ir_ctx *ctx, FILE *f); + +/* IR debug dump API (implementation in ir_dump.c) */ +void ir_dump(const ir_ctx *ctx, FILE *f); +void ir_dump_dot(const ir_ctx *ctx, FILE *f); +void ir_dump_use_lists(const ir_ctx *ctx, FILE *f); +void ir_dump_cfg(ir_ctx *ctx, FILE *f); +void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f); +void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f); +void ir_dump_codegen(const ir_ctx *ctx, FILE *f); + +/* IR to C conversion (implementation in ir_emit_c.c) */ +int ir_emit_c(ir_ctx *ctx, const char *name, FILE 
*f); +void ir_emit_c_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f); + +/* IR to LLVM conversion (implementation in ir_emit_llvm.c) */ +int ir_emit_llvm(ir_ctx *ctx, const char *name, FILE *f); +void ir_emit_llvm_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, ir_type *param_types, FILE *f); + +/* IR verification API (implementation in ir_check.c) */ +bool ir_check(const ir_ctx *ctx); +void ir_consistency_check(void); + +/* Code patching (implementation in ir_patch.c) */ +int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr); + +/* CPU information (implementation in ir_cpuinfo.c) */ +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# define IR_X86_SSE2 (1<<0) +# define IR_X86_SSE3 (1<<1) +# define IR_X86_SSSE3 (1<<2) +# define IR_X86_SSE41 (1<<3) +# define IR_X86_SSE42 (1<<4) +# define IR_X86_AVX (1<<5) +# define IR_X86_AVX2 (1<<6) +#endif + +uint32_t ir_cpuinfo(void); + +/* Deoptimization helpers */ +const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr); + +/* A reference IR JIT compiler */ +IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size) +{ + if (opt_level == 0) { + if (ctx->flags & IR_OPT_FOLDING) { + // IR_ASSERT(0 && "IR_OPT_FOLDING is incompatible with -O0"); + return NULL; + } + ctx->flags &= ~(IR_OPT_CFG | IR_OPT_CODEGEN); + + ir_build_def_use_lists(ctx); + + if (!ir_build_cfg(ctx) + || !ir_match(ctx) + || !ir_assign_virtual_registers(ctx) + || !ir_compute_dessa_moves(ctx)) { + return NULL; + } + + return ir_emit_code(ctx, size); + } else if (opt_level == 1 || opt_level == 2) { + if (!(ctx->flags & IR_OPT_FOLDING)) { + // IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2"); + return NULL; + } + ctx->flags |= IR_OPT_CFG | IR_OPT_CODEGEN; + + ir_build_def_use_lists(ctx); + + if (opt_level == 2 + && !ir_sccp(ctx)) { + return NULL; + } + + if (!ir_build_cfg(ctx) + || !ir_build_dominators_tree(ctx) + || !ir_find_loops(ctx) + || !ir_gcm(ctx) + || !ir_schedule(ctx) + || !ir_match(ctx) + || !ir_assign_virtual_registers(ctx) + || !ir_compute_live_ranges(ctx) + || !ir_coalesce(ctx) + || !ir_reg_alloc(ctx) + || !ir_schedule_blocks(ctx)) { + return NULL; + } + + return ir_emit_code(ctx, size); + } else { + // IR_ASSERT(0 && "wrong optimization level"); + return NULL; + } +} + +#define IR_ERROR_CODE_MEM_OVERFLOW 1 +#define IR_ERROR_FIXED_STACK_FRAME_OVERFLOW 2 +#define IR_ERROR_UNSUPPORTED_CODE_RULE 3 +#define IR_ERROR_LINK 4 +#define IR_ERROR_ENCODE 5 + +/* IR Memory Allocation */ +#ifndef ir_mem_malloc +# define ir_mem_malloc malloc +#endif +#ifndef ir_mem_calloc +# define ir_mem_calloc calloc +#endif +#ifndef ir_mem_realloc +# define ir_mem_realloc realloc +#endif +#ifndef ir_mem_free +# define ir_mem_free free +#endif + +#ifndef ir_mem_pmalloc +# define ir_mem_pmalloc malloc +#endif +#ifndef ir_mem_pcalloc +# define ir_mem_pcalloc calloc +#endif +#ifndef ir_mem_prealloc +# define ir_mem_prealloc realloc +#endif +#ifndef ir_mem_pfree +# define ir_mem_pfree free +#endif + +void *ir_mem_mmap(size_t size); +int ir_mem_unmap(void *ptr, size_t size); +int ir_mem_protect(void *ptr, size_t size); +int ir_mem_unprotect(void *ptr, size_t size); +int ir_mem_flush(void *ptr, size_t size); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* IR_H */ diff 
--git a/ext/opcache/jit/ir/ir_aarch64.dasc new file mode 100644 index 0000000000000..c4752bf6d7757 --- /dev/null +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -0,0 +1,5564 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Aarch64 native code generator based on DynAsm) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +|.arch arm64 + +|.actionlist dasm_actions +|.globals ir_lb +|.section code, cold_code, rodata, jmp_table + +#define IR_SPILL_POS_TO_OFFSET(offset) \ + ((ctx->flags & IR_USE_FRAME_POINTER) ? \ + ((offset) + (int32_t)sizeof(void*) * 2) : \ + ((offset) + ctx->call_stack_size)) + +#define B_IMM (1<<27) // signed imm26 * 4 +#define ADR_IMM (1<<20) // signed imm21 +#define ADRP_IMM (1LL<<32) // signed imm21 * 4096 + +static bool aarch64_may_use_b(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (ctx->code_buffer_size < B_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < B_IMM); + } else if (addr < ctx->code_buffer) { + return ((((char*)ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < B_IMM); + } + } + return 1; //??? +} + +static bool aarch64_may_use_adr(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (ctx->code_buffer_size < ADR_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < ADR_IMM); + } else if (addr < ctx->code_buffer) { + return ((((char*)ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADR_IMM); + } + } + return 0; +} + +static bool aarch64_may_use_adrp(ir_ctx *ctx, const void *addr) +{ + if (ctx->code_buffer) { + if (addr >= ctx->code_buffer && (char*)addr < (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (ctx->code_buffer_size < ADRP_IMM); + } else if ((char*)addr >= (char*)ctx->code_buffer + ctx->code_buffer_size) { + return (((char*)addr - (char*)ctx->code_buffer) < ADRP_IMM); + } else if (addr < ctx->code_buffer) { + return ((((char*)ctx->code_buffer + ctx->code_buffer_size) - (char*)addr) < ADRP_IMM); + } + } + return 0; +} + +/* Determine whether "val" falls into two allowed ranges: + * Range 1: [0, 0xfff] + * Range 2: LSL #12 to Range 1 + * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn instructions. */ +static bool aarch64_may_encode_imm12(const int64_t val) +{ + return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff))); +} + +/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. 
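+ (AArch64 logical immediates are "bitmask immediates": a rotated run of contiguous ones replicated across the register, + e.g. 0x00ff00ff00ff00ff is encodable while 0x1234 is not; dasm_imm13() below performs the exact encoding check.) 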
*/ +static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size) +{ + /* fast path: power of two */ + if (value > 0 && !(value & (value - 1))) { + return 1; + } + + if (type_size == 8) { + if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) { + return 1; + } + } else { + if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) { + return 1; + } + } + + return 0; +} + +static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size) +{ + return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size; +} + +|.macro ASM_REG_REG_OP, op, type, dst, src +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src) +|| } else { +| op Rw(dst), Rw(src) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2) +|| } +|.endmacro + +|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3 +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), Rx(src2), Rx(src3) +|| } else { +| op Rw(dst), Rw(src1), Rw(src2), Rw(src3); +|| } +|.endmacro + +|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val +|| if (ir_type_size[type] == 8) { +| op Rx(dst), Rx(src1), #val +|| } else { +| op Rw(dst), Rw(src1), #val +|| } +|.endmacro + +|.macro ASM_REG_IMM_OP, op, type, reg, val +|| if (ir_type_size[type] == 8) { +| op Rx(reg), #val +|| } else { +| op Rw(reg), #val +|| } +|.endmacro + +|.macro ASM_FP_REG_IMM_OP, op, type, reg, val +|| if (type == IR_DOUBLE) { +| op Rd(reg-IR_REG_FP_FIRST), #val +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| op Rs(reg-IR_REG_FP_FIRST), #val +|| } +|.endmacro + +|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2 +|| if (type == IR_DOUBLE) { +| op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST) +|| } +|.endmacro + +typedef struct _ir_backend_data { + ir_reg_alloc_data ra_data; + uint32_t dessa_from_block; + dasm_State *dasm_state; + int rodata_label, jmp_table_label; +} ir_backend_data; + +#define IR_GP_REG_NAME(code, name64, name32) \ + #name64, +#define IR_GP_REG_NAME32(code, name64, name32) \ + #name32, +#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \ + #name64, +#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \ + #name32, + +static const char *_ir_reg_name[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME) + IR_FP_REGS(IR_FP_REG_NAME) +}; + +static const char *_ir_reg_name32[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME32) + IR_FP_REGS(IR_FP_REG_NAME32) +}; + +/* Calling Convention */ +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, + IR_REG_INT_ARG5, + IR_REG_INT_ARG6, + IR_REG_INT_ARG7, + IR_REG_INT_ARG8, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, + IR_REG_FP_ARG5, + IR_REG_FP_ARG6, + IR_REG_FP_ARG7, + IR_REG_FP_ARG8, +}; + +const char *ir_reg_name(int8_t reg, ir_type type) +{ + if (reg >= IR_REG_NUM) { + if (reg == IR_REG_SCRATCH) { + return "SCRATCH"; + } else { + IR_ASSERT(reg == IR_REG_ALL); + return "ALL"; + } + } + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + if (type == IR_VOID) { + type = (reg < IR_REG_FP_FIRST) ? 
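/* an untyped register defaults to its full-width name */ 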
IR_ADDR : IR_DOUBLE; + } + if (ir_type_size[type] == 8) { + return _ir_reg_name[reg]; + } else { + return _ir_reg_name32[reg]; + } +} + +#define IR_RULES(_) \ + _(CMP_INT) \ + _(CMP_FP) \ + _(MUL_PWR2) \ + _(DIV_PWR2) \ + _(MOD_PWR2) \ + _(OP_INT) \ + _(OP_FP) \ + _(BINOP_INT) \ + _(BINOP_FP) \ + _(SHIFT) \ + _(SHIFT_CONST) \ + _(COPY_INT) \ + _(COPY_FP) \ + _(CMP_AND_BRANCH_INT) \ + _(CMP_AND_BRANCH_FP) \ + _(GUARD_CMP_INT) \ + _(GUARD_CMP_FP) \ + _(GUARD_OVERFLOW) \ + _(OVERFLOW_AND_BRANCH) \ + _(MIN_MAX_INT) \ + _(REG_BINOP_INT) \ + _(LOAD_INT) \ + _(LOAD_FP) \ + _(STORE_INT) \ + _(STORE_FP) \ + _(IF_INT) \ + _(RETURN_VOID) \ + _(RETURN_INT) \ + _(RETURN_FP) \ + +#define IR_RULE_ENUM(name) IR_ ## name, + +enum _ir_rule { + IR_FIRST_RULE = IR_LAST_OP, + IR_RULES(IR_RULE_ENUM) + IR_LAST_RULE +}; + +#define IR_RULE_NAME(name) #name, +const char *ir_rule_name[IR_LAST_OP] = { + NULL, + IR_RULES(IR_RULE_NAME) + NULL +}; + +/* register allocation */ +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) +{ + uint32_t rule = ir_rule(ctx, ref); + const ir_insn *insn; + int n = 0; + int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + + constraints->def_reg = IR_REG_NONE; + constraints->hints_count = 0; + switch (rule & IR_RULE_MASK) { + case IR_BINOP_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + switch (insn->op) { + case IR_ADD: + case IR_ADD_OV: + case IR_SUB: + case IR_SUB_OV: + if (!aarch64_may_encode_imm12(val_insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL_OV: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + break; + case IR_AND: + case IR_OR: + case IR_XOR: + if (!aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_MUL: + case IR_DIV: + case IR_MOD: + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + } + } + if (insn->op == IR_MOD) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n++; + } + break; + case IR_SEXT: + case IR_ZEXT: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + IR_FALLTHROUGH; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + case IR_SHIFT: + case IR_SHIFT_CONST: + case IR_OP_INT: + case IR_OP_FP: + case IR_INT2FP: + case IR_FP2INT: + case IR_FP2FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (rule == IR_SHIFT && insn->op == IR_ROL) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_BINOP_FP: + case IR_MIN_MAX_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = 
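/* constants have no assigned register; materialize the operand into a temporary */ 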
IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CMP_INT: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (!aarch64_may_encode_imm12(insn->val.u64)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_CMP_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_VSTORE: + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_LOAD_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_STORE_INT: + case IR_STORE_FP: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + if (!IR_IS_TYPE_INT(insn->type) || insn->val.i64 != 0) { + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_SWITCH: + insn = &ctx->ir_base[ref]; + n = 0; + if (IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } else { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + break; + case IR_CALL: + insn = &ctx->ir_base[ref]; + constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? 
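/* the call result is produced in the ABI return register */ 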
IR_REG_INT_RET1 : IR_REG_FP_RET1; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n = 1; + IR_FALLTHROUGH; + case IR_TAILCALL: + insn = &ctx->ir_base[ref]; + if (insn->inputs_count > 2) { + constraints->hints[2] = IR_REG_NONE; + constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + if (!IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_COPY_INT: + case IR_COPY_FP: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG; + break; + case IR_PARAM: + constraints->def_reg = ir_get_param_reg(ctx, ref); + flags = 0; + break; + case IR_PI: + case IR_PHI: + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_RLOAD: + constraints->def_reg = ctx->ir_base[ref].op2; + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_EXITCALL: + constraints->def_reg = IR_REG_INT_RET1; + break; + case IR_TRUNC: + case IR_BITCAST: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_RSTORE: + flags = IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_RETURN_INT: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints_count = 3; + break; + case IR_RETURN_FP: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints_count = 3; + break; + case IR_SNAPSHOT: + flags = 0; + break; + } + constraints->tmps_count = n; + + return flags; +} + +/* instruction selection */ +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type) +{ + if (!IR_IS_CONST_REF(addr_ref)) { + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (addr_insn->op == IR_ADD + && !IR_IS_CONST_REF(addr_insn->op1) + && IR_IS_CONST_REF(addr_insn->op2) // TODO: temporary workaround + && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) { + ir_use_list *use_list = &ctx->use_lists[addr_ref]; + ir_ref j = use_list->count; + + if (j > 1) { + /* check if address is used only in LOAD and STORE */ + ir_ref *p = &ctx->use_edges[use_list->refs]; + + do { + ir_insn *insn = &ctx->ir_base[*p]; + if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + return; + } + p++; + } while (--j); + } + ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op; + } + } +} + +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *op2_insn; + ir_insn *insn = &ctx->ir_base[ref]; + + switch (insn->op) { + case IR_EQ: + case IR_NE: + case IR_LT: + case IR_GE: + case IR_LE: + case IR_GT: + case IR_ULT: + case IR_UGE: + case IR_ULE: + case IR_UGT: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + return IR_CMP_INT; + } else { + return IR_CMP_FP; + } + break; + case IR_ADD: + case IR_SUB: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } + } +binop_int: + return IR_BINOP_INT; + } else { +binop_fp: + return IR_BINOP_FP; + } + break; + case IR_MUL: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + // 0 + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; 
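+ /* multiplication by a larger power of two is strength-reduced to a shift below (e.g. x * 8 => x << 3) */ 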
+ } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MUL_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + } + break; + case IR_ADD_OV: + case IR_SUB_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_MUL_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_DIV: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_DIV_PWR2; + } + } + return IR_BINOP_INT; + } else { + goto binop_fp; + } + break; + case IR_MOD: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (IR_IS_TYPE_UNSIGNED(insn->type) && IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + return IR_MOD_PWR2; + } + } + return IR_BINOP_INT; + case IR_BSWAP: + case IR_NOT: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + return IR_OP_INT; + case IR_NEG: + case IR_ABS: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + return IR_OP_FP; + } + case IR_OR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if (op2_insn->val.i64 == -1) { + // -1 + } + } + goto binop_int; + case IR_AND: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + // 0 + } else if (op2_insn->val.i64 == -1) { + return IR_COPY_INT; + } + } + goto binop_int; + case IR_XOR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } + } + goto binop_int; + case IR_SHL: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4) { + if (op2_insn->val.u64 == 1) { + // lea [op1*2] + } else if (op2_insn->val.u64 == 2) { + // lea [op1*4] + } else if (op2_insn->val.u64 == 3) { + // lea [op1*8] + } + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_MIN: + case IR_MAX: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_MIN_MAX_INT; + } else { + goto binop_fp; + } + break; +// case IR_COND: + case IR_COPY: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_COPY_INT; + } else { + return IR_COPY_FP; + } + break; + case IR_CALL: + ctx->flags |= IR_HAS_CALLS; + return IR_CALL; + case IR_VAR: + return IR_SKIPPED | IR_VAR; + case IR_PARAM: + return ctx->use_lists[ref].count > 0 ? 
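/* a PARAM without uses needs no code */ 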
+		case IR_ALLOCA:
+			if (ctx->flags & IR_FUNCTION) {
+				ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA;
+			}
+			return IR_ALLOCA;
+		case IR_LOAD:
+			ir_match_fuse_addr(ctx, insn->op2, insn->type);
+			if (IR_IS_TYPE_INT(insn->type)) {
+				return IR_LOAD_INT;
+			} else {
+				return IR_LOAD_FP;
+			}
+			break;
+		case IR_STORE:
+			ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type);
+			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
+				return IR_STORE_INT;
+			} else {
+				return IR_STORE_FP;
+			}
+			break;
+		case IR_RLOAD:
+			if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
+				return IR_SKIPPED | IR_RLOAD;
+			}
+			return IR_RLOAD;
+		case IR_RSTORE:
+			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
+				if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+					ir_insn *op_insn = &ctx->ir_base[insn->op2];
+
+					if (!ctx->rules[insn->op2]) {
+						ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2);
+					}
+					if (ctx->rules[insn->op2] == IR_BINOP_INT) {
+						if (ctx->ir_base[op_insn->op1].op == IR_RLOAD
+						 && ctx->ir_base[op_insn->op1].op2 == insn->op3) {
+							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
+							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
+							return IR_REG_BINOP_INT;
+						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
+						 && ctx->ir_base[op_insn->op2].op == IR_RLOAD
+						 && ctx->ir_base[op_insn->op2].op2 == insn->op3) {
+							ir_ref tmp = op_insn->op1;
+							op_insn->op1 = op_insn->op2;
+							op_insn->op2 = tmp;
+							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
+							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
+							return IR_REG_BINOP_INT;
+						}
+					}
+				}
+			}
+			return IR_RSTORE;
+		case IR_START:
+		case IR_BEGIN:
+		case IR_IF_TRUE:
+		case IR_IF_FALSE:
+		case IR_CASE_VAL:
+		case IR_CASE_DEFAULT:
+		case IR_MERGE:
+		case IR_LOOP_BEGIN:
+		case IR_UNREACHABLE:
+			return IR_SKIPPED | insn->op;
+		case IR_RETURN:
+			if (!insn->op2) {
+				return IR_RETURN_VOID;
+			} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
+				return IR_RETURN_INT;
+			} else {
+				return IR_RETURN_FP;
+			}
+		case IR_IF:
+			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+				op2_insn = &ctx->ir_base[insn->op2];
+				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
+					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
+						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
+						return IR_CMP_AND_BRANCH_INT;
+					} else {
+						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
+						return IR_CMP_AND_BRANCH_FP;
+					}
+				} else if (op2_insn->op == IR_OVERFLOW) {
+					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
+					return IR_OVERFLOW_AND_BRANCH;
+				}
+			}
+			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
+				return IR_IF_INT;
+			} else {
+				IR_ASSERT(0 && "NIY IR_IF_FP");
+				break;
+			}
+		case IR_GUARD:
+		case IR_GUARD_NOT:
+			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
+				op2_insn = &ctx->ir_base[insn->op2];
+				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
+				 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
+				 && (insn->op2 == ref - 1 ||
+				     (insn->op2 == ctx->prev_ref[ref] - 1
+				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
+					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
+						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
+						return IR_GUARD_CMP_INT;
+					} else {
+						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
+						return IR_GUARD_CMP_FP;
+					}
+				} else if (op2_insn->op == IR_OVERFLOW) {
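+					/* Fuse OVERFLOW directly into the guard: the preceding
+					 * ADD_OV/SUB_OV/MUL_OV already left the condition flags
+					 * set, so the guard can branch on them without first
+					 * materializing a boolean result. */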
+					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
+					return IR_GUARD_OVERFLOW;
+				}
+			}
+			return insn->op;
+		default:
+			break;
+	}
+
+	return insn->op;
+}
+
+static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
+{
+}
+
+/* code generation */
+static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
+{
+	int32_t offset;
+
+	IR_ASSERT(ref >= 0);
+	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
+	IR_ASSERT(offset != -1);
+	if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
+		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
+		*reg = ctx->spill_base;
+		return offset;
+	}
+	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+	return IR_SPILL_POS_TO_OFFSET(offset);
+}
+
+static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg reg, int32_t offset)
+{
+	ir_reg fp;
+
+	return ir_ref_spill_slot(ctx, ref, &fp) == offset && reg == fp;
+}
+
+static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
+{
+	ir_insn *var_insn = &ctx->ir_base[ref];
+
+	IR_ASSERT(var_insn->op == IR_VAR);
+	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+	return IR_SPILL_POS_TO_OFFSET(var_insn->op3);
+}
+
+static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
+{
+	ir_live_interval *ival;
+
+	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
+	ival = ctx->live_intervals[ctx->vregs[ref]];
+	while (ival) {
+		ir_use_pos *use_pos = ival->use_pos;
+		while (use_pos) {
+			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
+				return !use_pos->next || use_pos->next->op_num == 0;
+			}
+			use_pos = use_pos->next;
+		}
+		ival = ival->next;
+	}
+	return 0;
+}
+
+static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	IR_ASSERT(IR_IS_TYPE_INT(type));
+	if (ir_type_size[type] == 8) {
+		if (val == 0) {
+			if (reg != IR_REG_ZR) {
+				|	mov Rx(reg), xzr
+			}
+		} else if (((uint64_t)(val)) <= 0xffff) {
+			|	movz Rx(reg), #((uint64_t)(val))
+		} else if (~((uint64_t)(val)) <= 0xffff) {
+			|	movn Rx(reg), #(~((uint64_t)(val)))
+		} else if ((uint64_t)(val) & 0xffff) {
+			|	movz Rx(reg), #((uint64_t)(val) & 0xffff)
+			if (((uint64_t)(val) >> 16) & 0xffff) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
+			}
+			if (((uint64_t)(val) >> 32) & 0xffff) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
+			}
+			if ((((uint64_t)(val) >> 48) & 0xffff)) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
+			}
+		} else if (((uint64_t)(val) >> 16) & 0xffff) {
+			|	movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
+			if (((uint64_t)(val) >> 32) & 0xffff) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
+			}
+			if ((((uint64_t)(val) >> 48) & 0xffff)) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
+			}
+		} else if (((uint64_t)(val) >> 32) & 0xffff) {
+			|	movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
+			if ((((uint64_t)(val) >> 48) & 0xffff)) {
+				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
+			}
+		} else {
+			|	movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
+		}
+	} else {
+		if (val == 0) {
+			if (reg != IR_REG_ZR) {
+				|	mov Rw(reg), wzr
+			}
+		} else if (((uint64_t)(val)) <= 0xffff) {
+			|	movz Rw(reg), #((uint64_t)(val))
+		} else if (~((uint64_t)(val)) <= 0xffff) {
+			|	movn Rw(reg), #(~((uint64_t)(val)))
+		} else if ((uint64_t)(val) & 0xffff) {
+			|	movz Rw(reg), #((uint64_t)(val) & 0xffff)
+			if (((uint64_t)(val) >> 16) & 0xffff) {
+				|	movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
+			}
+		} else if (((uint64_t)(val) >> 16) & 0xffff) {
+			|	movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
+		}
+	}
+}
+
+static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
+		switch (ir_type_size[type]) {
+			default:
+				IR_ASSERT(0);
+			case 8:
+				|	ldr Rx(reg), [Rx(base_reg), #offset]
+				break;
+			case 4:
+				|	ldr Rw(reg), [Rx(base_reg), #offset]
+				break;
+			case 2:
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ldrsh Rw(reg), [Rx(base_reg), #offset]
+				} else {
+					|	ldrh Rw(reg), [Rx(base_reg), #offset]
+				}
+				break;
+			case 1:
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ldrsb Rw(reg), [Rx(base_reg), #offset]
+				} else {
+					|	ldrb Rw(reg), [Rx(base_reg), #offset]
+				}
+				break;
+		}
+	} else {
+		ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */
+
+		ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset);
+		switch (ir_type_size[type]) {
+			default:
+				IR_ASSERT(0);
+			case 8:
+				|	ldr Rx(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+			case 4:
+				|	ldr Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+			case 2:
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ldrsh Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				} else {
+					|	ldrh Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				}
+				break;
+			case 1:
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ldrsb Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				} else {
+					|	ldrb Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				}
+				break;
+		}
+	}
+}
+
+static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_insn *insn = &ctx->ir_base[src];
+	int label;
+
+	if (type == IR_FLOAT && insn->val.u32 == 0) {
+		|	fmov Rs(reg-IR_REG_FP_FIRST), wzr
+	} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
+		|	fmov Rd(reg-IR_REG_FP_FIRST), xzr
+	} else {
+		label = ctx->cfg_blocks_count - src;
+		insn->const_flags |= IR_CONST_EMIT;
+		if (type == IR_DOUBLE) {
+			|	ldr Rd(reg-IR_REG_FP_FIRST), =>label
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	ldr Rs(reg-IR_REG_FP_FIRST), =>label
+		}
+	}
+}
+
+static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
+		if (type == IR_DOUBLE) {
+			|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
+		}
+	} else {
+		ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */
+
+		ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset);
+		if (type == IR_DOUBLE) {
+			|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)]
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)]
+		}
+	}
+}
+
+static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
+{
+	int32_t offset;
+	ir_reg fp;
+
+	if (IR_IS_CONST_REF(src)) {
+		if (IR_IS_TYPE_INT(type)) {
+			ir_insn *insn = &ctx->ir_base[src];
+
+			IR_ASSERT(insn->op != IR_STR && insn->op != IR_SYM && insn->op != IR_FUNC);
+			ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
+		} else {
+			ir_emit_load_imm_fp(ctx, type, reg, src);
+		}
+	} else {
+		offset = ir_ref_spill_slot(ctx, src, &fp);
+		if (IR_IS_TYPE_INT(type)) {
+			ir_emit_load_mem_int(ctx, type, reg, fp, offset);
+		} else {
+			ir_emit_load_mem_fp(ctx, type, reg, fp, offset);
+		}
+	}
+}
+
+static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
+		switch (ir_type_size[type]) {
+			default:
+				IR_ASSERT(0);
+			case 8:
+				|	str Rx(reg), [Rx(base_reg), #offset]
+				break;
+			case 4:
+				|	str Rw(reg), [Rx(base_reg), #offset]
+				break;
+			case 2:
+				|	strh Rw(reg), [Rx(base_reg), #offset]
+				break;
+			case 1:
+				|	strb Rw(reg), [Rx(base_reg), #offset]
+				break;
+		}
+	} else {
+		ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */
+
+		ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset);
+		switch (ir_type_size[type]) {
+			default:
+				IR_ASSERT(0);
+			case 8:
+				|	str Rx(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+			case 4:
+				|	str Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+			case 2:
+				|	strh Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+			case 1:
+				|	strb Rw(reg), [Rx(base_reg), Rx(tmp_reg)]
+				break;
+		}
+	}
+}
+
+static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
+		if (type == IR_DOUBLE) {
+			|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
+		}
+	} else {
+		ir_reg tmp_reg = IR_REG_INT_TMP; /* reserved temporary register */
+
+		ir_emit_load_imm_int(ctx, IR_ADDR, tmp_reg, offset);
+		if (type == IR_DOUBLE) {
+			|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)]
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(tmp_reg)]
+		}
+	}
+}
+
+static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
+{
+	int32_t offset;
+	ir_reg fp;
+
+	IR_ASSERT(dst >= 0);
+	offset = ir_ref_spill_slot(ctx, dst, &fp);
+	if (IR_IS_TYPE_INT(type)) {
+		ir_emit_store_mem_int(ctx, type, fp, offset, reg);
+	} else {
+		ir_emit_store_mem_fp(ctx, type, fp, offset, reg);
+	}
+}
+
+static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (ir_type_size[type] == 8) {
+		if (dst == IR_REG_STACK_POINTER) {
+			|	mov sp, Rx(src)
+		} else if (src == IR_REG_STACK_POINTER) {
+			|	mov Rx(dst), sp
+		} else {
+			|	mov Rx(dst), Rx(src)
+		}
+	} else {
+		|	mov Rw(dst), Rw(src)
+	}
+}
+
+static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (ir_type_size[type] == 8) {
+		|	fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST)
+	} else {
+		|	fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST)
+	}
+}
+
+static void ir_emit_prologue(ir_ctx *ctx)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (ctx->flags & IR_USE_FRAME_POINTER) {
+		|	stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]!
+		|	mov x29, sp
+		if (ctx->call_stack_size) {
+			|	sub sp, sp, #(ctx->call_stack_size)
+		}
+	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
+		if (ctx->fixed_stack_red_zone) {
+			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
+		} else {
+			|	sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
+		}
+	}
+	if (ctx->used_preserved_regs) {
+		int offset;
+		uint32_t i;
+		ir_reg prev = IR_REG_NONE;
+		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
+
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			offset = ctx->stack_frame_size + sizeof(void*) * 2;
+		} else {
+			offset = ctx->stack_frame_size + ctx->call_stack_size;
+		}
+		for (i = 0; i < IR_REG_NUM; i++) {
+			if (IR_REGSET_IN(used_preserved_regs, i)) {
+				if (prev == IR_REG_NONE) {
+					prev = i;
+				} else if (i < IR_REG_FP_FIRST) {
+					offset -= sizeof(void*) * 2;
+					|	stp Rx(prev), Rx(i), [Rx(fp), #offset]
+					prev = IR_REG_NONE;
+				} else {
+					if (prev < IR_REG_FP_FIRST) {
+						offset -= sizeof(void*);
+						|	str Rx(prev), [Rx(fp), #offset]
+						offset -= sizeof(void*);
+						|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+					} else {
+						offset -= sizeof(void*) * 2;
+						|	stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+					}
+					prev = IR_REG_NONE;
+				}
+			}
+		}
+		if (prev != IR_REG_NONE) {
+			if (prev < IR_REG_FP_FIRST) {
+				offset -= sizeof(void*);
+				|	str Rx(prev), [Rx(fp), #offset]
+			} else {
+				offset -= sizeof(void*);
+				|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
+			}
+		}
+	}
+}
+
+static void ir_emit_epilogue(ir_ctx *ctx)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (ctx->used_preserved_regs) {
+		int offset;
+		uint32_t i;
+		ir_reg prev = IR_REG_NONE;
+		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
+
+		if (ctx->flags & IR_USE_FRAME_POINTER) {
+			offset = ctx->stack_frame_size + sizeof(void*) * 2;
+		} else {
+			offset = ctx->stack_frame_size + ctx->call_stack_size;
+		}
+		for (i = 0; i < IR_REG_NUM; i++) {
+			if (IR_REGSET_IN(used_preserved_regs, i)) {
+				if (prev == IR_REG_NONE) {
+					prev = i;
+				} else if (i < IR_REG_FP_FIRST) {
+					offset -= sizeof(void*) * 2;
+					|	ldp Rx(prev), Rx(i), [Rx(fp), #offset]
+					prev = IR_REG_NONE;
+				} else {
+					if (prev < IR_REG_FP_FIRST) {
+						offset -= sizeof(void*);
+						|	ldr Rx(prev), [Rx(fp), #offset]
+						offset -= sizeof(void*);
+						|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+					} else {
+						offset -= sizeof(void*) * 2;
+						|	ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
+					}
+					prev = IR_REG_NONE;
+				}
+			}
+		}
+		if (prev != IR_REG_NONE) {
+			if (prev < IR_REG_FP_FIRST) {
+				offset -= sizeof(void*);
+				|	ldr Rx(prev), [Rx(fp), #offset]
+			} else {
+				offset -= sizeof(void*);
+				|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
+			}
+		}
+	}
+
+	if (ctx->flags & IR_USE_FRAME_POINTER) {
+		if (ctx->call_stack_size || (ctx->flags & IR_HAS_ALLOCA)) {
+			|	mov sp, x29
+		}
+		|	ldp x29, x30, [sp], # (ctx->stack_frame_size+16)
+	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
+		if (ctx->fixed_stack_red_zone) {
+			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
+		} else {
+			|	add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
+		}
+	}
+}
+
+static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+	ir_reg tmp_reg;
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (op2_reg != IR_REG_NONE) {
+		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			if (op1 != op2) {
+				ir_emit_load(ctx, type, op2_reg, op2);
+			}
+		}
+		switch (insn->op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_ADD:
+				|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_ADD_OV:
+				|	ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_SUB:
+				|	ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_SUB_OV:
+				|	ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_MUL:
+				|	ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_MUL_OV:
+				if (ir_type_size[type] == 8) {
+					if (IR_IS_TYPE_SIGNED(type)) {
+						tmp_reg = ctx->regs[def][3];
+						IR_ASSERT(tmp_reg != IR_REG_NONE);
+						|	smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
+						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
+						|	cmp Rx(tmp_reg), Rx(def_reg), asr #63
+					} else {
+						tmp_reg = ctx->regs[def][3];
+						IR_ASSERT(tmp_reg != IR_REG_NONE);
+						|	umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
+						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
+						|	cmp Rx(tmp_reg), xzr
+					}
+				} else {
+					if (IR_IS_TYPE_SIGNED(type)) {
+						tmp_reg = ctx->regs[def][3];
+						IR_ASSERT(tmp_reg != IR_REG_NONE);
+						|	smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
+						|	asr Rx(tmp_reg), Rx(def_reg), #32
+						|	cmp Rx(tmp_reg), Rx(def_reg), asr #31
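+						/* smull leaves the full 64-bit product in Xd;
+						 * overflow occurred iff the high 32 bits differ
+						 * from the sign extension of the low 32 bits,
+						 * which the asr #32 / asr #31 comparison checks. */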
+					} else {
+						|	umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
+						|	cmp xzr, Rx(def_reg), lsr #32
+					}
+				}
+				break;
+			case IR_DIV:
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg
+				} else {
+					|	ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg
+				}
+				break;
+			case IR_MOD:
+				tmp_reg = ctx->regs[def][3];
+				IR_ASSERT(tmp_reg != IR_REG_NONE);
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg
+					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
+				} else {
+					|	ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg
+					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
+				}
+				break;
+			case IR_OR:
+				|	ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_AND:
+				|	ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg
+				break;
+			case IR_XOR:
+				|	ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg
+				break;
+		}
+	} else {
+		IR_ASSERT(IR_IS_CONST_REF(op2));
+		int32_t val = ctx->ir_base[op2].val.i32;
+		switch (insn->op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_ADD:
+				|	ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val
+				break;
+			case IR_ADD_OV:
+				|	ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val
+				break;
+			case IR_SUB:
+				|	ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val
+				break;
+			case IR_SUB_OV:
+				|	ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val
+				break;
+			case IR_OR:
+				if (ir_type_size[type] == 8) {
+					uint64_t val = ctx->ir_base[op2].val.u64;
+					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
+				} else {
+					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
+				}
+				break;
+			case IR_AND:
+				if (ir_type_size[type] == 8) {
+					uint64_t val = ctx->ir_base[op2].val.u64;
+					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
+				} else {
+					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
+				}
+				break;
+			case IR_XOR:
+				if (ir_type_size[type] == 8) {
+					uint64_t val = ctx->ir_base[op2].val.u64;
+					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
+				} else {
+					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
+				}
+				break;
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		ir_emit_load(ctx, type, op2_reg, op2);
+	}
+
+	if (op1 == op2) {
+		return;
+	}
+
+	if (ir_type_size[type] == 8) {
+		|	cmp Rx(op1_reg), Rx(op2_reg)
+		if (insn->op == IR_MIN) {
+			if (IR_IS_TYPE_SIGNED(type)) {
+				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le
+			} else {
+				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls
+			}
+		} else {
+			IR_ASSERT(insn->op == IR_MAX);
+			if (IR_IS_TYPE_SIGNED(type)) {
+				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge
+			} else {
+				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs
+			}
+		}
+	} else {
+		|	cmp Rw(op1_reg), Rw(op2_reg)
+		if (insn->op == IR_MIN) {
+			if (IR_IS_TYPE_SIGNED(type)) {
+				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le
+			} else {
+				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls
+			}
+		} else {
+			IR_ASSERT(insn->op == IR_MAX);
+			if (IR_IS_TYPE_SIGNED(type)) {
+				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge
+			} else {
+				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs
+			}
+		}
+	}
+
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_insn *math_insn = &ctx->ir_base[insn->op1];
+	ir_type type = math_insn->type;
+
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	IR_ASSERT(IR_IS_TYPE_INT(type));
+	if (math_insn->op == IR_MUL_OV) {
+		|	cset Rw(def_reg), ne
+	} else if (IR_IS_TYPE_SIGNED(type)) {
+		|	cset Rw(def_reg), vs
+	} else {
+		|	cset Rw(def_reg), cs
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
+	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
+	ir_type type = math_insn->type;
+	uint32_t true_block, false_block, next_block;
+	bool reverse = 0;
+
+	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
+	if (true_block == next_block) {
+		reverse = 1;
+		true_block = false_block;
+		false_block = 0;
+	} else if (false_block == next_block) {
+		false_block = 0;
+	}
+
+	if (math_insn->op == IR_MUL_OV) {
+		if (reverse) {
+			|	beq =>true_block
+		} else {
+			|	bne =>true_block
+		}
+	} else if (IR_IS_TYPE_SIGNED(type)) {
+		if (reverse) {
+			|	bvc =>true_block
+		} else {
+			|	bvs =>true_block
+		}
+	} else {
+		if (reverse) {
+			|	bcc =>true_block
+		} else {
+			|	bcs =>true_block
+		}
+	}
+	if (false_block) {
+		|	b =>false_block
+	}
+}
+
+static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_insn *op_insn = &ctx->ir_base[insn->op2];
+	ir_type type = op_insn->type;
+	ir_ref op2 = op_insn->op2;
+	ir_reg op2_reg = ctx->regs[insn->op2][2];
+	ir_reg reg;
+
+	IR_ASSERT(insn->op == IR_RSTORE);
+	reg = insn->op3;
+
+	if (op2_reg == IR_REG_NONE) {
+		ir_val *val = &ctx->ir_base[op2].val;
+
+		IR_ASSERT(IR_IS_CONST_REF(op2));
+		switch (op_insn->op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_ADD:
+				|	ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32
+				break;
+			case IR_SUB:
+				|	ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32
+				break;
+			case IR_OR:
+				|	ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32
+				break;
+			case IR_AND:
+				|	ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32
+				break;
+			case IR_XOR:
+				|	ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32
+				break;
+		}
+	} else {
+		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+		switch (op_insn->op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_ADD:
+				|	ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg
+				break;
+			case IR_SUB:
+				|	ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg
+				break;
+			case IR_MUL:
+				|	ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg
+				break;
+			case IR_OR:
+				|	ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg
+				break;
+			case IR_AND:
+				|	ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg
+				break;
+			case IR_XOR:
+				|	ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg
+				break;
+		}
+	}
+}
+
+static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (insn->op == IR_MUL) {
+		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
+		if (shift == 1) {
+			|	ASM_REG_REG_REG_OP add, insn->type, def_reg, op1_reg, op1_reg
+		} else {
+			|	ASM_REG_REG_IMM_OP lsl, insn->type, def_reg, op1_reg, shift
+		}
+	} else if (insn->op == IR_DIV) {
+		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
+		|	ASM_REG_REG_IMM_OP lsr, insn->type, def_reg, op1_reg, shift
+	} else {
+		IR_ASSERT(insn->op == IR_MOD);
+		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
+		|	ASM_REG_REG_IMM_OP and, insn->type, def_reg, op1_reg, mask
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+	ir_reg tmp_reg;
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, insn->op1);
+	}
+	if (IR_REG_SPILLED(op2_reg)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		ir_emit_load(ctx, type, op2_reg, insn->op2);
+	}
+	switch (insn->op) {
+		default:
+			IR_ASSERT(0);
+		case IR_SHL:
+			|	ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_SHR:
+			|	ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_SAR:
+			|	ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_ROL:
+			tmp_reg = ctx->regs[def][3];
+			IR_ASSERT(tmp_reg != IR_REG_NONE);
+			if (ir_type_size[type] == 8) {
+				|	neg Rx(tmp_reg), Rx(op2_reg)
+				|	ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg)
+			} else {
+				|	neg Rw(tmp_reg), Rw(op2_reg)
+				|	ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg)
+			}
+			break;
+		case IR_ROR:
+			|	ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	uint32_t shift = ctx->ir_base[insn->op2].val.u64;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	switch (insn->op) {
+		default:
+			IR_ASSERT(0);
+		case IR_SHL:
+			|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
+			break;
+		case IR_SHR:
+			|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
+			break;
+		case IR_SAR:
+			|	ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift
+			break;
+		case IR_ROL:
+			if (ir_type_size[type] == 8) {
+				shift = (64 - shift) % 64;
+				|	ror Rx(def_reg), Rx(op1_reg), #shift
+			} else {
+				shift = (32 - shift) % 32;
+				|	ror Rw(def_reg), Rw(op1_reg), #shift
+			}
+			break;
+		case IR_ROR:
+			|	ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (insn->op == IR_NOT) {
+		if (insn->type == IR_BOOL) {
+			|	ASM_REG_IMM_OP cmp, type, op1_reg, 0
+			|	cset Rw(def_reg), eq
+		} else {
+			|	ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg
+		}
+	} else if (insn->op == IR_NEG) {
+		|	ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg
+	} else if (insn->op == IR_ABS) {
+		if (ir_type_size[type] == 8) {
+			|	cmp Rx(op1_reg), #0
+			|	cneg Rx(def_reg), Rx(op1_reg), lt
+		} else {
+			|	cmp Rw(op1_reg), #0
+			|	cneg Rw(def_reg), Rw(op1_reg), lt
+		}
+	} else {
+		IR_ASSERT(insn->op == IR_BSWAP);
+		|	ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (insn->op == IR_NEG) {
+		if (type == IR_DOUBLE) {
+			|	fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
+		}
+	} else {
+		IR_ASSERT(insn->op == IR_ABS);
+		if (type == IR_DOUBLE) {
+			|	fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
+		} else {
+			IR_ASSERT(type == IR_FLOAT);
+			|	fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		if (op1 != op2) {
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+	}
+	switch (insn->op) {
+		default:
+			IR_ASSERT(0 && "NIY binary op");
+		case IR_ADD:
+			|	ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_SUB:
+			|	ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_MUL:
+			|	ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_DIV:
+			|	ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_MIN:
+			|	ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg
+			break;
+		case IR_MAX:
+			|	ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	IR_ASSERT(op1_reg != IR_REG_NONE);
+	if (op2_reg != IR_REG_NONE) {
+		if (ir_type_size[type] == 8) {
+			|	cmp Rx(op1_reg), Rx(op2_reg)
+		} else {
+			|	cmp Rw(op1_reg), Rw(op2_reg)
+		}
+	} else {
+		IR_ASSERT(IR_IS_CONST_REF(op2));
+		int32_t val = ctx->ir_base[op2].val.i32;
+
+		if (ir_type_size[type] == 8) {
+			|	cmp Rx(op1_reg), #val
+		} else {
+			|	cmp Rw(op1_reg), #val
+		}
+	}
+}
+
+static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = ctx->ir_base[insn->op1].type;
+	ir_op op = insn->op;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (op2_reg != IR_REG_NONE) {
+		if (IR_REG_SPILLED(op2_reg)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			if (op1 != op2) {
+				ir_emit_load(ctx, type, op2_reg, op2);
+			}
+		}
+		if (IR_IS_CONST_REF(op2)) {
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+	}
+	if (IR_IS_CONST_REF(insn->op2) && ctx->ir_base[insn->op2].val.u64 == 0) {
+		if (op == IR_ULT) {
+			/* always false */
+			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0);
+			if (IR_REG_SPILLED(ctx->regs[def][0])) {
+				ir_emit_store(ctx, insn->type, def, def_reg);
+			}
+			return;
+		} else if (op == IR_UGE) {
+			/* always true */
+			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1);
+			if (IR_REG_SPILLED(ctx->regs[def][0])) {
+				ir_emit_store(ctx, insn->type, def, def_reg);
+			}
+			return;
+		} else if (op == IR_ULE) {
+			op = IR_EQ;
+		} else if (op == IR_UGT) {
+			op = IR_NE;
+		}
+	}
+	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
+	switch (op) {
+		default:
+			IR_ASSERT(0 && "NIY binary op");
+		case IR_EQ:
+			|	cset Rw(def_reg), eq
+			break;
+		case IR_NE:
+			|	cset Rw(def_reg), ne
+			break;
+		case IR_LT:
+			|	cset Rw(def_reg), lt
+			break;
+		case IR_GE:
+			|	cset Rw(def_reg), ge
+			break;
+		case IR_LE:
+			|	cset Rw(def_reg), le
+			break;
+		case IR_GT:
+			|	cset Rw(def_reg), gt
+			break;
+		case IR_ULT:
+			|	cset Rw(def_reg), lo
+			break;
+		case IR_UGE:
+			|	cset Rw(def_reg), hs
+			break;
+		case IR_ULE:
+			|	cset Rw(def_reg), ls
+			break;
+		case IR_UGT:
+			|	cset Rw(def_reg), hi
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = ctx->ir_base[cmp_insn->op1].type;
+	ir_op op = cmp_insn->op;
+	ir_ref op1, op2;
+	ir_reg op1_reg, op2_reg;
+
+	if (op == IR_LT || op == IR_LE) {
+		/* swap operands to avoid P flag check */
+		op ^= 3;
+		op1 = cmp_insn->op2;
+		op2 = cmp_insn->op1;
+		op1_reg = ctx->regs[cmp_ref][2];
+		op2_reg = ctx->regs[cmp_ref][1];
+	} else {
+		op1 = cmp_insn->op1;
+		op2 = cmp_insn->op2;
+		op1_reg = ctx->regs[cmp_ref][1];
+		op2_reg = ctx->regs[cmp_ref][2];
+	}
+
+	IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		if (op1 != op2) {
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+	}
+	if (type == IR_DOUBLE) {
+		|	fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST)
+	} else {
+		IR_ASSERT(type == IR_FLOAT);
+		|	fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST)
+	}
+	return op;
+}
+
+static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+//???	ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag
+
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	switch (op) {
+		default:
+			IR_ASSERT(0 && "NIY binary op");
+		case IR_EQ:
+			|	cset Rw(def_reg), eq
+			break;
+		case IR_NE:
+			|	cset Rw(def_reg), ne
+			break;
+		case IR_LT:
+			|	cset Rw(def_reg), mi
+			break;
+		case IR_GE:
+			|	cset Rw(def_reg), ge
+			break;
+		case IR_LE:
+			|	cset Rw(def_reg), ls
+			break;
+		case IR_GT:
+			|	cset Rw(def_reg), gt
+			break;
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, insn->type, def, def_reg);
+	}
+}
+
+static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def)
+{
+	uint32_t true_block, false_block, next_block;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
+	if (true_block != next_block) {
+		|	b =>true_block
+	}
+}
+
+static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def)
+{
+	uint32_t true_block, false_block, next_block;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
+	if (false_block != next_block) {
+		|	b =>false_block
+	}
+}
+
+static void ir_emit_jz(ir_ctx *ctx, uint8_t op, uint32_t b, ir_type type, ir_reg reg)
+{
+	uint32_t true_block, false_block, next_block;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
+	if (true_block == next_block) {
+		IR_ASSERT(op < IR_LT);
+		op ^= 1; // reverse
+		true_block = false_block;
+		false_block = 0;
+	} else if (false_block == next_block) {
+		false_block = 0;
+	}
+
+	if (op == IR_EQ) {
+		if (ir_type_size[type] == 8) {
+			|	cbz Rx(reg), =>true_block
+		} else {
+			|	cbz Rw(reg), =>true_block
+		}
+	} else {
+		IR_ASSERT(op == IR_NE);
+		if (ir_type_size[type] == 8) {
+			|	cbnz Rx(reg), =>true_block
+		} else {
+			|	cbnz Rw(reg), =>true_block
+		}
+	}
+	if (false_block) {
+		|	b =>false_block
+	}
+}
+
+static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp)
+{
+	uint32_t true_block, false_block, next_block;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
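+	/* Branch-layout optimization: if the true target falls through,
+	 * invert the condition and branch to the false target instead; if
+	 * the false target falls through, drop the trailing unconditional
+	 * b.  Either way one branch per conditional is saved. */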
+	if (true_block == next_block) {
+		/* swap to avoid unconditional JMP */
+		op ^= 1; // reverse
+		true_block = false_block;
+		false_block = 0;
+	} else if (false_block == next_block) {
+		false_block = 0;
+	}
+
+	if (int_cmp) {
+		switch (op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_EQ:
+				|	beq =>true_block
+				break;
+			case IR_NE:
+				|	bne =>true_block
+				break;
+			case IR_LT:
+				|	blt =>true_block
+				break;
+			case IR_GE:
+				|	bge =>true_block
+				break;
+			case IR_LE:
+				|	ble =>true_block
+				break;
+			case IR_GT:
+				|	bgt =>true_block
+				break;
+			case IR_ULT:
+				|	blo =>true_block
+				break;
+			case IR_UGE:
+				|	bhs =>true_block
+				break;
+			case IR_ULE:
+				|	bls =>true_block
+				break;
+			case IR_UGT:
+				|	bhi =>true_block
+				break;
+		}
+	} else {
+		switch (op) {
+			default:
+				IR_ASSERT(0 && "NIY binary op");
+			case IR_EQ:
+				|	beq =>true_block
+				break;
+			case IR_NE:
+				|	bne =>true_block
+				break;
+			case IR_LT:
+				|	bmi =>true_block
+				break;
+			case IR_GE:
+				|	bge =>true_block
+				break;
+			case IR_LE:
+				|	bls =>true_block
+				break;
+			case IR_GT:
+				|	bgt =>true_block
+				break;
+//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
+//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
+//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
+//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
+		}
+	}
+	if (false_block) {
+		|	b =>false_block
+	}
+}
+
+static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
+{
+	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
+	ir_op op = cmp_insn->op;
+	ir_type type = ctx->ir_base[cmp_insn->op1].type;
+	ir_ref op1 = cmp_insn->op1;
+	ir_ref op2 = cmp_insn->op2;
+	ir_reg op1_reg = ctx->regs[insn->op2][1];
+	ir_reg op2_reg = ctx->regs[insn->op2][2];
+
+	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (op1_reg != IR_REG_NONE && IR_IS_CONST_REF(op1)) {
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+	if (op2_reg != IR_REG_NONE) {
+		if (IR_REG_SPILLED(op2_reg)) {
+			op2_reg = IR_REG_NUM(op2_reg);
+			if (op1 != op2) {
+				ir_emit_load(ctx, type, op2_reg, op2);
+			}
+		}
+		if (IR_IS_CONST_REF(op2)) {
+			ir_emit_load(ctx, type, op2_reg, op2);
+		}
+	}
+	if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) {
+		if (op == IR_ULT) {
+			/* always false */
+			ir_emit_jmp_false(ctx, b, def);
+			return;
+		} else if (op == IR_UGE) {
+			/* always true */
+			ir_emit_jmp_true(ctx, b, def);
+			return;
+		} else if (op == IR_ULE) {
+			op = IR_EQ;
+		} else if (op == IR_UGT) {
+			op = IR_NE;
+		}
+		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
+			ir_emit_jz(ctx, op, b, type, op1_reg);
+			return;
+		}
+	}
+	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
+	ir_emit_jcc(ctx, op, b, def, insn, 1);
+}
+
+static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
+{
+	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
+	ir_emit_jcc(ctx, op, b, def, insn, 0);
+}
+
+static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
+{
+	ir_type type = ctx->ir_base[insn->op2].type;
+	ir_reg op2_reg = ctx->regs[def][2];
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	if (IR_IS_CONST_REF(insn->op2)) {
+		uint32_t true_block, false_block, next_block;
+
+		ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
+		if (ir_const_is_true(&ctx->ir_base[insn->op2])) {
+			if (true_block != next_block) {
+				|	b =>true_block
+			}
+		} else {
+			if (false_block != next_block) {
+				|	b =>false_block
+			}
+		}
+		return;
+	}
+	IR_ASSERT(op2_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		ir_emit_load(ctx, type, op2_reg, insn->op2);
+	}
+	|	ASM_REG_IMM_OP cmp, type, op2_reg, 0
+	ir_emit_jcc(ctx, IR_NE, b, def, insn, 1);
+}
+
+static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_ref op2 = insn->op2;
+	ir_ref op3 = insn->op3;
+	ir_type op1_type = ctx->ir_base[op1].type;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+	ir_reg op3_reg = ctx->regs[def][3];
+
+	IR_ASSERT(def_reg != IR_REG_NONE);
+
+	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
+		op2_reg = IR_REG_NUM(op2_reg);
+		ir_emit_load(ctx, type, op2_reg, op2);
+		if (op1 == op2) {
+			op1_reg = op2_reg;
+		}
+		if (op3 == op2) {
+			op3_reg = op2_reg;
+		}
+	}
+	if (op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) {
+		op3_reg = IR_REG_NUM(op3_reg);
+		ir_emit_load(ctx, type, op3_reg, op3);
+		if (op1 == op2) {
+			op1_reg = op3_reg;
+		}
+	}
+	if (op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, op1_type, op1_reg, op1);
+	}
+
+	if (IR_IS_TYPE_INT(op1_type)) {
+		|	ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0
+	} else {
+		|	ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0
+	}
+
+	if (IR_IS_TYPE_INT(type)) {
+		if (ir_type_size[type] == 8) {
+			|	csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), eq
+		} else {
+			|	csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), eq
+		}
+	} else {
+		if (type == IR_DOUBLE) {
+			|	fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), eq
+		} else {
+			|	fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), eq
+		}
+	}
+
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_return_void(ir_ctx *ctx)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+
+	ir_emit_epilogue(ctx);
+	|	ret
+}
+
+static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
+{
+	ir_reg op2_reg = ctx->regs[ref][2];
+
+	if (op2_reg != IR_REG_INT_RET1) {
+		ir_type type = ctx->ir_base[insn->op2].type;
+
+		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
+			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
+		} else {
+			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
+		}
+	}
+	ir_emit_return_void(ctx);
+}
+
+static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
+{
+	ir_reg op2_reg = ctx->regs[ref][2];
+	ir_type type = ctx->ir_base[insn->op2].type;
+
+	if (op2_reg != IR_REG_FP_RET1) {
+		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
+			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
+		} else {
+			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
+		}
+	}
+	ir_emit_return_void(ctx);
+}
+
+static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(IR_IS_TYPE_INT(src_type));
+	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
+	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+
+	if (op1_reg != IR_REG_NONE) {
+		if (ir_type_size[src_type] == 1) {
+			if (ir_type_size[dst_type] == 2) {
+				|	sxtb Rw(def_reg), Rw(op1_reg)
+			} else if (ir_type_size[dst_type] == 4) {
+				|	sxtb Rw(def_reg), Rw(op1_reg)
+			} else {
+				IR_ASSERT(ir_type_size[dst_type] == 8);
+				|	sxtb Rx(def_reg), Rx(op1_reg)
+			}
+		} else if (ir_type_size[src_type] == 2) {
+			if (ir_type_size[dst_type] == 4) {
+				|	sxth Rw(def_reg), Rw(op1_reg)
+			} else {
+				IR_ASSERT(ir_type_size[dst_type] == 8);
+				|	sxth Rx(def_reg), Rx(op1_reg)
+			}
+		} else {
+			IR_ASSERT(ir_type_size[src_type] == 4);
+			IR_ASSERT(ir_type_size[dst_type] == 8);
+			|	sxtw Rx(def_reg), Rw(op1_reg)
+		}
+	} else if (IR_IS_CONST_REF(insn->op1)) {
+		IR_ASSERT(0);
+	} else {
+		ir_reg fp;
+		int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp);
+
+		if (ir_type_size[src_type] == 1) {
+			if (ir_type_size[dst_type] == 2) {
+				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
+			} else if (ir_type_size[dst_type] == 4) {
+				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
+			} else {
+				IR_ASSERT(ir_type_size[dst_type] == 8);
+				|	ldrsb Rx(def_reg), [Rx(fp), #offset]
+			}
+		} else if (ir_type_size[src_type] == 2) {
+			if (ir_type_size[dst_type] == 4) {
+				|	ldrsh Rw(def_reg), [Rx(fp), #offset]
+			} else {
+				IR_ASSERT(ir_type_size[dst_type] == 8);
+				|	ldrsh Rx(def_reg), [Rx(fp), #offset]
+			}
+		} else {
+			IR_ASSERT(ir_type_size[src_type] == 4);
+			IR_ASSERT(ir_type_size[dst_type] == 8);
+			|	ldrsw Rx(def_reg), [Rx(fp), #offset]
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+}
+
+static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(IR_IS_TYPE_INT(src_type));
+	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
+	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+
+	if (op1_reg != IR_REG_NONE) {
+		if (ir_type_size[src_type] == 1) {
+			|	uxtb Rw(def_reg), Rw(op1_reg)
+		} else if (ir_type_size[src_type] == 2) {
+			|	uxth Rw(def_reg), Rw(op1_reg)
+		} else {
+			|	mov Rw(def_reg), Rw(op1_reg)
+		}
+	} else if (IR_IS_CONST_REF(insn->op1)) {
+		IR_ASSERT(0);
+	} else {
+		ir_reg fp;
+		int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp);
+
+		if (ir_type_size[src_type] == 1) {
+			|	ldrb Rw(def_reg), [Rx(fp), #offset]
+		} else if (ir_type_size[src_type] == 2) {
+			|	ldrh Rw(def_reg), [Rx(fp), #offset]
+		} else {
+			IR_ASSERT(ir_type_size[src_type] == 4);
+			IR_ASSERT(ir_type_size[dst_type] == 8);
+			|	ldr Rw(def_reg), [Rx(fp), #offset]
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+}
+
+static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(IR_IS_TYPE_INT(src_type));
+	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
+	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+	if (op1_reg != IR_REG_NONE) {
+		if (ir_type_size[dst_type] == 1) {
+			|	and Rw(def_reg), Rw(op1_reg), #0xff
+		} else if (ir_type_size[dst_type] == 2) {
+			|	and Rw(def_reg), Rw(op1_reg), #0xffff
+		} else if (op1_reg != def_reg) {
+			ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
+		}
+	} else {
+		ir_emit_load(ctx, dst_type, def_reg, insn->op1);
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+}
+
+static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]);
+	IR_ASSERT(def_reg != IR_REG_NONE);
+	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+	if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) {
+		if (op1_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op1_reg)) {
+				op1_reg = IR_REG_NUM(op1_reg);
+				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+			}
+			if (op1_reg != def_reg) {
+				ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
+			}
+		} else {
+			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
+		}
+	} else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) {
+		if (op1_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op1_reg)) {
+				op1_reg = IR_REG_NUM(op1_reg);
+				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+			}
+			if (op1_reg != def_reg) {
+				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
+			}
+		} else {
+			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
+		}
+	} else if (IR_IS_TYPE_FP(src_type)) {
+		IR_ASSERT(IR_IS_TYPE_INT(dst_type));
+		if (op1_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op1_reg)) {
+				op1_reg = IR_REG_NUM(op1_reg);
+				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+			}
+			if (src_type == IR_DOUBLE) {
+				|	fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
+			}
+		} else if (IR_IS_CONST_REF(insn->op1)) {
+			IR_ASSERT(0); //???
+		} else {
+			ir_reg fp;
+			int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp);
+
+			if (src_type == IR_DOUBLE) {
+				|	ldr Rx(def_reg), [Rx(fp), #offset]
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	ldr Rw(def_reg), [Rx(fp), #offset]
+			}
+		}
+	} else if (IR_IS_TYPE_FP(dst_type)) {
+		IR_ASSERT(IR_IS_TYPE_INT(src_type));
+		if (op1_reg != IR_REG_NONE) {
+			if (IR_REG_SPILLED(op1_reg)) {
+				op1_reg = IR_REG_NUM(op1_reg);
+				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+			}
+			if (dst_type == IR_DOUBLE) {
+				|	fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+			}
+		} else if (IR_IS_CONST_REF(insn->op1)) {
+			IR_ASSERT(0); //???
+		} else {
+			ir_reg fp;
+			int32_t offset = ir_ref_spill_slot(ctx, insn->op1, &fp);
+
+			if (dst_type == IR_DOUBLE) {
+				|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
+			}
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+}
+
+static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(IR_IS_TYPE_INT(src_type));
+	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+
+	if (ir_type_size[src_type] == 8) {
+		if (IR_IS_TYPE_SIGNED(src_type)) {
+			if (dst_type == IR_DOUBLE) {
+				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+			}
+		} else {
+			if (dst_type == IR_DOUBLE) {
+				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
+			}
+		}
+	} else {
+		if (IR_IS_TYPE_SIGNED(src_type)) {
+			if (dst_type == IR_DOUBLE) {
+				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+			}
+		} else {
+			if (dst_type == IR_DOUBLE) {
+				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+			} else {
+				IR_ASSERT(dst_type == IR_FLOAT);
+				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
+			}
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+}
+
+static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_type dst_type = insn->type;
+	ir_type src_type = ctx->ir_base[insn->op1].type;
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+
+	IR_ASSERT(IR_IS_TYPE_FP(src_type));
+	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
+	}
+	if (ir_type_size[dst_type] == 8) {
+		if (IR_IS_TYPE_SIGNED(dst_type)) {
+			if (src_type == IR_DOUBLE) {
+				|	fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
+			}
+		} else {
+			if (src_type == IR_DOUBLE) {
+				|	fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
+			}
+		}
+	} else {
+		if (IR_IS_TYPE_SIGNED(dst_type)) {
+			if (src_type == IR_DOUBLE) {
+				|	fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
+			}
+		} else {
+			if (src_type == IR_DOUBLE) {
+				|	fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
+			} else {
+				IR_ASSERT(src_type == IR_FLOAT);
+				|	fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
+			}
+		}
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, dst_type, def, def_reg);
+	}
+} + +static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == dst_type) { + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else if (src_type == IR_DOUBLE) { + | fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) + } else { + IR_ASSERT(src_type == IR_FLOAT); + | fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset; + ir_reg fp; + + IR_ASSERT(def_reg != IR_REG_NONE); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); + | add Rx(def_reg), Rx(fp), #offset + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & 
IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { + return; // fake load + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); + } +} + +static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg1, ir_reg *preg2) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg; + + IR_ASSERT(addr_insn->op == IR_ADD); + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *preg1 = reg; + *preg2 = IR_REG_NONE; // TODO: ??? 
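+ /* Only the (base register + constant) address shape is fused for now; the caller receives the base in *preg1 and uses the returned 32-bit constant as the displacement. */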
+ return ctx->ir_base[addr_insn->op2].val.i32; +} + +static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_int(ctx, type, def_reg, op1_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + ir_emit_load_mem_int(ctx, type, def_reg, op1_reg, offset); + } else { + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | ldr Rx(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | ldr Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrh Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | ldrsb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } else { + | ldrb Rw(def_reg), [Rx(op1_reg), Rx(op2_reg)] + } + break; + } + } + } else { + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + } + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, 0); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_fp(ctx, type, def_reg, op1_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + ir_emit_load_mem_fp(ctx, type, def_reg, op1_reg, offset); + } else { + if (type == IR_DOUBLE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, 0); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, 
type, def, def_reg); + } +} + +static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op1_reg, offset, op3_reg); + } else { + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 8: + | str Rx(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 4: + | str Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 2: + | strh Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + case 1: + | strb Rw(op3_reg), [Rx(op1_reg), Rx(op2_reg)] + break; + } + } + } else { + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if (op3_reg == IR_REG_NONE) { + IR_ASSERT(IR_IS_CONST_REF(insn->op3) && ctx->ir_base[insn->op3].val.i64 == 0); + op3_reg = IR_REG_ZR; + } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + + IR_ASSERT(op3_reg != IR_REG_NONE); + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + ir_reg op1_reg; + int32_t offset = ir_fuse_addr(ctx, insn->op2, &op1_reg, &op2_reg); + + if (op2_reg == IR_REG_NONE) { + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op1_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op1_reg, offset, op3_reg); + } else { + if (IR_REG_SPILLED(op3_reg) || 
IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + if (type == IR_DOUBLE) { + | str Rd(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } else { + IR_ASSERT(type == IR_FLOAT); + | str Rs(op3_reg-IR_REG_FP_FIRST), [Rx(op1_reg), Rx(op2_reg)] + } + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op2_reg, 0, op3_reg); + } +} + +static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg src_reg = insn->op2; + ir_type type = insn->type; + + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { + if (ctx->vregs[def] + && ctx->live_intervals[ctx->vregs[def]] + && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (def_reg == IR_REG_NONE) { + /* op3 is used as a flag that the value is already stored in memory. + * If op3 is set we don't have to store the value once again (in case of spilling) + */ + if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3)) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + if (src_reg != def_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, src_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, def_reg, src_reg); + } + } + if (IR_REG_SPILLED(ctx->regs[def][0]) + && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3))) { + ir_emit_store(ctx, type, def, def_reg); + } + } + } +} + +static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg dst_reg = insn->op3; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != dst_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, dst_reg, op2_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); + } + } + } else { + ir_emit_load(ctx, type, dst_reg, insn->op2); + } +} + +static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | sub sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size += size; + } + } else { + int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 
16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | add Rx(def_reg), Rx(op2_reg), #(alignment-1) + | and Rx(def_reg), Rx(def_reg), #(~(alignment-1)) + | sub sp, sp, Rx(def_reg); + } + if (def_reg != IR_REG_NONE) { + | mov Rx(def_reg), sp + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else { + ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); + } +} + +static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | add sp, sp, #size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size -= size; + } + } else { +// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + // TODO: alignment + + | add sp, sp, Rx(op2_reg); + } +} + +static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + ir_block *bb; + ir_insn *use_insn, *val; + uint32_t n, *p, use_block; + int i; + int label, default_label = 0; + int count = 0; + ir_val min, max; + ir_reg op1_reg, op2_reg, tmp_reg; + + type = ctx->ir_base[insn->op2].type; + if (IR_IS_TYPE_SIGNED(type)) { + min.u64 = 0x7fffffffffffffff; + max.u64 = 0x8000000000000000; + } else { + min.u64 = 0xffffffffffffffff; + max.u64 = 0x0; + } + + bb = &ctx->cfg_blocks[b]; + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); + } + count++; + } else { + IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); + default_label = ir_skip_empty_target_blocks(ctx, use_block); + } + } + + op1_reg = ctx->regs[def][1]; + op2_reg = ctx->regs[def][2]; + tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } else if (IR_IS_CONST_REF(insn->op2)) { + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + /* Generate a table jmp or a sequence of calls */ + if ((max.i64-min.i64) < count * 8) { + int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 
1)); + + for (i = 0; i <= (max.i64 - min.i64); i++) { + labels[i] = default_label; + } + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + labels[val->val.i64 - min.i64] = label; + } + } + + if (aarch64_may_encode_imm12(max.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, max.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | bgt =>default_label + } else { + | bhi =>default_label + } + + if (op1_reg == IR_REG_NONE) { + op1_reg = op2_reg; + } + if (aarch64_may_encode_imm12(min.i64)) { + | ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64); + | ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg + } + if (IR_IS_TYPE_SIGNED(type)) { + | blt =>default_label + } else { + | blo =>default_label + } + | adr Rx(tmp_reg), >1 + | ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3] + | br Rx(tmp_reg) + |.jmp_table + if (!data->jmp_table_label) { + data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; + |=>data->jmp_table_label: + } + |.align 8 + |1: + for (i = 0; i <= (max.i64 - min.i64); i++) { + int b = labels[i]; + ir_block *bb = &ctx->cfg_blocks[b]; + ir_insn *insn = &ctx->ir_base[bb->end]; + + if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { + ir_ref prev = ctx->prev_ref[bb->end]; + if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { + prev = ctx->prev_ref[prev]; + } + if (prev == bb->start) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + | .addr &addr + if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + bb->flags |= IR_BB_EMPTY; + } + continue; + } + } + | .addr =>b + } + |.code + ir_mem_free(labels); + } else { + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + if (aarch64_may_encode_imm12(val->val.i64)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 + } else { + ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg + + } + | beq =>label + } + } + if (default_label) { + | b =>default_label + } + } +} + +static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg) +{ + int i; + int8_t *pred, *loc, *types; + ir_reg to, from_reg, c; + ir_type type; + ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; + + loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); + pred = loc + IR_REG_NUM; + types = pred + IR_REG_NUM; + memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t)); + todo = IR_REGSET_EMPTY; + ready = IR_REGSET_EMPTY; + + for (i = 0; i < count; i++) { + from_reg = copies[i].from; + to = copies[i].to; + if (from_reg != to) { + loc[from_reg] = from_reg; + pred[to] = from_reg; + types[from_reg] = copies[i].type; + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + 
IR_REGSET_INCL(todo, to); + } + } + } + + IR_REGSET_FOREACH(todo, i) { + if (loc[i] == IR_REG_NONE) { + IR_REGSET_INCL(ready, i); + } + } IR_REGSET_FOREACH_END(); + + while (1) { + while (ready != IR_REGSET_EMPTY) { + to = ir_regset_pop_first(&ready); + from_reg = pred[to]; + c = loc[from_reg]; + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, to, c); + } else { + ir_emit_fp_mov(ctx, type, to, c); + } + IR_REGSET_EXCL(todo, to); + loc[from_reg] = to; + if (from_reg == c && pred[from_reg] != IR_REG_NONE) { + IR_REGSET_INCL(ready, from_reg); + } + } + + if (todo == IR_REGSET_EMPTY) { + break; + } + to = ir_regset_pop_first(&todo); + from_reg = pred[to]; + IR_ASSERT(to != loc[from_reg]); + type = types[from_reg]; + if (IR_IS_TYPE_INT(type)) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST); + ir_emit_mov(ctx, type, tmp_reg, to); + loc[to] = tmp_reg; + } else { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST); + ir_emit_fp_mov(ctx, type, tmp_fp_reg, to); + loc[to] = tmp_fp_reg; + } + IR_REGSET_INCL(ready, to); + } + + if (last_reg != IR_REG_NONE) { + to = last_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(IR_IS_TYPE_INT(type)); + ir_emit_mov(ctx, type, to, c); + } + } + + if (last_fp_reg != IR_REG_NONE) { + to = last_fp_reg; + from_reg = pred[to]; + c = loc[from_reg]; + if (to != c) { + type = types[from_reg]; + IR_ASSERT(!IR_IS_TYPE_INT(type)); + ir_emit_fp_mov(ctx, type, to, c); + } + } + + ir_mem_free(loc); + + return 1; +} + +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) +{ + int j, n; + ir_type type; + int int_param = 0; + int fp_param = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + int32_t used_stack = 0; + + n = insn->inputs_count; + for (j = 3; j <= n; j++) { + type = ctx->ir_base[ir_insn_op(insn, j)].type; + if (IR_IS_TYPE_INT(type)) { + if (int_param >= int_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param >= fp_reg_params_count) { + used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); + } + fp_param++; + } + } + + return used_stack; +} + +static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int j, n; + ir_ref arg; + ir_insn *arg_insn; + uint8_t type; + ir_reg src_reg, dst_reg; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t used_stack, stack_offset = 0; + ir_copy *copies; + bool do_pass3 = 0; + /* For temporaries we may use any scratch registers except for registers used for parameters */ + ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ + + n = insn->inputs_count; + if (n < 3) { + return 0; + } + + if (tmp_reg == IR_REG_NONE) { + tmp_reg = IR_REG_IP0; + } + + if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) { + // TODO: support for preallocated stack + used_stack = 0; + } else { + used_stack = ir_call_used_stack(ctx, insn); + /* Stack must be 16 byte aligned */ + used_stack = IR_ALIGNED_SIZE(used_stack, 16); + if 
(ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { + used_stack = 0; + } else { + ctx->call_stack_size += used_stack; + if (used_stack) { + | sub sp, sp, #used_stack + } + } + } + + /* 1. move all register arguments that should be passed through stack + * and collect arguments that should be passed through registers */ + copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* pass argument through stack */ + } + fp_param++; + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + /* delay CONST->REG and MEM->REG moves to third pass */ + do_pass3 = 1; + } else { + IR_ASSERT(src_reg != IR_REG_NONE); + if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + } + if (src_reg != dst_reg) { + /* delay REG->REG moves to second pass */ + copies[count].type = type; + copies[count].from = src_reg; + copies[count].to = dst_reg; + count++; + } + } + } else { + /* Pass register arguments to stack (REG->MEM moves) */ + if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } else { + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + do_pass3 = 1; + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + + /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ + if (count) { + ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); + } + ir_mem_free(copies); + + /* 3. 
move the remaining memory and immediate values */ + if (do_pass3) { + stack_offset = 0; + int_param = 0; + fp_param = 0; + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + int_param++; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + fp_param++; + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + if (type == IR_ADDR) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + | adr Rx(dst_reg), =>label + continue; + } + IR_ASSERT(val_insn->op == IR_ADDR || val_insn->op == IR_FUNC_ADDR); + } else if (ir_type_size[type] == 1) { + type = IR_ADDR; + } + } + ir_emit_load(ctx, type, dst_reg, arg); + } else { + ir_emit_load(ctx, type, dst_reg, arg); + } + } + } else { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + IR_ASSERT(tmp_reg != IR_REG_NONE); + | adr Rx(tmp_reg), =>label + | str Rx(tmp_reg), [sp, #stack_offset] + } else if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { + IR_ASSERT(0 && "sym"); + } else { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load_imm_int(ctx, type, tmp_reg, val_insn->val.i64); + | str Rx(tmp_reg), [sp, #stack_offset] + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + if (IR_IS_CONST_REF(arg)) { + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + } + return used_stack; +} + +static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | blr Rx(op2_reg) + } + + if (used_stack) { + | add sp, sp, #used_stack + ctx->call_stack_size -= used_stack; + } + + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_FP_RET1) { + ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + } + } + } +} + +static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (used_stack != 0) { + ir_emit_call(ctx, def, insn); + ir_emit_return_void(ctx); + return; + } + + ir_emit_epilogue(ctx); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } +} + +static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | br Rx(op2_reg) + } else if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_CONST_REF(insn->op2)) { + bool is_true = ir_ref_is_true(ctx, insn->op2); + + if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + } + return; + } + + IR_ASSERT(op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + if (IR_IS_CONST_REF(insn->op3)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + if (ir_type_size[type] == 8) { + | cbz Rx(op2_reg), &addr + } else { + | cbz Rw(op2_reg), &addr + } + } else { + if (ir_type_size[type] == 8) { + | cbnz Rx(op2_reg), &addr + } else { + | cbnz Rw(op2_reg), &addr + } + } + } else { + IR_ASSERT(0); + } +} + +static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op == IR_EQ) { + if (ir_type_size[type] == 8) { + | cbnz Rx(reg), &addr + } else { + | cbnz Rw(reg), &addr + } + } else { + IR_ASSERT(op == IR_NE); + if (ir_type_size[type] == 8) { + | cbz Rx(reg), &addr + } else { + | cbz Rw(reg), &addr + } + } +} + +static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | blt &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | ble &addr + break; + case IR_GT: + | bgt &addr + break; + case IR_ULT: + | blo 
&addr + break; + case IR_UGE: + | bhs &addr + break; + case IR_ULE: + | bls &addr + break; + case IR_UGT: + | bhi &addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | beq &addr + break; + case IR_NE: + | bne &addr + break; + case IR_LT: + | bmi &addr + break; + case IR_GE: + | bge &addr + break; + case IR_LE: + | bls &addr + break; + case IR_GT: + | bgt &addr + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + } + } +} + +static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + void *addr; + + if (op1_reg != IR_REG_NONE && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + if (aarch64_may_use_b(ctx, addr)) { + | b &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | br Rx(IR_REG_INT_TMP) + } + return; + } else if (op == IR_UGE) { + /* always true */ + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + if (insn->op == IR_GUARD_NOT) { + op ^= 1; // reverse + } + ir_emit_guard_jz(ctx, op, addr, type, op1_reg); + return; + } + } + ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + + ir_emit_guard_jcc(ctx, op, addr, 1); +} + +static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + ir_emit_guard_jcc(ctx, op, addr, 0); +} + +static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; + ir_type type = math_insn->type; + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (math_insn->op == IR_MUL_OV) { + if (insn->op == IR_GUARD) { + | beq &addr + } else { + | bne &addr + } + } else if (IR_IS_TYPE_SIGNED(type)) { + if (insn->op == IR_GUARD) { + | bvc &addr + } else { + | bvs &addr + } + } else { + if (insn->op == IR_GUARD) { + | bcc &addr + } else { + | bcs &addr + } + } +} + +static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = 
&data->dasm_state; + uint32_t code; + ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + +||#ifdef __APPLE__ +|| code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0 +| .long code +| and Rx(reg), Rx(reg), #0xfffffffffffffff8 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1 +|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1 +||#else +|| code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0 +| .long code +||//??? IR_ASSERT(insn->op2 <= LDR_STR_PIMM64); +| ldr Rx(reg), [Rx(reg), #insn->op2] +||#endif + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, reg); + } +} + +static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + + | stp d30, d31, [sp, #-16]! + | stp d28, d29, [sp, #-16]! + | stp d26, d27, [sp, #-16]! + | stp d24, d25, [sp, #-16]! + | stp d22, d23, [sp, #-16]! + | stp d20, d21, [sp, #-16]! + | stp d18, d19, [sp, #-16]! + | stp d16, d17, [sp, #-16]! + | stp d14, d15, [sp, #-16]! + | stp d12, d13, [sp, #-16]! + | stp d10, d11, [sp, #-16]! + | stp d8, d9, [sp, #-16]! + | stp d6, d7, [sp, #-16]! + | stp d4, d5, [sp, #-16]! + | stp d2, d3, [sp, #-16]! + | stp d0, d1, [sp, #-16]! + + | str x30, [sp, #-16]! + | stp x28, x29, [sp, #-16]! + | stp x26, x27, [sp, #-16]! + | stp x24, x25, [sp, #-16]! + | stp x22, x23, [sp, #-16]! + | stp x20, x21, [sp, #-16]! + | stp x18, x19, [sp, #-16]! + | stp x16, x17, [sp, #-16]! + | stp x14, x15, [sp, #-16]! + | stp x12, x13, [sp, #-16]! + | stp x10, x11, [sp, #-16]! + | stp x8, x9, [sp, #-16]! + | stp x6, x7, [sp, #-16]! + | stp x4, x5, [sp, #-16]! + | stp x2, x3, [sp, #-16]! + | stp x0, x1, [sp, #-16]! + + | mov Rx(IR_REG_INT_ARG2), sp + | add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8) + | str Rx(IR_REG_INT_ARG1), [sp, #(31*8)] + | mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP) + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + + if (aarch64_may_use_b(ctx, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } else { + IR_ASSERT(0); + } + + | add sp, sp, #(32*8+32*8) + + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) +{ + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_int(ctx, type, to_reg, fp, offset); + } + } else { + if (from_reg != IR_REG_NONE) { + if (to_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, to_reg, from_reg); + } else { + ir_emit_store(ctx, type, to, from_reg); + } + } else { + ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset); + } + } +} + +static void ir_emit_load_params(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + ir_reg dst_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ + } else { + stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (ctx->vregs[use]) { + dst_reg = IR_REG_NUM(ctx->regs[use][0]); + IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + + ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-ctx->stack_frame_size : ctx->call_stack_size)); + if (src_reg != dst_reg) { + ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); + } + if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { + ir_emit_store(ctx, insn->type, use, dst_reg); + } + } + if (src_reg == IR_REG_NONE) { + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static ir_reg ir_get_free_reg(ir_type type, ir_regset available) +{ + if (IR_IS_TYPE_INT(type)) { + available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); + } + IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); + return IR_REGSET_FIRST(available); +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_backend_data *data = ctx->data; + ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + if (ctx->regs[ref][0] == IR_REG_NONE) { + ctx->regs[ref][0] = IR_REG_X0; + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (ctx->regs[ref][1] == IR_REG_NONE) { + ctx->regs[ref][1] = IR_REG_V0; + } + } + } + return 1; +} + +static void ir_fix_param_spills(ir_ctx *ctx) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + ir_insn *insn; + ir_ref i, n, *p, use; + int int_param_num = 0; + int fp_param_num = 0; + ir_reg src_reg; + // TODO: Calling convention specific + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t stack_offset = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + /* skip old frame pointer and return address */ + stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); + } else { + /* skip return address */ + stack_offset = sizeof(void*) + ctx->stack_frame_size; + } + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (int_param_num < int_reg_params_count) { + src_reg = int_reg_params[int_param_num]; + } else { + src_reg = IR_REG_NONE; + } + int_param_num++; + } else { + if (fp_param_num < fp_reg_params_count) { + src_reg = fp_reg_params[fp_param_num]; + } else { + src_reg = IR_REG_NONE; + } + fp_param_num++; + } + if (src_reg == IR_REG_NONE) { + if (ctx->vregs[use]) { + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; + if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + ival->stack_spill_pos = stack_offset; + ctx->regs[use][0] = IR_REG_NONE; + } + } + if (sizeof(void*) == 8) { + stack_offset += sizeof(void*); + } else { + stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); + } + } + } + } +} + +static void ir_allocate_unique_spill_slots(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_insn *insn; + ir_ref i, n, j, *p; + uint32_t *rule, insn_flags; + ir_backend_data *data = ctx->data; + ir_regset available = 0; + ir_target_constraints 
constraints; + uint32_t def_flags; + ir_reg reg; + + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + + /* vregs + tmp + fixed + SCRATCH + ALL */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + + if (!ctx->arena) { + ctx->arena = ir_arena_create(16 * 1024); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { + switch (ctx->rules ? *rule : insn->op) { + case IR_START: + case IR_BEGIN: + case IR_END: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_LOOP_END: + break; + default: + def_flags = ir_get_target_constraints(ctx, i, &constraints); + if (ctx->rules + && *rule != IR_CMP_AND_BRANCH_INT + && *rule != IR_CMP_AND_BRANCH_FP + && *rule != IR_GUARD_CMP_INT + && *rule != IR_GUARD_CMP_FP) { + available = IR_REGSET_SCRATCH; + } + if (ctx->vregs[i]) { + reg = constraints.def_reg; + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } else if (def_flags & IR_USE_MUST_BE_IN_REG) { + if (insn->op == IR_VLOAD + && ctx->live_intervals[ctx->vregs[i]] + && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { + /* pass */ + } else if (insn->op != IR_PARAM) { + reg = ir_get_free_reg(insn->type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; + } + } + if (!ctx->live_intervals[ctx->vregs[i]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[i]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[i]; + ival->stack_spill_pos = -1; + if (insn->op == IR_PARAM && reg == IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else { + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); + } + } else if (insn->op == IR_PARAM) { + IR_ASSERT(0 && "unexpected PARAM"); + return; + } + } else if (insn->op == IR_VAR) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref n = use_list->count; + + if (n > 0) { + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + ir_ref i, *p, use; + ir_insn *use_insn; + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use_insn->op3]; + 
ival->stack_spill_pos = stack_spill_pos; + } + } + } + } + } + + insn_flags = ir_op_flags[insn->op]; + n = constraints.tmps_count; + if (n) { + do { + n--; + if (constraints.tmp_regs[n].type) { + ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][constraints.tmp_regs[n].num] = reg; + } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + } + } while (n); + } + n = insn->inputs_count; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); + ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; + } else { + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { + ctx->regs[i][j] = ctx->regs[i][1]; + } else if (use_flags & IR_USE_MUST_BE_IN_REG) { + reg = ir_get_free_reg(ctx->ir_base[input].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } + } + } + } + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + if (bb->flags & IR_BB_DESSA_MOVES) { + data->dessa_from_block = b; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + + ctx->used_preserved_regs = ctx->fixed_save_regset; + ctx->flags |= IR_NO_STACK_COMBINE; + ir_fix_stack_frame(ctx); +} + +static void ir_preallocate_call_stack(ir_ctx *ctx) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + if (peak_call_stack_size) { + ctx->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; + } +} + +void ir_fix_stack_frame(ir_ctx *ctx) +{ + uint32_t additional_size = 0; + + if (ctx->used_preserved_regs) { + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + ir_reg reg; + (void) reg; + + IR_REGSET_FOREACH(used_preserved_regs, reg) { + additional_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); + ctx->stack_frame_size += additional_size; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while
(IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else { + if (!(ctx->flags & IR_NO_STACK_COMBINE)) { + ir_preallocate_call_stack(ctx); + } + while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) != + ctx->stack_frame_size + ctx->call_stack_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } + } + + ir_fix_param_spills(ctx); +} + +static void* dasm_labels[ir_lb_MAX]; + +/* Veneers support (TODO: avoid global variable usage) */ +static ir_ctx *ir_current_ctx; + +void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) +{ + uint32_t b, n, target; + ir_block *bb; + ir_ref i; + ir_insn *insn; + uint32_t *rule; + ir_backend_data data; + dasm_State **Dst; + int ret; + void *entry; + size_t size; + + data.ra_data.unused_slot_4 = 0; + data.ra_data.unused_slot_2 = 0; + data.ra_data.unused_slot_1 = 0; + data.ra_data.handled = NULL; + data.rodata_label = 0; + data.jmp_table_label = 0; + ctx->data = &data; + + if (!ctx->live_intervals) { + ctx->stack_frame_size = 0; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + ctx->used_preserved_regs = 0; + ir_allocate_unique_spill_slots(ctx); + } + + if (ctx->fixed_stack_frame_size != -1) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); + } + if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { + // TODO: report error to caller +#ifdef IR_DEBUG_MESSAGES + fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", + __FILE__, __LINE__); +#endif + ctx->data = NULL; + ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; + return NULL; + } + ctx->stack_frame_size = ctx->fixed_stack_frame_size; + ctx->call_stack_size = ctx->fixed_call_stack_size; + ctx->stack_frame_alignment = 0; + } + + Dst = &data.dasm_state; + data.dasm_state = NULL; + dasm_init(&data.dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&data.dasm_state, dasm_actions); + /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ + dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); + + if (!(ctx->flags & IR_SKIP_PROLOGUE)) { + ir_emit_prologue(ctx); + } + if (ctx->flags & IR_FUNCTION) { + ir_emit_load_params(ctx); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + continue; + } + |=>b: + + i = bb->start; + insn = ctx->ir_base + i; + if (bb->flags & IR_BB_ENTRY) { + uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; + + |=>label: + ir_emit_prologue(ctx); + ctx->entries[insn->op3] = i; + } + + /* skip first instruction */ + n = ir_insn_len(insn); + i += n; + insn += n; + rule = ctx->rules + i; + + while (i <= bb->end) { + if (!((*rule) & (IR_FUSED|IR_SKIPPED))) + switch (*rule) { + case IR_VAR: + case IR_PARAM: + case IR_PI: + case IR_PHI: + case IR_SNAPSHOT: + break; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + ir_emit_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_SHIFT: + ir_emit_shift(ctx, i, insn); + break; + case IR_SHIFT_CONST: + ir_emit_shift_const(ctx, i, insn); + break;
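The three *_PWR2 rules dispatched just above are matched during instruction selection when one operand is a constant power of two, letting ir_emit_mul_div_mod_pwr2 emit shifts and masks instead of real multiply/divide instructions. A minimal sketch of the underlying identities in plain C (unsigned case only; the emitter's signed DIV/MOD variants additionally need a bias adjustment before shifting):

    #include <stdint.h>

    /* x OP 2^k rewritten without MUL/DIV/MOD instructions */
    static inline uint32_t mul_pwr2(uint32_t x, unsigned k) { return x << k; }
    static inline uint32_t div_pwr2(uint32_t x, unsigned k) { return x >> k; }
    static inline uint32_t mod_pwr2(uint32_t x, unsigned k) { return x & ((UINT32_C(1) << k) - 1); }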
+ case IR_OP_INT: + ir_emit_op_int(ctx, i, insn); + break; + case IR_OP_FP: + ir_emit_op_fp(ctx, i, insn); + break; + case IR_BINOP_INT: + ir_emit_binop_int(ctx, i, insn); + break; + case IR_BINOP_FP: + ir_emit_binop_fp(ctx, i, insn); + break; + case IR_CMP_INT: + ir_emit_cmp_int(ctx, i, insn); + break; + case IR_CMP_FP: + ir_emit_cmp_fp(ctx, i, insn); + break; + case IR_SEXT: + ir_emit_sext(ctx, i, insn); + break; + case IR_ZEXT: + ir_emit_zext(ctx, i, insn); + break; + case IR_TRUNC: + ir_emit_trunc(ctx, i, insn); + break; + case IR_BITCAST: + ir_emit_bitcast(ctx, i, insn); + break; + case IR_INT2FP: + ir_emit_int2fp(ctx, i, insn); + break; + case IR_FP2INT: + ir_emit_fp2int(ctx, i, insn); + break; + case IR_FP2FP: + ir_emit_fp2fp(ctx, i, insn); + break; + case IR_COPY_INT: + ir_emit_copy_int(ctx, i, insn); + break; + case IR_COPY_FP: + ir_emit_copy_fp(ctx, i, insn); + break; + case IR_CMP_AND_BRANCH_INT: + ir_emit_cmp_and_branch_int(ctx, b, i, insn); + break; + case IR_CMP_AND_BRANCH_FP: + ir_emit_cmp_and_branch_fp(ctx, b, i, insn); + break; + case IR_GUARD_CMP_INT: + ir_emit_guard_cmp_int(ctx, b, i, insn); + break; + case IR_GUARD_CMP_FP: + ir_emit_guard_cmp_fp(ctx, b, i, insn); + break; + case IR_IF_INT: + ir_emit_if_int(ctx, b, i, insn); + break; + case IR_COND: + ir_emit_cond(ctx, i, insn); + break; + case IR_SWITCH: + ir_emit_switch(ctx, b, i, insn); + break; + case IR_MIN_MAX_INT: + ir_emit_min_max_int(ctx, i, insn); + break; + case IR_OVERFLOW: + ir_emit_overflow(ctx, i, insn); + break; + case IR_OVERFLOW_AND_BRANCH: + ir_emit_overflow_and_branch(ctx, b, i, insn); + break; + case IR_END: + case IR_LOOP_END: + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_emit_osr_entry_loads(ctx, b, bb); + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + do { + ir_ref succ = ctx->cfg_edges[bb->successors]; + + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, succ); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + | b =>target + } + } while (0); + break; + case IR_RETURN_VOID: + ir_emit_return_void(ctx); + break; + case IR_RETURN_INT: + ir_emit_return_int(ctx, i, insn); + break; + case IR_RETURN_FP: + ir_emit_return_fp(ctx, i, insn); + break; + case IR_CALL: + ir_emit_call(ctx, i, insn); + break; + case IR_TAILCALL: + ir_emit_tailcall(ctx, i, insn); + break; + case IR_IJMP: + ir_emit_ijmp(ctx, i, insn); + break; + case IR_REG_BINOP_INT: + ir_emit_reg_binop_int(ctx, i, insn); + break; + case IR_VADDR: + ir_emit_vaddr(ctx, i, insn); + break; + case IR_VLOAD: + ir_emit_vload(ctx, i, insn); + break; + case IR_VSTORE: + ir_emit_vstore(ctx, i, insn); + break; + case IR_RLOAD: + ir_emit_rload(ctx, i, insn); + break; + case IR_RSTORE: + ir_emit_rstore(ctx, i, insn); + break; + case IR_LOAD_INT: + ir_emit_load_int(ctx, i, insn); + break; + case IR_LOAD_FP: + ir_emit_load_fp(ctx, i, insn); + break; + case IR_STORE_INT: + ir_emit_store_int(ctx, i, insn); + break; + case IR_STORE_FP: + ir_emit_store_fp(ctx, i, insn); + break; + case IR_ALLOCA: + ir_emit_alloca(ctx, i, insn); + break; + case IR_AFREE: + ir_emit_afree(ctx, i, insn); + break; + case IR_EXITCALL: + ir_emit_exitcall(ctx, i, insn); + break; + case IR_GUARD: + case IR_GUARD_NOT: + 
ir_emit_guard(ctx, i, insn); + break; + case IR_GUARD_OVERFLOW: + ir_emit_guard_overflow(ctx, i, insn); + break; + case IR_TLS: + ir_emit_tls(ctx, i, insn); + break; + default: + IR_ASSERT(0 && "NIY rule/instruction"); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; + return NULL; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + } + + if (ctx->deoptimization_exits) { + for (i = 0; i < ctx->deoptimization_exits; i++) { + const void *exit_addr = ctx->get_exit_addr(i); + + if (!exit_addr) { + ctx->data = NULL; + return 0; + } + | b &exit_addr + } + } + + if (data.rodata_label) { + |.rodata + } + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + if (insn->const_flags & IR_CONST_EMIT) { + if (IR_IS_TYPE_FP(insn->type)) { + int label = ctx->cfg_blocks_count + i; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + if (insn->type == IR_DOUBLE) { + |.align 8 + |=>label: + |.long insn->val.u32, insn->val.u32_hi + } else { + IR_ASSERT(insn->type == IR_FLOAT); + |.align 4 + |=>label: + |.long insn->val.u32 + } + } else if (insn->op == IR_STR) { + int label = ctx->cfg_blocks_count + i; + const char *str = ir_get_str(ctx, insn->val.i32); + int i = 0; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + |.align 8 + |=>label: + while (1) { + char c; + uint32_t w = 0; + int j; + + for (j = 0; j < 4; j++) { + c = str[i]; + if (!c) { + break; + } else if (c == '\\') { + if (str[i+1] == '\\') { + i++; + c = '\\'; + } else if (str[i+1] == '\'') { + i++; + c = '\''; + } else if (str[i+1] == '"') { + i++; + c = '"'; + } else if (str[i+1] == 'a') { + i++; + c = '\a'; + } else if (str[i+1] == 'b') { + i++; + c = '\b'; + } else if (str[i+1] == 'e') { + i++; + c = 27; /* '\e'; */ + } else if (str[i+1] == 'f') { + i++; + c = '\f'; + } else if (str[i+1] == 'n') { + i++; + c = '\n'; + } else if (str[i+1] == 'r') { + i++; + c = '\r'; + } else if (str[i+1] == 't') { + i++; + c = '\t'; + } else if (str[i+1] == 'v') { + i++; + c = '\v'; + } else if (str[i+1] == '?') { + i++; + c = 0x3f; + } + } + w |= c << (8 * j); + i++; + } + | .long w + if (!c) { + break; + } + } + + } else { + IR_ASSERT(0); + } + } + } + if (data.rodata_label) { + |.code + } + + if (ctx->status) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + return NULL; + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_LINK; + return NULL; + } + size = *size_ptr; + + if (ctx->code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + entry = ctx->code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + if (!entry) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + ir_mem_unprotect(entry, size); + } + + ir_current_ctx = ctx; + ctx->veneers_size = 0; + if (data.jmp_table_label) { + ctx->code_size = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); + } else if (data.rodata_label) { + ctx->code_size = dasm_getpclabel(&data.dasm_state,
data.rodata_label); + } else { + ctx->code_size = size; + } + + ret = dasm_encode(&data.dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + if (ctx->code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + ctx->data = NULL; + ctx->status = IR_ERROR_ENCODE; + return NULL; + } + + if (data.jmp_table_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); + ctx->jmp_table_offset = offset; + } else { + ctx->jmp_table_offset = 0; + } + if (data.rodata_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); + ctx->rodata_offset = offset; + } else { + ctx->rodata_offset = 0; + } + + if (ctx->entries_count) { + /* For all entries */ + i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); + insn->op3 = offset; + } while (i != 0); + } + + dasm_free(&data.dasm_state); + + *size_ptr += ctx->veneers_size; + + ir_mem_flush(entry, size); + + if (ctx->code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + ctx->data = NULL; + return entry; +} + +const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr) +{ + void *entry; + size_t size; + uint32_t i; + dasm_State **Dst, *dasm_state; + int ret; + + /* IR_ASSERT(aarch64_may_use_b(ctx, exit_addr)) */ + IR_ASSERT(code_buffer); + if ((char*)exit_addr >= (char*)code_buffer && (char*)exit_addr < (char*)code_buffer + code_buffer_size) { + IR_ASSERT(code_buffer_size < B_IMM); + } else if ((char*)exit_addr >= (char*)code_buffer + code_buffer_size) { + IR_ASSERT(((char*)exit_addr - (char*)code_buffer) < B_IMM); + } else if ((char*)exit_addr < (char*)code_buffer) { + IR_ASSERT(((((char*)(code_buffer)) + code_buffer_size) - (char*)exit_addr) < B_IMM); + } else { + IR_ASSERT(0); + } + + Dst = &dasm_state; + dasm_state = NULL; + dasm_init(&dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&dasm_state, dasm_actions); + + | bl >2 + |1: + for (i = 1; i < exit_points_per_group; i++) { + | bl >2 + } + |2: + | adr Rx(IR_REG_INT_TMP), <1 + | sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP) + | lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2 + if (first_exit_point) { + | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point + } + | b &exit_addr + + ret = dasm_link(&dasm_state, &size); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + return NULL; + } + + if (code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > code_buffer_size) { + dasm_free(&dasm_state); + return NULL; + } + entry = code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + ir_mem_unprotect(entry, size); + } + + ir_current_ctx = NULL; + ret = dasm_encode(&dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + if (code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + + dasm_free(&dasm_state); + + ir_mem_flush(entry, size); + + if (code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + *size_ptr = size; + return entry; +} + +static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset) +{ + ir_ctx *ctx = ir_current_ctx; + const void *addr, *veneer = NULL; + ptrdiff_t na; + int n, m; + + IR_ASSERT(ctx && ctx->code_buffer); + +
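A note before the decoding below: AArch64 B/BL instructions carry a signed 26-bit word offset, so they reach only +/-128 MB. ir_add_veneer appears to be the hook dasm invokes when a branch target falls outside that range; it plants a trampoline (a plain B) at the end of the code buffer and redirects the short branch there. The store of 0x14000000 | ((m >> 2) & 0x03ffffff) at the end of this function is the standard B encoding; a self-contained sketch of the same range check and encoding (field layout per the ARM architecture reference, not taken from this diff):

    #include <stdint.h>

    /* Encode "B target" placed at pc, or return 0 when the +/-128MB range is exceeded. */
    static uint32_t encode_b(uint64_t pc, uint64_t target)
    {
        int64_t off = (int64_t)(target - pc);
        if ((off & 3) != 0 || off < -(INT64_C(1) << 27) || off >= (INT64_C(1) << 27)) {
            return 0; /* out of range: a veneer (or an indirect branch) is required */
        }
        return UINT32_C(0x14000000) | ((uint32_t)(off >> 2) & UINT32_C(0x03ffffff));
    }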
if ((ins >> 16) == DASM_REL_A) { + addr = (void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2))); + if (ctx->get_veneer) { + veneer = ctx->get_veneer(ctx, addr); + } + } else { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + if (veneer) { + na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; + n = (int)na; + + /* check if we can jump to veneer */ + if ((ptrdiff_t)n != na) { + /* pass */ + } else if (!(ins & 0xf800)) { /* B, BL */ + if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) { + return n; + } + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) { + return n; + } + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + /* pass */ + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + /* pass */ + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + if ((n & 3) == 0 && ((n+0x00008000) >> 16) == 0) { + return n; + } + } + } + + veneer = (char*)buffer + (Dst->codesize + ctx->veneers_size); + if (veneer > (void*)((char*)ctx->code_buffer + ctx->code_buffer_size)) { + IR_ASSERT(0 && "too long jmp distance" && "jit buffer overflow"); + return 0; /* jit_buffer_size overflow */ + } + + na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; + n = (int)na; + + /* check if we can jump to veneer */ + if ((ptrdiff_t)n != na) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if (!(ins & 0xf800)) { /* B, BL */ + if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + } else if ((ins & 0x8000)) { /* absolute */ + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + /* check if we can use B to jump from veneer */ + na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4; + m = (int)na; + if ((ptrdiff_t)m != na) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) { + IR_ASSERT(0 && "too long jmp distance"); + return 0; + } + + /* generate B instruction */ + *(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff); + ctx->veneers_size += 4; + + return n; +} diff --git a/ext/opcache/jit/ir/ir_aarch64.h b/ext/opcache/jit/ir/ir_aarch64.h new file mode 100644 index 0000000000000..4c36f7e56f692 --- /dev/null +++ b/ext/opcache/jit/ir/ir_aarch64.h @@ -0,0 +1,173 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Aarch64 CPU specific definitions) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#ifndef IR_AARCH64_H +#define IR_AARCH64_H + +#define IR_GP_REGS(_) \ + _(X0, x0, w0) \ + _(X1, x1, w1) \ + _(X2, x2, w2) \ + _(X3, x3, w3) \ + _(X4, x4, w4) \ + _(X5, x5, w5) \ + _(X6, x6, w6) \ + _(X7, x7, w7) \ + _(X8, x8, w8) \ + _(X9, x9, w9) \ + _(X10, x10, w10) \ + _(X11, x11, w11) \ + _(X12, x12, w12) \ + _(X13, x13, w13) \ + _(X14, x14, w14) \ + _(X15, x15, w15) \ + _(X16, x16, w16) \ + _(X17, x17, w17) \ + _(X18, x18, w18) \ + _(X19, x19, w19) \ + _(X20, x20, w20) \ + _(X21, x21, w21) \ + _(X22, x22, w22) \ + _(X23, x23, w23) \ + _(X24, x24, w24) \ + _(X25, x25, w25) \ + _(X26, x26, w26) \ + _(X27, x27, w27) \ + _(X28, x28, w28) \ + _(X29, x29, w29) \ + _(X30, x30, w30) \ + _(X31, x31, w31) \ + +# define IR_FP_REGS(_) \ + _(V0, d0, s0, h0, b0) \ + _(V1, d1, s1, h1, b1) \ + _(V2, d2, s2, h2, b2) \ + _(V3, d3, s3, h3, b3) \ + _(V4, d4, s4, h4, b4) \ + _(V5, d5, s5, h5, b5) \ + _(V6, d6, s6, h6, b6) \ + _(V7, d7, s7, h7, b7) \ + _(V8, d8, s8, h8, b8) \ + _(V9, d9, s9, h9, b9) \ + _(V10, d10, s10, h10, b10) \ + _(V11, d11, s11, h11, b11) \ + _(V12, d12, s12, h12, b12) \ + _(V13, d13, s13, h13, b13) \ + _(V14, d14, s14, h14, b14) \ + _(V15, d15, s15, h15, b15) \ + _(V16, d16, s16, h16, b16) \ + _(V17, d17, s17, h17, b17) \ + _(V18, d18, s18, h18, b18) \ + _(V19, d19, s19, h19, b19) \ + _(V20, d20, s20, h20, b20) \ + _(V21, d21, s21, h21, b21) \ + _(V22, d22, s22, h22, b22) \ + _(V23, d23, s23, h23, b23) \ + _(V24, d24, s24, h24, b24) \ + _(V25, d25, s25, h25, b25) \ + _(V26, d26, s26, h26, b26) \ + _(V27, d27, s27, h27, b27) \ + _(V28, d28, s28, h28, b28) \ + _(V29, d29, s29, h29, b29) \ + _(V30, d30, s30, h30, b30) \ + _(V31, d31, s31, h31, b31) \ + +#define IR_GP_REG_ENUM(code, name64, name32) \ + IR_REG_ ## code, + +#define IR_FP_REG_ENUM(code, name64, name32, name16, name8) \ + IR_REG_ ## code, + +enum _ir_reg { + _IR_REG_NONE = -1, + IR_GP_REGS(IR_GP_REG_ENUM) + IR_FP_REGS(IR_FP_REG_ENUM) + IR_REG_NUM, +}; + +#define IR_REG_GP_FIRST IR_REG_X0 +#define IR_REG_FP_FIRST IR_REG_V0 +#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) +#define IR_REG_FP_LAST (IR_REG_NUM - 1) +#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ +#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ + +#define IR_REGSET_64BIT 1 + +#define IR_REG_INT_TMP IR_REG_X17 /* reserved temporary register used by code-generator */ + +#define IR_REG_STACK_POINTER \ + IR_REG_X31 +#define IR_REG_FRAME_POINTER \ + IR_REG_X29 +#define IR_REGSET_FIXED \ + ( IR_REGSET(IR_REG_INT_TMP) \ + | IR_REGSET(IR_REG_X18) /* platform specific register */ \ + | IR_REGSET_INTERVAL(IR_REG_X29, IR_REG_X31)) +#define IR_REGSET_GP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED) +#define IR_REGSET_FP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED) + +#define IR_REG_IP0 IR_REG_X16 +#define IR_REG_IP1 IR_REG_X17 +#define IR_REG_PR IR_REG_X18 +#define IR_REG_LR IR_REG_X30 +#define IR_REG_ZR IR_REG_X31 + +/* Calling Convention */ +#define IR_REG_INT_RET1 IR_REG_X0 +#define IR_REG_FP_RET1 IR_REG_V0 +#define IR_REG_INT_ARGS 8 +#define IR_REG_FP_ARGS 8 +#define IR_REG_INT_ARG1 IR_REG_X0 +#define IR_REG_INT_ARG2 IR_REG_X1 +#define IR_REG_INT_ARG3 IR_REG_X2 +#define IR_REG_INT_ARG4 IR_REG_X3 +#define IR_REG_INT_ARG5 IR_REG_X4 +#define IR_REG_INT_ARG6 IR_REG_X5 +#define IR_REG_INT_ARG7 IR_REG_X6 +#define IR_REG_INT_ARG8 IR_REG_X7 +#define IR_REG_FP_ARG1 IR_REG_V0 +#define IR_REG_FP_ARG2 IR_REG_V1
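The FP argument-register table continues below with IR_REG_FP_ARG3 through IR_REG_FP_ARG8. One convention worth making explicit: the IR_REGSET_* macros used in this header and by the allocator above (IR_REGSET, IR_REGSET_INTERVAL, IR_REGSET_IN, IR_REGSET_EXCL) behave like operations on a one-bit-per-register mask, which IR_REGSET_64BIT suggests is a single 64-bit word on this target. A sketch under that assumption (illustrative names, not the framework's actual definitions):

    #include <stdint.h>

    typedef uint64_t regset; /* bit r set <=> register r is a member */

    #define RS(r)               ((regset)1 << (r))
    #define RS_INTERVAL(lo, hi) ((~(regset)0 >> (63 - (hi))) & ~(RS(lo) - 1)) /* {lo..hi} */
    #define RS_IN(s, r)         (((s) & RS(r)) != 0)
    #define RS_EXCL(s, r)       ((s) &= ~RS(r))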
+#define IR_REG_FP_ARG3 IR_REG_V2 +#define IR_REG_FP_ARG4 IR_REG_V3 +#define IR_REG_FP_ARG5 IR_REG_V4 +#define IR_REG_FP_ARG6 IR_REG_V5 +#define IR_REG_FP_ARG7 IR_REG_V6 +#define IR_REG_FP_ARG8 IR_REG_V7 +#define IR_MAX_REG_ARGS 16 +#define IR_SHADOW_ARGS 0 + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) \ + | IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) \ + | IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31)) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \ + | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15)) + +typedef struct _ir_tmp_reg { + union { + uint8_t num; + int8_t reg; + }; + uint8_t type; + uint8_t start; + uint8_t end; +} ir_tmp_reg; + +struct _ir_target_constraints { + int8_t def_reg; + uint8_t tmps_count; + uint8_t hints_count; + ir_tmp_reg tmp_regs[3]; + int8_t hints[IR_MAX_REG_ARGS + 3]; +}; + +#endif /* IR_AARCH64_H */ diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h new file mode 100644 index 0000000000000..c7d5abf5e4694 --- /dev/null +++ b/ext/opcache/jit/ir/ir_builder.h @@ -0,0 +1,639 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR Construction API) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_BUILDER_H +#define IR_BUILDER_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* _ir_CTX may be redefined by the user */ +#define _ir_CTX ctx + +#define ir_NOP() ir_emit0(_ir_CTX, IR_NOP) + +#define ir_CONST_BOOL(_val) ir_const_bool(_ir_CTX, (_val)) +#define ir_CONST_U8(_val) ir_const_u8(_ir_CTX, (_val)) +#define ir_CONST_U16(_val) ir_const_u16(_ir_CTX, (_val)) +#define ir_CONST_U32(_val) ir_const_u32(_ir_CTX, (_val)) +#define ir_CONST_U64(_val) ir_const_u64(_ir_CTX, (_val)) +#define ir_CONST_ADDR(_val) ir_const_addr(_ir_CTX, (uintptr_t)(_val)) +#define ir_CONST_CHAR(_val) ir_const_char(_ir_CTX, (_val)) +#define ir_CONST_I8(_val) ir_const_i8(_ir_CTX, (_val)) +#define ir_CONST_I16(_val) ir_const_i16(_ir_CTX, (_val)) +#define ir_CONST_I32(_val) ir_const_i32(_ir_CTX, (_val)) +#define ir_CONST_I64(_val) ir_const_i64(_ir_CTX, (_val)) +#define ir_CONST_DOUBLE(_val) ir_const_double(_ir_CTX, (_val)) +#define ir_CONST_FLOAT(_val) ir_const_float(_ir_CTX, (_val)) + +#define ir_CMP_OP(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2)) + +#define ir_UNARY_OP(_op, _type, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), (_type)), (_op1)) +#define ir_UNARY_OP_B(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1)) +#define ir_UNARY_OP_U8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U8), (_op1)) +#define ir_UNARY_OP_U16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U16), (_op1)) +#define ir_UNARY_OP_U32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U32), (_op1)) +#define ir_UNARY_OP_U64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_U64), (_op1)) +#define ir_UNARY_OP_A(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1)) +#define ir_UNARY_OP_C(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1)) +#define ir_UNARY_OP_I8(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I8), (_op1)) +#define ir_UNARY_OP_I16(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I16), (_op1)) +#define ir_UNARY_OP_I32(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I32), (_op1)) +#define ir_UNARY_OP_I64(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_I64), (_op1)) +#define ir_UNARY_OP_D(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1)) +#define ir_UNARY_OP_F(_op, _op1) ir_fold1(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1)) + +#define ir_BINARY_OP(_op, _t, 
_op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), (_t)), (_op1), (_op2)) +#define ir_BINARY_OP_B(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_BOOL), (_op1), (_op2)) +#define ir_BINARY_OP_U8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U8), (_op1), (_op2)) +#define ir_BINARY_OP_U16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U16), (_op1), (_op2)) +#define ir_BINARY_OP_U32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U32), (_op1), (_op2)) +#define ir_BINARY_OP_U64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_U64), (_op1), (_op2)) +#define ir_BINARY_OP_A(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_ADDR), (_op1), (_op2)) +#define ir_BINARY_OP_C(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_CHAR), (_op1), (_op2)) +#define ir_BINARY_OP_I8(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I8), (_op1), (_op2)) +#define ir_BINARY_OP_I16(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I16), (_op1), (_op2)) +#define ir_BINARY_OP_I32(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I32), (_op1), (_op2)) +#define ir_BINARY_OP_I64(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_I64), (_op1), (_op2)) +#define ir_BINARY_OP_D(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_DOUBLE), (_op1), (_op2)) +#define ir_BINARY_OP_F(_op, _op1, _op2) ir_fold2(_ir_CTX, IR_OPT((_op), IR_FLOAT), (_op1), (_op2)) + +#define ir_EQ(_op1, _op2) ir_CMP_OP(IR_EQ, (_op1), (_op2)) +#define ir_NE(_op1, _op2) ir_CMP_OP(IR_NE, (_op1), (_op2)) + +#define ir_LT(_op1, _op2) ir_CMP_OP(IR_LT, (_op1), (_op2)) +#define ir_GE(_op1, _op2) ir_CMP_OP(IR_GE, (_op1), (_op2)) +#define ir_LE(_op1, _op2) ir_CMP_OP(IR_LE, (_op1), (_op2)) +#define ir_GT(_op1, _op2) ir_CMP_OP(IR_GT, (_op1), (_op2)) + +#define ir_ULT(_op1, _op2) ir_CMP_OP(IR_ULT, (_op1), (_op2)) +#define ir_UGE(_op1, _op2) ir_CMP_OP(IR_UGE, (_op1), (_op2)) +#define ir_ULE(_op1, _op2) ir_CMP_OP(IR_ULE, (_op1), (_op2)) +#define ir_UGT(_op1, _op2) ir_CMP_OP(IR_UGT, (_op1), (_op2)) + +#define ir_ADD(_type, _op1, _op2) ir_BINARY_OP(IR_ADD, (_type), (_op1), (_op2)) +#define ir_ADD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD, (_op1), (_op2)) +#define ir_ADD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD, (_op1), (_op2)) +#define ir_ADD_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD, (_op1), (_op2)) +#define ir_ADD_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD, (_op1), (_op2)) +#define ir_ADD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD, (_op1), (_op2)) +#define ir_ADD_D(_op1, _op2) ir_BINARY_OP_D(IR_ADD, (_op1), (_op2)) +#define ir_ADD_F(_op1, _op2) ir_BINARY_OP_F(IR_ADD, (_op1), (_op2)) + +#define ir_SUB(_type, _op1, _op2) ir_BINARY_OP(IR_SUB, (_type), (_op1), (_op2)) +#define ir_SUB_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB, (_op1), (_op2)) +#define ir_SUB_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB, (_op1), (_op2)) +#define ir_SUB_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB, (_op1), (_op2)) +#define ir_SUB_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB, (_op1), (_op2)) +#define ir_SUB_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB, (_op1), (_op2)) +#define ir_SUB_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB, 
(_op1), (_op2)) +#define ir_SUB_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB, (_op1), (_op2)) +#define ir_SUB_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB, (_op1), (_op2)) +#define ir_SUB_D(_op1, _op2) ir_BINARY_OP_D(IR_SUB, (_op1), (_op2)) +#define ir_SUB_F(_op1, _op2) ir_BINARY_OP_F(IR_SUB, (_op1), (_op2)) + +#define ir_MUL(_type, _op1, _op2) ir_BINARY_OP(IR_MUL, (_type), (_op1), (_op2)) +#define ir_MUL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL, (_op1), (_op2)) +#define ir_MUL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL, (_op1), (_op2)) +#define ir_MUL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL, (_op1), (_op2)) +#define ir_MUL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL, (_op1), (_op2)) +#define ir_MUL_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL, (_op1), (_op2)) +#define ir_MUL_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL, (_op1), (_op2)) +#define ir_MUL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL, (_op1), (_op2)) +#define ir_MUL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL, (_op1), (_op2)) +#define ir_MUL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL, (_op1), (_op2)) +#define ir_MUL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL, (_op1), (_op2)) +#define ir_MUL_D(_op1, _op2) ir_BINARY_OP_D(IR_MUL, (_op1), (_op2)) +#define ir_MUL_F(_op1, _op2) ir_BINARY_OP_F(IR_MUL, (_op1), (_op2)) + +#define ir_DIV(_type, _op1, _op2) ir_BINARY_OP(IR_DIV, (_type), (_op1), (_op2)) +#define ir_DIV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_DIV, (_op1), (_op2)) +#define ir_DIV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_DIV, (_op1), (_op2)) +#define ir_DIV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_DIV, (_op1), (_op2)) +#define ir_DIV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_DIV, (_op1), (_op2)) +#define ir_DIV_A(_op1, _op2) ir_BINARY_OP_A(IR_DIV, (_op1), (_op2)) +#define ir_DIV_C(_op1, _op2) ir_BINARY_OP_C(IR_DIV, (_op1), (_op2)) +#define ir_DIV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_DIV, (_op1), (_op2)) +#define ir_DIV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_DIV, (_op1), (_op2)) +#define ir_DIV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_DIV, (_op1), (_op2)) +#define ir_DIV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_DIV, (_op1), (_op2)) +#define ir_DIV_D(_op1, _op2) ir_BINARY_OP_D(IR_DIV, (_op1), (_op2)) +#define ir_DIV_F(_op1, _op2) ir_BINARY_OP_F(IR_DIV, (_op1), (_op2)) + +#define ir_MOD(_type, _op1, _op2) ir_BINARY_OP(IR_MOD, (_type), (_op1), (_op2)) +#define ir_MOD_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MOD, (_op1), (_op2)) +#define ir_MOD_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MOD, (_op1), (_op2)) +#define ir_MOD_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MOD, (_op1), (_op2)) +#define ir_MOD_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MOD, (_op1), (_op2)) +#define ir_MOD_A(_op1, _op2) ir_BINARY_OP_A(IR_MOD, (_op1), (_op2)) +#define ir_MOD_C(_op1, _op2) ir_BINARY_OP_C(IR_MOD, (_op1), (_op2)) +#define ir_MOD_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MOD, (_op1), (_op2)) +#define ir_MOD_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MOD, (_op1), (_op2)) +#define ir_MOD_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MOD, (_op1), (_op2)) +#define ir_MOD_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MOD, (_op1), (_op2)) + +#define ir_NEG(_type, _op1) ir_UNARY_OP(IR_NEG, (_type), (_op1)) +#define ir_NEG_C(_op1) ir_UNARY_OP_C(IR_NEG, (_op1)) +#define ir_NEG_I8(_op1) ir_UNARY_OP_I8(IR_NEG, (_op1)) +#define ir_NEG_I16(_op1) ir_UNARY_OP_I16(IR_NEG, (_op1)) +#define ir_NEG_I32(_op1) ir_UNARY_OP_I32(IR_NEG, (_op1)) +#define ir_NEG_I64(_op1) ir_UNARY_OP_I64(IR_NEG, (_op1)) +#define ir_NEG_D(_op1) ir_UNARY_OP_D(IR_NEG, (_op1)) +#define ir_NEG_F(_op1) ir_UNARY_OP_F(IR_NEG, (_op1)) + +#define ir_ABS(_type, _op1) ir_UNARY_OP(IR_ABS, (_type), (_op1)) +#define ir_ABS_C(_op1) ir_UNARY_OP_C(IR_ABS,
(_op1)) +#define ir_ABS_I8(_op1) ir_UNARY_OP_I8(IR_ABS, (_op1)) +#define ir_ABS_I16(_op1) ir_UNARY_OP_I16(IR_ABS, (_op1)) +#define ir_ABS_I32(_op1) ir_UNARY_OP_I32(IR_ABS, (_op1)) +#define ir_ABS_I64(_op1) ir_UNARY_OP_I64(IR_ABS, (_op1)) +#define ir_ABS_D(_op1) ir_UNARY_OP_D(IR_ABS, (_op1)) +#define ir_ABS_F(_op1) ir_UNARY_OP_F(IR_ABS, (_op1)) + +#define ir_SEXT(_type, _op1) ir_UNARY_OP(IR_SEXT, (_type), (_op1)) +#define ir_SEXT_U8(_op1) ir_UNARY_OP_U8(IR_SEXT, (_op1)) +#define ir_SEXT_U16(_op1) ir_UNARY_OP_U16(IR_SEXT, (_op1)) +#define ir_SEXT_U32(_op1) ir_UNARY_OP_U32(IR_SEXT, (_op1)) +#define ir_SEXT_U64(_op1) ir_UNARY_OP_U64(IR_SEXT, (_op1)) +#define ir_SEXT_A(_op1) ir_UNARY_OP_A(IR_SEXT, (_op1)) +#define ir_SEXT_C(_op1) ir_UNARY_OP_C(IR_SEXT, (_op1)) +#define ir_SEXT_I8(_op1) ir_UNARY_OP_I8(IR_SEXT, (_op1)) +#define ir_SEXT_I16(_op1) ir_UNARY_OP_I16(IR_SEXT, (_op1)) +#define ir_SEXT_I32(_op1) ir_UNARY_OP_I32(IR_SEXT, (_op1)) +#define ir_SEXT_I64(_op1) ir_UNARY_OP_I64(IR_SEXT, (_op1)) + +#define ir_ZEXT(_type, _op1) ir_UNARY_OP(IR_ZEXT, (_type), (_op1)) +#define ir_ZEXT_U8(_op1) ir_UNARY_OP_U8(IR_ZEXT, (_op1)) +#define ir_ZEXT_U16(_op1) ir_UNARY_OP_U16(IR_ZEXT, (_op1)) +#define ir_ZEXT_U32(_op1) ir_UNARY_OP_U32(IR_ZEXT, (_op1)) +#define ir_ZEXT_U64(_op1) ir_UNARY_OP_U64(IR_ZEXT, (_op1)) +#define ir_ZEXT_A(_op1) ir_UNARY_OP_A(IR_ZEXT, (_op1)) +#define ir_ZEXT_C(_op1) ir_UNARY_OP_C(IR_ZEXT, (_op1)) +#define ir_ZEXT_I8(_op1) ir_UNARY_OP_I8(IR_ZEXT, (_op1)) +#define ir_ZEXT_I16(_op1) ir_UNARY_OP_I16(IR_ZEXT, (_op1)) +#define ir_ZEXT_I32(_op1) ir_UNARY_OP_I32(IR_ZEXT, (_op1)) +#define ir_ZEXT_I64(_op1) ir_UNARY_OP_I64(IR_ZEXT, (_op1)) + +#define ir_TRUNC(_type, _op1) ir_UNARY_OP(IR_TRUNC, (_type), (_op1)) +#define ir_TRUNC_U8(_op1) ir_UNARY_OP_U8(IR_TRUNC, (_op1)) +#define ir_TRUNC_U16(_op1) ir_UNARY_OP_U16(IR_TRUNC, (_op1)) +#define ir_TRUNC_U32(_op1) ir_UNARY_OP_U32(IR_TRUNC, (_op1)) +#define ir_TRUNC_U64(_op1) ir_UNARY_OP_U64(IR_TRUNC, (_op1)) +#define ir_TRUNC_A(_op1) ir_UNARY_OP_A(IR_TRUNC, (_op1)) +#define ir_TRUNC_C(_op1) ir_UNARY_OP_C(IR_TRUNC, (_op1)) +#define ir_TRUNC_I8(_op1) ir_UNARY_OP_I8(IR_TRUNC, (_op1)) +#define ir_TRUNC_I16(_op1) ir_UNARY_OP_I16(IR_TRUNC, (_op1)) +#define ir_TRUNC_I32(_op1) ir_UNARY_OP_I32(IR_TRUNC, (_op1)) +#define ir_TRUNC_I64(_op1) ir_UNARY_OP_I64(IR_TRUNC, (_op1)) + +#define ir_BITCAST(_type, _op1) ir_UNARY_OP(IR_BITCAST, (_type), (_op1)) +#define ir_BITCAST_U8(_op1) ir_UNARY_OP_U8(IR_BITCAST, (_op1)) +#define ir_BITCAST_U16(_op1) ir_UNARY_OP_U16(IR_BITCAST, (_op1)) +#define ir_BITCAST_U32(_op1) ir_UNARY_OP_U32(IR_BITCAST, (_op1)) +#define ir_BITCAST_U64(_op1) ir_UNARY_OP_U64(IR_BITCAST, (_op1)) +#define ir_BITCAST_A(_op1) ir_UNARY_OP_A(IR_BITCAST, (_op1)) +#define ir_BITCAST_C(_op1) ir_UNARY_OP_C(IR_BITCAST, (_op1)) +#define ir_BITCAST_I8(_op1) ir_UNARY_OP_I8(IR_BITCAST, (_op1)) +#define ir_BITCAST_I16(_op1) ir_UNARY_OP_I16(IR_BITCAST, (_op1)) +#define ir_BITCAST_I32(_op1) ir_UNARY_OP_I32(IR_BITCAST, (_op1)) +#define ir_BITCAST_I64(_op1) ir_UNARY_OP_I64(IR_BITCAST, (_op1)) +#define ir_BITCAST_D(_op1) ir_UNARY_OP_D(IR_BITCAST, (_op1)) +#define ir_BITCAST_F(_op1) ir_UNARY_OP_F(IR_BITCAST, (_op1)) + +#define ir_INT2FP(_type, _op1) ir_UNARY_OP(IR_INT2FP, (_type), (_op1)) +#define ir_INT2D(_op1) ir_UNARY_OP_D(IR_INT2FP, (_op1)) +#define ir_INT2F(_op1) ir_UNARY_OP_F(IR_INT2FP, (_op1)) + +#define ir_FP2INT(_type, _op1) ir_UNARY_OP(IR_FP2INT, (_type), (_op1)) +#define ir_FP2U8(_op1) ir_UNARY_OP_U8(IR_FP2INT, (_op1)) +#define ir_FP2U16(_op1) 
ir_UNARY_OP_U16(IR_FP2INT, (_op1)) +#define ir_FP2U32(_op1) ir_UNARY_OP_U32(IR_FP2INT, (_op1)) +#define ir_FP2U64(_op1) ir_UNARY_OP_U64(IR_FP2INT, (_op1)) +#define ir_FP2I8(_op1) ir_UNARY_OP_I8(IR_FP2INT, (_op1)) +#define ir_FP2I16(_op1) ir_UNARY_OP_I16(IR_FP2INT, (_op1)) +#define ir_FP2I32(_op1) ir_UNARY_OP_I32(IR_FP2INT, (_op1)) +#define ir_FP2I64(_op1) ir_UNARY_OP_I64(IR_FP2INT, (_op1)) + +#define ir_FP2FP(_type, _op1) ir_UNARY_OP(IR_FP2FP, (_type), (_op1)) +#define ir_F2D(_op1) ir_UNARY_OP_D(IR_FP2FP, (_op1)) +#define ir_D2F(_op1) ir_UNARY_OP_F(IR_FP2FP, (_op1)) + +#define ir_ADD_OV(_type, _op1, _op2) ir_BINARY_OP(IR_ADD_OV, (_type), (_op1), (_op2)) +#define ir_ADD_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ADD_OV, (_op1), (_op2)) +#define ir_ADD_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ADD_OV, (_op1), (_op2)) + +#define ir_SUB_OV(_type, _op1, _op2) ir_BINARY_OP(IR_SUB_OV, (_type), (_op1), (_op2)) +#define ir_SUB_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SUB_OV, (_op1), (_op2)) +#define ir_SUB_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SUB_OV, (_op1), (_op2)) + +#define ir_MUL_OV(_type, _op1, _op2) ir_BINARY_OP(IR_MUL_OV, (_type), (_op1), (_op2)) +#define ir_MUL_OV_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_A(_op1, _op2) ir_BINARY_OP_A(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_C(_op1, _op2) ir_BINARY_OP_C(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MUL_OV, (_op1), (_op2)) +#define ir_MUL_OV_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MUL_OV, (_op1), (_op2)) + +#define ir_OVERFLOW(_op1) ir_fold1(_ir_CTX, IR_OPT(IR_OVERFLOW, IR_BOOL), (_op1)) + +#define ir_NOT(_type, _op1) ir_UNARY_OP(IR_NOT, (_type), (_op1)) +#define ir_NOT_B(_op1) ir_UNARY_OP_B(IR_NOT, (_op1)) +#define ir_NOT_U8(_op1) ir_UNARY_OP_U8(IR_NOT, (_op1)) +#define ir_NOT_U16(_op1) ir_UNARY_OP_U16(IR_NOT, (_op1)) +#define ir_NOT_U32(_op1) ir_UNARY_OP_U32(IR_NOT, (_op1)) 
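All of these typed helpers, including the rest of the ir_NOT_* group immediately below, expand to the same ir_fold1/ir_fold2 calls with an IR_OPT(opcode, type) descriptor, so they compose directly in user code. A usage sketch (the helper function is hypothetical; the macros are the ones defined in this header, and a local named ctx is required because _ir_CTX defaults to ctx):

    /* Build IR computing |a - b| on signed 32-bit values. */
    static ir_ref emit_abs_diff_i32(ir_ctx *ctx, ir_ref a, ir_ref b)
    {
        return ir_ABS_I32(ir_SUB_I32(a, b)); /* folds to a constant when a and b are constants */
    }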
+#define ir_NOT_U64(_op1) ir_UNARY_OP_U64(IR_NOT, (_op1)) +#define ir_NOT_A(_op1) ir_UNARY_OP_A(IR_NOT, (_op1)) +#define ir_NOT_C(_op1) ir_UNARY_OP_C(IR_NOT, (_op1)) +#define ir_NOT_I8(_op1) ir_UNARY_OP_I8(IR_NOT, (_op1)) +#define ir_NOT_I16(_op1) ir_UNARY_OP_I16(IR_NOT, (_op1)) +#define ir_NOT_I32(_op1) ir_UNARY_OP_I32(IR_NOT, (_op1)) +#define ir_NOT_I64(_op1) ir_UNARY_OP_I64(IR_NOT, (_op1)) + +#define ir_OR(_type, _op1, _op2) ir_BINARY_OP(IR_OR, (_type), (_op1), (_op2)) +#define ir_OR_B(_op1, _op2) ir_BINARY_OP_B(IR_OR, (_op1), (_op2)) +#define ir_OR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_OR, (_op1), (_op2)) +#define ir_OR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_OR, (_op1), (_op2)) +#define ir_OR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_OR, (_op1), (_op2)) +#define ir_OR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_OR, (_op1), (_op2)) +#define ir_OR_A(_op1, _op2) ir_BINARY_OP_A(IR_OR, (_op1), (_op2)) +#define ir_OR_C(_op1, _op2) ir_BINARY_OP_C(IR_OR, (_op1), (_op2)) +#define ir_OR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_OR, (_op1), (_op2)) +#define ir_OR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_OR, (_op1), (_op2)) +#define ir_OR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_OR, (_op1), (_op2)) +#define ir_OR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_OR, (_op1), (_op2)) + +#define ir_AND(_type, _op1, _op2) ir_BINARY_OP(IR_AND, (_type), (_op1), (_op2)) +#define ir_AND_B(_op1, _op2) ir_BINARY_OP_B(IR_AND, (_op1), (_op2)) +#define ir_AND_U8(_op1, _op2) ir_BINARY_OP_U8(IR_AND, (_op1), (_op2)) +#define ir_AND_U16(_op1, _op2) ir_BINARY_OP_U16(IR_AND, (_op1), (_op2)) +#define ir_AND_U32(_op1, _op2) ir_BINARY_OP_U32(IR_AND, (_op1), (_op2)) +#define ir_AND_U64(_op1, _op2) ir_BINARY_OP_U64(IR_AND, (_op1), (_op2)) +#define ir_AND_A(_op1, _op2) ir_BINARY_OP_A(IR_AND, (_op1), (_op2)) +#define ir_AND_C(_op1, _op2) ir_BINARY_OP_C(IR_AND, (_op1), (_op2)) +#define ir_AND_I8(_op1, _op2) ir_BINARY_OP_I8(IR_AND, (_op1), (_op2)) +#define ir_AND_I16(_op1, _op2) ir_BINARY_OP_I16(IR_AND, (_op1), (_op2)) +#define ir_AND_I32(_op1, _op2) ir_BINARY_OP_I32(IR_AND, (_op1), (_op2)) +#define ir_AND_I64(_op1, _op2) ir_BINARY_OP_I64(IR_AND, (_op1), (_op2)) + +#define ir_XOR(_type, _op1, _op2) ir_BINARY_OP(IR_XOR, (_type), (_op1), (_op2)) +#define ir_XOR_B(_op1, _op2) ir_BINARY_OP_B(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_XOR, (_op1), (_op2)) +#define ir_XOR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_XOR, (_op1), (_op2)) +#define ir_XOR_A(_op1, _op2) ir_BINARY_OP_A(IR_XOR, (_op1), (_op2)) +#define ir_XOR_C(_op1, _op2) ir_BINARY_OP_C(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_XOR, (_op1), (_op2)) +#define ir_XOR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_XOR, (_op1), (_op2)) + +#define ir_SHL(_type, _op1, _op2) ir_BINARY_OP(IR_SHL, (_type), (_op1), (_op2)) +#define ir_SHL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHL, (_op1), (_op2)) +#define ir_SHL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHL, (_op1), (_op2)) +#define ir_SHL_A(_op1, _op2) ir_BINARY_OP_A(IR_SHL, (_op1), (_op2)) +#define ir_SHL_C(_op1, _op2) ir_BINARY_OP_C(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHL, 
(_op1), (_op2)) +#define ir_SHL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHL, (_op1), (_op2)) +#define ir_SHL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHL, (_op1), (_op2)) + +#define ir_SHR(_type, _op1, _op2) ir_BINARY_OP(IR_SHR, (_type), (_op1), (_op2)) +#define ir_SHR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SHR, (_op1), (_op2)) +#define ir_SHR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SHR, (_op1), (_op2)) +#define ir_SHR_A(_op1, _op2) ir_BINARY_OP_A(IR_SHR, (_op1), (_op2)) +#define ir_SHR_C(_op1, _op2) ir_BINARY_OP_C(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SHR, (_op1), (_op2)) +#define ir_SHR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SHR, (_op1), (_op2)) + +#define ir_SAR(_type, _op1, _op2) ir_BINARY_OP(IR_SAR, (_type), (_op1), (_op2)) +#define ir_SAR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_SAR, (_op1), (_op2)) +#define ir_SAR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_SAR, (_op1), (_op2)) +#define ir_SAR_A(_op1, _op2) ir_BINARY_OP_A(IR_SAR, (_op1), (_op2)) +#define ir_SAR_C(_op1, _op2) ir_BINARY_OP_C(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_SAR, (_op1), (_op2)) +#define ir_SAR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_SAR, (_op1), (_op2)) + +#define ir_ROL(_type, _op1, _op2) ir_BINARY_OP(IR_ROL, (_type), (_op1), (_op2)) +#define ir_ROL_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROL, (_op1), (_op2)) +#define ir_ROL_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROL, (_op1), (_op2)) +#define ir_ROL_A(_op1, _op2) ir_BINARY_OP_A(IR_ROL, (_op1), (_op2)) +#define ir_ROL_C(_op1, _op2) ir_BINARY_OP_C(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROL, (_op1), (_op2)) +#define ir_ROL_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROL, (_op1), (_op2)) + +#define ir_ROR(_type, _op1, _op2) ir_BINARY_OP(IR_ROR, (_type), (_op1), (_op2)) +#define ir_ROR_U8(_op1, _op2) ir_BINARY_OP_U8(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U16(_op1, _op2) ir_BINARY_OP_U16(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U32(_op1, _op2) ir_BINARY_OP_U32(IR_ROR, (_op1), (_op2)) +#define ir_ROR_U64(_op1, _op2) ir_BINARY_OP_U64(IR_ROR, (_op1), (_op2)) +#define ir_ROR_A(_op1, _op2) ir_BINARY_OP_A(IR_ROR, (_op1), (_op2)) +#define ir_ROR_C(_op1, _op2) ir_BINARY_OP_C(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I8(_op1, _op2) ir_BINARY_OP_I8(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I16(_op1, _op2) ir_BINARY_OP_I16(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I32(_op1, _op2) ir_BINARY_OP_I32(IR_ROR, (_op1), (_op2)) +#define ir_ROR_I64(_op1, _op2) ir_BINARY_OP_I64(IR_ROR, (_op1), (_op2)) + +#define ir_BSWAP(_type, _op1) ir_UNARY_OP(IR_BSWAP, (_type), (_op1)) +#define ir_BSWAP_U16(_op1) 
ir_UNARY_OP_U16(IR_BSWAP, (_op1)) +#define ir_BSWAP_U32(_op1) ir_UNARY_OP_U32(IR_BSWAP, (_op1)) +#define ir_BSWAP_U64(_op1) ir_UNARY_OP_U64(IR_BSWAP, (_op1)) +#define ir_BSWAP_A(_op1) ir_UNARY_OP_A(IR_BSWAP, (_op1)) +#define ir_BSWAP_I16(_op1) ir_UNARY_OP_I16(IR_BSWAP, (_op1)) +#define ir_BSWAP_I32(_op1) ir_UNARY_OP_I32(IR_BSWAP, (_op1)) +#define ir_BSWAP_I64(_op1) ir_UNARY_OP_I64(IR_BSWAP, (_op1)) + +#define ir_MIN(_type, _op1, _op2) ir_BINARY_OP(IR_MIN, (_type), (_op1), (_op2)) +#define ir_MIN_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MIN, (_op1), (_op2)) +#define ir_MIN_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MIN, (_op1), (_op2)) +#define ir_MIN_A(_op1, _op2) ir_BINARY_OP_A(IR_MIN, (_op1), (_op2)) +#define ir_MIN_C(_op1, _op2) ir_BINARY_OP_C(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MIN, (_op1), (_op2)) +#define ir_MIN_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MIN, (_op1), (_op2)) +#define ir_MIN_D(_op1, _op2) ir_BINARY_OP_D(IR_MIN, (_op1), (_op2)) +#define ir_MIN_F(_op1, _op2) ir_BINARY_OP_F(IR_MIN, (_op1), (_op2)) + +#define ir_MAX(_type, _op1, _op2) ir_BINARY_OP(IR_MAX, (_type), (_op1), (_op2)) +#define ir_MAX_U8(_op1, _op2) ir_BINARY_OP_U8(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U16(_op1, _op2) ir_BINARY_OP_U16(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U32(_op1, _op2) ir_BINARY_OP_U32(IR_MAX, (_op1), (_op2)) +#define ir_MAX_U64(_op1, _op2) ir_BINARY_OP_U64(IR_MAX, (_op1), (_op2)) +#define ir_MAX_A(_op1, _op2) ir_BINARY_OP_A(IR_MAX, (_op1), (_op2)) +#define ir_MAX_C(_op1, _op2) ir_BINARY_OP_C(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I8(_op1, _op2) ir_BINARY_OP_I8(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I16(_op1, _op2) ir_BINARY_OP_I16(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I32(_op1, _op2) ir_BINARY_OP_I32(IR_MAX, (_op1), (_op2)) +#define ir_MAX_I64(_op1, _op2) ir_BINARY_OP_I64(IR_MAX, (_op1), (_op2)) +#define ir_MAX_D(_op1, _op2) ir_BINARY_OP_D(IR_MAX, (_op1), (_op2)) +#define ir_MAX_F(_op1, _op2) ir_BINARY_OP_F(IR_MAX, (_op1), (_op2)) + +#define ir_COND(_type, _op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, (_type)), (_op1), (_op2), (_op3)) +#define ir_COND_U8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U8), (_op1), (_op2), (_op3)) +#define ir_COND_U16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U16), (_op1), (_op2), (_op3)) +#define ir_COND_U32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U32), (_op1), (_op2), (_op3)) +#define ir_COND_U64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_U64), (_op1), (_op2), (_op3)) +#define ir_COND_A(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_ADDR), (_op1), (_op2), (_op3)) +#define ir_COND_C(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_CHAR), (_op1), (_op2), (_op3)) +#define ir_COND_I8(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I8), (_op1), (_op2), (_op3)) +#define ir_COND_I16(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I16), (_op1), (_op2), (_op3)) +#define ir_COND_I32(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I32), (_op1), (_op2), (_op3)) +#define ir_COND_I64(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_I64), (_op1), (_op2), (_op3)) +#define ir_COND_D(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_DOUBLE), (_op1), (_op2), (_op3))
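ir_COND (whose IR_FLOAT variant completes the group just below) is a value-level select: op1 is the condition, op2 and op3 the two alternatives. Because it is data flow rather than control flow, the backend's ir_emit_cond seen earlier can lower it to a branchless csel/cmov where the target allows. A usage sketch (hypothetical helper; in practice ir_MIN_U32 above expresses this directly):

    /* Branch-free unsigned minimum from a compare plus a select. */
    static ir_ref emit_umin_u32(ir_ctx *ctx, ir_ref a, ir_ref b)
    {
        return ir_COND_U32(ir_ULT(a, b), a, b); /* a < b ? a : b */
    }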
+#define ir_COND_F(_op1, _op2, _op3) ir_fold3(_ir_CTX, IR_OPT(IR_COND, IR_FLOAT), (_op1), (_op2), (_op3)) + +#define ir_PHI_2(type, _src1, _src2) _ir_PHI_2(_ir_CTX, type, (_src1), (_src2)) +#define ir_PHI_N(type, _n, _inputs) _ir_PHI_N(_ir_CTX, type, (_n), (_inputs)) +#define ir_PHI_SET_OP(_ref, _pos, _src) _ir_PHI_SET_OP(_ir_CTX, (_ref), (_pos), (_src)) + +#define ir_COPY(_type, _op1) ir_UNARY_OP(IR_COPY, (_type), (_op1)) +#define ir_COPY_B(_op1) ir_UNARY_OP_B(IR_COPY, (_op1)) +#define ir_COPY_U8(_op1) ir_UNARY_OP_U8(IR_COPY, (_op1)) +#define ir_COPY_U16(_op1) ir_UNARY_OP_U16(IR_COPY, (_op1)) +#define ir_COPY_U32(_op1) ir_UNARY_OP_U32(IR_COPY, (_op1)) +#define ir_COPY_U64(_op1) ir_UNARY_OP_U64(IR_COPY, (_op1)) +#define ir_COPY_A(_op1) ir_UNARY_OP_A(IR_COPY, (_op1)) +#define ir_COPY_C(_op1) ir_UNARY_OP_C(IR_COPY, (_op1)) +#define ir_COPY_I8(_op1) ir_UNARY_OP_I8(IR_COPY, (_op1)) +#define ir_COPY_I16(_op1) ir_UNARY_OP_I16(IR_COPY, (_op1)) +#define ir_COPY_I32(_op1) ir_UNARY_OP_I32(IR_COPY, (_op1)) +#define ir_COPY_I64(_op1) ir_UNARY_OP_I64(IR_COPY, (_op1)) +#define ir_COPY_D(_op1) ir_UNARY_OP_D(IR_COPY, (_op1)) +#define ir_COPY_F(_op1) ir_UNARY_OP_F(IR_COPY, (_op1)) + +/* Helper to add address with a constant offset */ +#define ir_ADD_OFFSET(_addr, _offset) _ir_ADD_OFFSET(_ir_CTX, (_addr), (_offset)) + +/* Unfoldable variant of COPY */ +#define ir_HARD_COPY(_type, _op1) ir_BINARY_OP(IR_COPY, (_type), (_op1), 1) +#define ir_HARD_COPY_B(_op1) ir_BINARY_OP_B(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U8(_op1) ir_BINARY_OP_U8(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U16(_op1) ir_BINARY_OP_U16(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U32(_op1) ir_BINARY_OP_U32(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_U64(_op1) ir_BINARY_OP_U64(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_A(_op1) ir_BINARY_OP_A(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_C(_op1) ir_BINARY_OP_C(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I8(_op1) ir_BINARY_OP_I8(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I16(_op1) ir_BINARY_OP_I16(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I32(_op1) ir_BINARY_OP_I32(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_I64(_op1) ir_BINARY_OP_I64(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_D(_op1) ir_BINARY_OP_D(IR_COPY, (_op1), 1) +#define ir_HARD_COPY_F(_op1) ir_BINARY_OP_F(IR_COPY, (_op1), 1) + +#define ir_PARAM(_type, _name, _num) _ir_PARAM(_ir_CTX, (_type), (_name), (_num)) +#define ir_VAR(_type, _name) _ir_VAR(_ir_CTX, (_type), (_name)) + +#define ir_CALL(type, func) _ir_CALL(_ir_CTX, type, func) +#define ir_CALL_1(type, func, a1) _ir_CALL_1(_ir_CTX, type, func, a1) +#define ir_CALL_2(type, func, a1, a2) _ir_CALL_2(_ir_CTX, type, func, a1, a2) +#define ir_CALL_3(type, func, a1, a2, a3) _ir_CALL_3(_ir_CTX, type, func, a1, a2, a3) +#define ir_CALL_4(type, func, a1, a2, a3, a4) _ir_CALL_4(_ir_CTX, type, func, a1, a2, a3, a4) +#define ir_CALL_5(type, func, a1, a2, a3, a4, a5) _ir_CALL_5(_ir_CTX, type, func, a1, a2, a3, a4, a5) +#define ir_CALL_N(type, func, count, args) _ir_CALL_N(_ir_CTX, type, func, count, args) + +#define ir_TAILCALL(type, func) _ir_TAILCALL(_ir_CTX, type, func) +#define ir_TAILCALL_1(type, func, a1) _ir_TAILCALL_1(_ir_CTX, type, func, a1) +#define ir_TAILCALL_2(type, func, a1, a2) _ir_TAILCALL_2(_ir_CTX, type, func, a1, a2) +#define ir_TAILCALL_3(type, func, a1, a2, a3) _ir_TAILCALL_3(_ir_CTX, type, func, a1, a2, a3) +#define ir_TAILCALL_4(type, func, a1, a2, a3, a4) _ir_TAILCALL_4(_ir_CTX, type, func, a1, a2, a3, a4) +#define ir_TAILCALL_5(type, func, a1, a2, a3, a4, a5) _ir_TAILCALL_5(_ir_CTX, 
type, func, a1, a2, a3, a4, a5) +#define ir_TAILCALL_N(type, func, count, args) _ir_TAILCALL_N(_ir_CTX, type, func, count, args) + +#define ir_ALLOCA(_size) _ir_ALLOCA(_ir_CTX, (_size)) +#define ir_AFREE(_size) _ir_AFREE(_ir_CTX, (_size)) +#define ir_VADDR(_var) ir_emit1(_ir_CTX, IR_OPT(IR_VADDR, IR_ADDR), (_var)) +#define ir_VLOAD(_type, _var) _ir_VLOAD(_ir_CTX, (_type), (_var)) +#define ir_VLOAD_B(_var) _ir_VLOAD(_ir_CTX, IR_BOOL, (_var)) +#define ir_VLOAD_U8(_var) _ir_VLOAD(_ir_CTX, IR_U8, (_var)) +#define ir_VLOAD_U16(_var) _ir_VLOAD(_ir_CTX, IR_U16, (_var)) +#define ir_VLOAD_U32(_var) _ir_VLOAD(_ir_CTX, IR_U32, (_var)) +#define ir_VLOAD_U64(_var) _ir_VLOAD(_ir_CTX, IR_U64, (_var)) +#define ir_VLOAD_A(_var) _ir_VLOAD(_ir_CTX, IR_ADDR, (_var)) +#define ir_VLOAD_C(_var) _ir_VLOAD(_ir_CTX, IR_CHAR, (_var)) +#define ir_VLOAD_I8(_var) _ir_VLOAD(_ir_CTX, IR_I8, (_var)) +#define ir_VLOAD_I16(_var) _ir_VLOAD(_ir_CTX, IR_I16, (_var)) +#define ir_VLOAD_I32(_var) _ir_VLOAD(_ir_CTX, IR_I32, (_var)) +#define ir_VLOAD_I64(_var) _ir_VLOAD(_ir_CTX, IR_I64, (_var)) +#define ir_VLOAD_D(_var) _ir_VLOAD(_ir_CTX, IR_DOUBLE, (_var)) +#define ir_VLOAD_F(_var) _ir_VLOAD(_ir_CTX, IR_FLOAT, (_var)) +#define ir_VSTORE(_var, _val) _ir_VSTORE(_ir_CTX, (_var), (_val)) +#define ir_RLOAD(_type, _reg) _ir_RLOAD(_ir_CTX, (_type), (_reg)) +#define ir_RLOAD_B(_reg) _ir_RLOAD(_ir_CTX, IR_BOOL, (_reg)) +#define ir_RLOAD_U8(_reg) _ir_RLOAD(_ir_CTX, IR_U8, (_reg)) +#define ir_RLOAD_U16(_reg) _ir_RLOAD(_ir_CTX, IR_U16, (_reg)) +#define ir_RLOAD_U32(_reg) _ir_RLOAD(_ir_CTX, IR_U32, (_reg)) +#define ir_RLOAD_U64(_reg) _ir_RLOAD(_ir_CTX, IR_U64, (_reg)) +#define ir_RLOAD_A(_reg) _ir_RLOAD(_ir_CTX, IR_ADDR, (_reg)) +#define ir_RLOAD_C(_reg) _ir_RLOAD(_ir_CTX, IR_CHAR, (_reg)) +#define ir_RLOAD_I8(_reg) _ir_RLOAD(_ir_CTX, IR_I8, (_reg)) +#define ir_RLOAD_I16(_reg) _ir_RLOAD(_ir_CTX, IR_I16, (_reg)) +#define ir_RLOAD_I32(_reg) _ir_RLOAD(_ir_CTX, IR_I32, (_reg)) +#define ir_RLOAD_I64(_reg) _ir_RLOAD(_ir_CTX, IR_I64, (_reg)) +#define ir_RLOAD_D(_reg) _ir_RLOAD(_ir_CTX, IR_DOUBLE, (_reg)) +#define ir_RLOAD_F(_reg) _ir_RLOAD(_ir_CTX, IR_FLOAT, (_reg)) +#define ir_RSTORE(_reg, _val) _ir_RSTORE(_ir_CTX, (_reg), (_val)) +#define ir_LOAD(_type, _addr) _ir_LOAD(_ir_CTX, (_type), (_addr)) +#define ir_LOAD_B(_addr) _ir_LOAD(_ir_CTX, IR_BOOL, (_addr)) +#define ir_LOAD_U8(_addr) _ir_LOAD(_ir_CTX, IR_U8, (_addr)) +#define ir_LOAD_U16(_addr) _ir_LOAD(_ir_CTX, IR_U16, (_addr)) +#define ir_LOAD_U32(_addr) _ir_LOAD(_ir_CTX, IR_U32, (_addr)) +#define ir_LOAD_U64(_addr) _ir_LOAD(_ir_CTX, IR_U64, (_addr)) +#define ir_LOAD_A(_addr) _ir_LOAD(_ir_CTX, IR_ADDR, (_addr)) +#define ir_LOAD_C(_addr) _ir_LOAD(_ir_CTX, IR_CHAR, (_addr)) +#define ir_LOAD_I8(_addr) _ir_LOAD(_ir_CTX, IR_I8, (_addr)) +#define ir_LOAD_I16(_addr) _ir_LOAD(_ir_CTX, IR_I16, (_addr)) +#define ir_LOAD_I32(_addr) _ir_LOAD(_ir_CTX, IR_I32, (_addr)) +#define ir_LOAD_I64(_addr) _ir_LOAD(_ir_CTX, IR_I64, (_addr)) +#define ir_LOAD_D(_addr) _ir_LOAD(_ir_CTX, IR_DOUBLE, (_addr)) +#define ir_LOAD_F(_addr) _ir_LOAD(_ir_CTX, IR_FLOAT, (_addr)) +#define ir_STORE(_addr, _val) _ir_STORE(_ir_CTX, (_addr), (_val)) +#define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset)) +#define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0) + +#define ir_START() _ir_START(_ir_CTX) +#define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num)) +#define ir_BEGIN(_src) _ir_BEGIN(_ir_CTX, (_src)) +#define ir_IF(_condition) _ir_IF(_ir_CTX, (_condition)) +#define 
ir_IF_TRUE(_if) _ir_IF_TRUE(_ir_CTX, (_if)) +#define ir_IF_TRUE_cold(_if) _ir_IF_TRUE_cold(_ir_CTX, (_if)) +#define ir_IF_FALSE(_if) _ir_IF_FALSE(_ir_CTX, (_if)) +#define ir_IF_FALSE_cold(_if) _ir_IF_FALSE_cold(_ir_CTX, (_if)) +#define ir_END() _ir_END(_ir_CTX) +#define ir_MERGE_2(_src1, _src2) _ir_MERGE_2(_ir_CTX, (_src1), (_src2)) +#define ir_MERGE_N(_n, _inputs) _ir_MERGE_N(_ir_CTX, (_n), (_inputs)) +#define ir_MERGE_SET_OP(_ref, _pos, _src) _ir_MERGE_SET_OP(_ir_CTX, (_ref), (_pos), (_src)) +#define ir_LOOP_BEGIN(_src1) _ir_LOOP_BEGIN(_ir_CTX, (_src1)) +#define ir_LOOP_END() _ir_LOOP_END(_ir_CTX) +#define ir_SWITCH(_val) _ir_SWITCH(_ir_CTX, (_val)) +#define ir_CASE_VAL(_switch, _val) _ir_CASE_VAL(_ir_CTX, (_switch), (_val)) +#define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch)) +#define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val)) +#define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr)) +#define ir_UNREACHABLE() _ir_UNREACHABLE(_ir_CTX) + +#define ir_GUARD(_condition, _addr) _ir_GUARD(_ir_CTX, (_condition), (_addr)) +#define ir_GUARD_NOT(_condition, _addr) _ir_GUARD_NOT(_ir_CTX, (_condition), (_addr)) + +#define ir_SNAPSHOT(_n) _ir_SNAPSHOT(_ir_CTX, (_n)) +#define ir_SNAPSHOT_SET_OP(_s, _pos, _v) _ir_SNAPSHOT_SET_OP(_ir_CTX, (_s), (_pos), (_v)) + +#define ir_EXITCALL(_func) _ir_EXITCALL(_ir_CTX,(_func)) + +#define ir_END_list(_list) do { _list = _ir_END_LIST(_ir_CTX, _list); } while (0) +#define ir_MERGE_list(_list) _ir_MERGE_LIST(_ir_CTX, (_list)) + +#define ir_MERGE_WITH(_src2) do {ir_ref end = ir_END(); ir_MERGE_2(end, _src2);} while (0) +#define ir_MERGE_WITH_EMPTY_TRUE(_if) do {ir_ref end = ir_END(); ir_IF_TRUE(_if); ir_MERGE_2(end, ir_END());} while (0) +#define ir_MERGE_WITH_EMPTY_FALSE(_if) do {ir_ref end = ir_END(); ir_IF_FALSE(_if); ir_MERGE_2(end, ir_END());} while (0) + +ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset); +ir_ref _ir_PHI_2(ir_ctx *ctx, ir_type type, ir_ref src1, ir_ref src2); +ir_ref _ir_PHI_N(ir_ctx *ctx, ir_type type, ir_ref n, ir_ref *inputs); +void _ir_PHI_SET_OP(ir_ctx *ctx, ir_ref phi, ir_ref pos, ir_ref src); +ir_ref _ir_PARAM(ir_ctx *ctx, ir_type type, const char* name, ir_ref num); +ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name); +ir_ref _ir_CALL(ir_ctx *ctx, ir_type type, ir_ref func); +ir_ref _ir_CALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1); +ir_ref _ir_CALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2); +ir_ref _ir_CALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3); +ir_ref _ir_CALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4); +ir_ref _ir_CALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5); +ir_ref _ir_CALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); +void _ir_TAILCALL(ir_ctx *ctx, ir_type type, ir_ref func); +void _ir_TAILCALL_1(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1); +void _ir_TAILCALL_2(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2); +void _ir_TAILCALL_3(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3); +void _ir_TAILCALL_4(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4); +void _ir_TAILCALL_5(ir_ctx *ctx, ir_type type, ir_ref func, ir_ref arg1, ir_ref arg2, ir_ref arg3, ir_ref arg4, ir_ref arg5); +ir_ref _ir_TAILCALL_N(ir_ctx *ctx, ir_type type, ir_ref func, uint32_t count, ir_ref *args); 
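The pattern behind the whole header: every macro expands to a _ir_*() call whose first argument is _ir_CTX (a local named ctx by default), and the current control edge is threaded through the context implicitly by ir_START, ir_RETURN and the other control-flow macros; the remaining _ir_* prototypes continue below. A minimal end-to-end sketch, assuming the context was already initialized by the framework's usual setup (that setup is not part of this header):

    /* Construct IR equivalent to: int32_t add(int32_t x, int32_t y) { return x + y; } */
    static void build_add(ir_ctx *ctx)
    {
        ir_START();
        ir_ref x = ir_PARAM(IR_I32, "x", 1);
        ir_ref y = ir_PARAM(IR_I32, "y", 2);
        ir_RETURN(ir_ADD_I32(x, y));
    }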
+ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size); +void _ir_AFREE(ir_ctx *ctx, ir_ref size); +ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var); +void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val); +ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg); +void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val); +ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr); +void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val); +void _ir_START(ir_ctx *ctx); +void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num); +void _ir_BEGIN(ir_ctx *ctx, ir_ref src); +ir_ref _ir_END(ir_ctx *ctx); +ir_ref _ir_END_LIST(ir_ctx *ctx, ir_ref list); +ir_ref _ir_IF(ir_ctx *ctx, ir_ref condition); +void _ir_IF_TRUE(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_TRUE_cold(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_FALSE(ir_ctx *ctx, ir_ref if_ref); +void _ir_IF_FALSE_cold(ir_ctx *ctx, ir_ref if_ref); +void _ir_MERGE_2(ir_ctx *ctx, ir_ref src1, ir_ref src2); +void _ir_MERGE_N(ir_ctx *ctx, ir_ref n, ir_ref *inputs); +void _ir_MERGE_SET_OP(ir_ctx *ctx, ir_ref merge, ir_ref pos, ir_ref src); +void _ir_MERGE_LIST(ir_ctx *ctx, ir_ref list); +ir_ref _ir_LOOP_BEGIN(ir_ctx *ctx, ir_ref src1); +ir_ref _ir_LOOP_END(ir_ctx *ctx); +ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset); +void _ir_UNREACHABLE(ir_ctx *ctx); +ir_ref _ir_SWITCH(ir_ctx *ctx, ir_ref val); +void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val); +void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref); +void _ir_RETURN(ir_ctx *ctx, ir_ref val); +void _ir_IJMP(ir_ctx *ctx, ir_ref addr); +void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr); +void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr); +ir_ref _ir_SNAPSHOT(ir_ctx *ctx, ir_ref n); +void _ir_SNAPSHOT_SET_OP(ir_ctx *ctx, ir_ref snapshot, ir_ref pos, ir_ref val); +ir_ref _ir_EXITCALL(ir_ctx *ctx, ir_ref func); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* IR_BUILDER_H */ diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c new file mode 100644 index 0000000000000..b886319beb4d4 --- /dev/null +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -0,0 +1,1219 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (CFG - Control Flow Graph) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +static ir_ref _ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin) +{ + ir_ref prev, next; + ir_use_list *use_list; + ir_ref n, *p; + + IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN); + IR_ASSERT(ctx->ir_base[end].op == IR_END); + IR_ASSERT(ctx->ir_base[begin].op1 == end); + IR_ASSERT(ctx->use_lists[end].count == 1); + + prev = ctx->ir_base[end].op1; + + use_list = &ctx->use_lists[begin]; + IR_ASSERT(use_list->count == 1); + next = ctx->use_edges[use_list->refs]; + + /* remove BEGIN and END */ + ctx->ir_base[begin].op = IR_NOP; + ctx->ir_base[begin].op1 = IR_UNUSED; + ctx->use_lists[begin].count = 0; + ctx->ir_base[end].op = IR_NOP; + ctx->ir_base[end].op1 = IR_UNUSED; + ctx->use_lists[end].count = 0; + + /* connect their predecessor and successor */ + ctx->ir_base[next].op1 = prev; + use_list = &ctx->use_lists[prev]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + if (*p == end) { + *p = next; + } + } + + return next; +} + +IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref *p, use, n = use_list->count; + + if (n < 2) { + if (n == 1) { + use = ctx->use_edges[use_list->refs]; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } + } else { + p = &ctx->use_edges[use_list->refs]; + if (n == 2) { + use = *p; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + use = *(p + 1); + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } else { + for (; n > 0; p++, n--) { + use = *p; + IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL); + ir_worklist_push(worklist, use); + } + } + } +} + +IR_ALWAYS_INLINE void _ir_add_predecessors(const ir_insn *insn, ir_worklist *worklist) +{ + ir_ref n, ref; + const ir_ref *p; + + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_worklist_push(worklist, ref); + } + } else if (insn->op != IR_START) { + if (EXPECTED(insn->op1)) { + ir_worklist_push(worklist, insn->op1); + } + } +} + +int ir_build_cfg(ir_ctx *ctx) +{ + ir_ref n, *p, ref, start, end, next; + uint32_t b; + ir_insn *insn; + ir_worklist worklist; + uint32_t bb_init_falgs; + uint32_t count, bb_count = 0; + uint32_t edges_count = 0; + ir_block *blocks, *bb; + uint32_t *_blocks, *edges; + ir_use_list *use_list; + uint32_t len = ir_bitset_len(ctx->insns_count); + ir_bitset bb_starts = ir_mem_calloc(len * 2, IR_BITSET_BITS / 8); + ir_bitset bb_leaks = bb_starts + len; + _blocks = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); + ir_worklist_init(&worklist, ctx->insns_count); + + /* First try to perform backward DFS search starting from "stop" nodes */ + + /* Add all "stop" nodes */ + ref = ctx->ir_base[1].op1; + while (ref) { + ir_worklist_push(&worklist, ref); + ref = ctx->ir_base[ref].op3; + } + + while (ir_worklist_len(&worklist)) { + ref = ir_worklist_pop(&worklist); + insn = &ctx->ir_base[ref]; + + IR_ASSERT(IR_IS_BB_END(insn->op)); + /* Remember BB end */ + end = ref; + /* Some successors of IF and SWITCH nodes may be inaccessible by backward DFS */ + use_list = &ctx->use_lists[end]; + n = use_list->count; + if (n > 1) { + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + /* Remember 
possible inaccessible successors */ + ir_bitset_incl(bb_leaks, *p); + } + } + /* Skip control nodes until BB start */ + ref = insn->op1; + while (1) { + insn = &ctx->ir_base[ref]; + if (IR_IS_BB_START(insn->op)) { + if (insn->op == IR_BEGIN + && (ctx->flags & IR_OPT_CFG) + && ctx->ir_base[insn->op1].op == IR_END + && ctx->use_lists[ref].count == 1) { + ref = _ir_merge_blocks(ctx, insn->op1, ref); + ref = ctx->ir_base[ref].op1; + continue; + } + break; + } + ref = insn->op1; // follow connected control blocks until BB start + } + /* Mark BB Start */ + bb_count++; + _blocks[ref] = end; + ir_bitset_incl(bb_starts, ref); + /* Add predecessors */ + _ir_add_predecessors(insn, &worklist); + } + + /* The backward DFS may miss some branches that end in infinite loops. */ + /* Try a forward DFS (in most cases all nodes have already been processed). */ + + /* START node may be inaccessible from "stop" nodes */ + ir_bitset_incl(bb_leaks, 1); + + /* Add the unprocessed START and the successors of IF and SWITCH */ + IR_BITSET_FOREACH_DIFFERENCE(bb_leaks, bb_starts, len, start) { + ir_worklist_push(&worklist, start); + } IR_BITSET_FOREACH_END(); + + if (ir_worklist_len(&worklist)) { + ir_bitset_union(worklist.visited, bb_starts, len); + do { + ref = ir_worklist_pop(&worklist); + insn = &ctx->ir_base[ref]; + + IR_ASSERT(IR_IS_BB_START(insn->op)); + /* Remember BB start */ + start = ref; + /* Skip control nodes until BB end */ + while (1) { + use_list = &ctx->use_lists[ref]; + n = use_list->count; + next = IR_UNUSED; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + next = *p; + insn = &ctx->ir_base[next]; + if ((ir_op_flags[insn->op] & IR_OP_FLAG_CONTROL) && insn->op1 == ref) { + break; + } + } + IR_ASSERT(next != IR_UNUSED); + ref = next; +next_successor: + if (IR_IS_BB_END(insn->op)) { + if (insn->op == IR_END && (ctx->flags & IR_OPT_CFG)) { + use_list = &ctx->use_lists[ref]; + IR_ASSERT(use_list->count == 1); + next = ctx->use_edges[use_list->refs]; + + if (ctx->ir_base[next].op == IR_BEGIN + && ctx->use_lists[next].count == 1) { + ref = _ir_merge_blocks(ctx, ref, next); + insn = &ctx->ir_base[ref]; + goto next_successor; + } + } + break; + } + } + /* Mark BB Start */ + bb_count++; + _blocks[start] = ref; + ir_bitset_incl(bb_starts, start); + /* Add successors */ + _ir_add_successors(ctx, ref, &worklist); + } while (ir_worklist_len(&worklist)); + } + + IR_ASSERT(bb_count > 0); + + /* Create an array of basic blocks and count successor/predecessor edges for each BB */ + blocks = ir_mem_malloc((bb_count + 1) * sizeof(ir_block)); + b = 1; + bb = blocks + 1; + count = 0; + /* SCCP already removed UNREACHABLE blocks, otherwise all blocks are marked as UNREACHABLE first */ + bb_init_falgs = (ctx->flags & IR_SCCP_DONE) ? 
0 : IR_BB_UNREACHABLE; + IR_BITSET_FOREACH(bb_starts, len, start) { + end = _blocks[start]; + _blocks[start] = b; + _blocks[end] = b; + insn = &ctx->ir_base[start]; + IR_ASSERT(IR_IS_BB_START(insn->op)); + IR_ASSERT(end > start); + bb->start = start; + bb->end = end; + bb->successors = count; + count += ctx->use_lists[end].count; + bb->successors_count = 0; + bb->predecessors = count; + bb->dom_parent = 0; + bb->dom_depth = 0; + bb->dom_child = 0; + bb->dom_next_child = 0; + bb->loop_header = 0; + bb->loop_depth = 0; + if (insn->op == IR_START) { + bb->flags = IR_BB_START; + bb->predecessors_count = 0; + } else { + bb->flags = bb_init_falgs; + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + bb->predecessors_count = n; + edges_count += n; + count += n; + } else if (EXPECTED(insn->op1)) { + if (insn->op == IR_ENTRY) { + bb->flags |= IR_BB_ENTRY; + ctx->entries_count++; + } + bb->predecessors_count = 1; + edges_count++; + count++; + } else { + IR_ASSERT(insn->op == IR_BEGIN); /* start of unreachable block */ + bb->predecessors_count = 0; + } + } + b++; + bb++; + } IR_BITSET_FOREACH_END(); + IR_ASSERT(count == edges_count * 2); + ir_mem_free(bb_starts); + + /* Create an array of successor/predecessors control edges */ + edges = ir_mem_malloc(edges_count * 2 * sizeof(uint32_t)); + bb = blocks + 1; + for (b = 1; b <= bb_count; b++, bb++) { + insn = &ctx->ir_base[bb->start]; + if (bb->predecessors_count > 1) { + uint32_t *q = edges + bb->predecessors; + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, q++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_ref pred_b = _blocks[ref]; + ir_block *pred_bb = &blocks[pred_b]; + *q = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } else if (bb->predecessors_count == 1) { + ref = insn->op1; + IR_ASSERT(ref); + IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL); + ir_ref pred_b = _blocks[ref]; + ir_block *pred_bb = &blocks[pred_b]; + edges[bb->predecessors] = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } + + ctx->cfg_blocks_count = bb_count; + ctx->cfg_edges_count = edges_count * 2; + ctx->cfg_blocks = blocks; + ctx->cfg_edges = edges; + ctx->cfg_map = _blocks; + + if (!(ctx->flags & IR_SCCP_DONE)) { + uint32_t reachable_count = 0; + + /* Mark reachable blocks */ + ir_worklist_clear(&worklist); + ir_worklist_push(&worklist, 1); + while (ir_worklist_len(&worklist) != 0) { + uint32_t *p; + + reachable_count++; + b = ir_worklist_pop(&worklist); + bb = &blocks[b]; + bb->flags &= ~IR_BB_UNREACHABLE; + n = bb->successors_count; + if (n > 1) { + for (p = edges + bb->successors; n > 0; p++, n--) { + ir_worklist_push(&worklist, *p); + } + } else if (n == 1) { + ir_worklist_push(&worklist, edges[bb->successors]); + } + } + if (reachable_count != ctx->cfg_blocks_count) { + ir_remove_unreachable_blocks(ctx); + } + } + + ir_worklist_free(&worklist); + + return 1; +} + +static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from) +{ + uint32_t i, *p, *q, n = 0; + + p = q = &ctx->cfg_edges[bb->predecessors]; + for (i = 0; i < bb->predecessors_count; i++, p++) { + if (*p != from) { + if (p != q) { + *q = *p; + } + q++; + n++; + } + } + IR_ASSERT(n != bb->predecessors_count); + bb->predecessors_count = n; +} + +static void ir_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p, *q, use; + ir_use_list *use_list = &ctx->use_lists[from]; + ir_ref skip = 0; + + n = use_list->count; + for (j = 0, p = q = 
&ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == ref) { + skip++; + } else { + if (p != q) { + *q = use; + } + q++; + } + } + use_list->count -= skip; +} + +static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from) +{ + ir_ref i, j, n, k, *p, use; + ir_insn *use_insn; + ir_use_list *use_list; + ir_bitset life_inputs; + ir_insn *insn = &ctx->ir_base[merge]; + + IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); + n = insn->inputs_count; + i = 1; + life_inputs = ir_bitset_malloc(n + 1); + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + + if (input != from) { + if (i != j) { + ir_insn_set_op(insn, i, input); + } + ir_bitset_incl(life_inputs, j); + i++; + } + } + i--; + if (i == 1) { + insn->op = IR_BEGIN; + insn->inputs_count = 0; + use_list = &ctx->use_lists[merge]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + /* Convert PHI to COPY */ + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + use_insn->op1 = ir_insn_op(use_insn, j); + } else if (input > 0) { + ir_remove_from_use_list(ctx, input, use); + } + } + use_insn->op = IR_COPY; + use_insn->op2 = IR_UNUSED; + use_insn->op3 = IR_UNUSED; + ir_remove_from_use_list(ctx, merge, use); + } + } + } + } else { + insn->inputs_count = i; + + n++; + use_list = &ctx->use_lists[merge]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + IR_ASSERT(input); + if (i != j) { + ir_insn_set_op(use_insn, i, input); + } + i++; + } else if (input > 0) { + ir_remove_from_use_list(ctx, input, use); + } + } + } + } + } + } + ir_mem_free(life_inputs); + ir_remove_from_use_list(ctx, from, merge); +} + +/* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */ +int ir_remove_unreachable_blocks(ir_ctx *ctx) +{ + uint32_t b, *p, i; + uint32_t unreachable_count = 0; + uint32_t bb_count = ctx->cfg_blocks_count; + ir_block *bb = ctx->cfg_blocks + 1; + + for (b = 1; b <= bb_count; b++, bb++) { + if (bb->flags & IR_BB_UNREACHABLE) { +#if 0 + do {if (!unreachable_count) ir_dump_cfg(ctx, stderr);} while(0); +#endif + if (bb->successors_count) { + for (i = 0, p = &ctx->cfg_edges[bb->successors]; i < bb->successors_count; i++, p++) { + ir_block *succ_bb = &ctx->cfg_blocks[*p]; + + if (!(succ_bb->flags & IR_BB_UNREACHABLE)) { + ir_remove_predecessor(ctx, succ_bb, b); + ir_remove_merge_input(ctx, succ_bb->start, bb->end); + } + } + } else { + ir_ref prev, ref = bb->end; + ir_insn *insn = &ctx->ir_base[ref]; + + IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR); + /* remove from terminators list */ + prev = ctx->ir_base[1].op1; + if (prev == ref) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == ref) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ctx->cfg_map[bb->start] = 0; + ctx->cfg_map[bb->end] = 0; + unreachable_count++; + } + } + + if (unreachable_count) { + ir_block *dst_bb; + uint32_t n = 1; + uint32_t *edges; + + dst_bb = bb = ctx->cfg_blocks + 1; + for (b = 1; b <= bb_count; b++, 
bb++) { + if (!(bb->flags & IR_BB_UNREACHABLE)) { + if (dst_bb != bb) { + memcpy(dst_bb, bb, sizeof(ir_block)); + ctx->cfg_map[dst_bb->start] = n; + ctx->cfg_map[dst_bb->end] = n; + } + dst_bb->successors_count = 0; + dst_bb++; + n++; + } + } + ctx->cfg_blocks_count = bb_count = n - 1; + + /* Rebuild successor/predecessors control edges */ + edges = ctx->cfg_edges; + bb = ctx->cfg_blocks + 1; + for (b = 1; b <= bb_count; b++, bb++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + ir_ref *p, ref; + + n = bb->predecessors_count; + if (n > 1) { + uint32_t *q = edges + bb->predecessors; + + IR_ASSERT(n == insn->inputs_count); + for (p = insn->ops + 1; n > 0; p++, q++, n--) { + ref = *p; + IR_ASSERT(ref); + ir_ref pred_b = ctx->cfg_map[ref]; + ir_block *pred_bb = &ctx->cfg_blocks[pred_b]; + *q = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } else if (n == 1) { + ref = insn->op1; + IR_ASSERT(ref); + IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL); + ir_ref pred_b = ctx->cfg_map[ref]; + ir_block *pred_bb = &ctx->cfg_blocks[pred_b]; + edges[bb->predecessors] = pred_b; + edges[pred_bb->successors + pred_bb->successors_count++] = b; + } + } + } + + return 1; +} + +#if 0 +static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) +{ + uint32_t i, *p; + ir_block *bb = &ctx->cfg_blocks[b]; + + if (bb->postnum != 0) { + return; + } + + if (bb->successors_count) { + bb->postnum = -1; /* Marker for "currently visiting" */ + p = ctx->cfg_edges + bb->successors; + i = bb->successors_count; + do { + compute_postnum(ctx, cur, *p); + p++; + } while (--i); + } + bb->postnum = (*cur)++; +} + +/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by + * Cooper, Harvey and Kennedy. 
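+ * Each block's idom is refined as the intersection (walking up by postorder + * numbers) of the idoms of its already-processed predecessors, and the pass + * repeats in reverse post-order until a fixed point is reached.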
*/ +int ir_build_dominators_tree(ir_ctx *ctx) +{ + uint32_t blocks_count, b, postnum; + ir_block *blocks, *bb; + uint32_t *edges; + bool changed; + + ctx->flags &= ~IR_NO_LOOPS; + + postnum = 1; + compute_postnum(ctx, &postnum, 1); + + /* Find immediate dominators */ + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + blocks[1].idom = 1; + do { + changed = 0; + /* Iterating in Reverse Post Order */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->predecessors_count == 1) { + uint32_t pred_b = edges[bb->predecessors]; + + IR_ASSERT(blocks[pred_b].idom > 0); + if (bb->idom != pred_b) { + bb->idom = pred_b; + changed = 1; + } + } else if (bb->predecessors_count) { + uint32_t idom = 0; + uint32_t k = bb->predecessors_count; + uint32_t *p = edges + bb->predecessors; + + do { + uint32_t pred_b = *p; + ir_block *pred_bb = &blocks[pred_b]; + ir_block *idom_bb; + + if (pred_bb->idom > 0) { + idom = pred_b; + idom_bb = &blocks[idom]; + + while (--k > 0) { + pred_b = *(++p); + pred_bb = &blocks[pred_b]; + if (pred_bb->idom > 0) { + while (idom != pred_b) { + while (pred_bb->postnum < idom_bb->postnum) { + pred_b = pred_bb->idom; + pred_bb = &blocks[pred_b]; + } + while (idom_bb->postnum < pred_bb->postnum) { + idom = idom_bb->idom; + idom_bb = &blocks[idom]; + } + } + } + } + + if (bb->idom != idom) { + bb->idom = idom; + changed = 1; + } + break; + } + p++; + } while (--k > 0); + } + } + } while (changed); + blocks[1].idom = 0; + blocks[1].dom_depth = 0; + + /* Construct dominators tree */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->idom > 0) { + ir_block *idom_bb = &blocks[bb->idom]; + + bb->dom_depth = idom_bb->dom_depth + 1; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + } + + return 1; +} +#else +/* A single-pass modification of "A Simple, Fast Dominance Algorithm" by + * Cooper, Harvey and Kennedy, that relies on IR block ordering */ +int ir_build_dominators_tree(ir_ctx *ctx) +{ + uint32_t blocks_count, b; + ir_block *blocks, *bb; + uint32_t *edges; + + ctx->flags |= IR_NO_LOOPS; + + /* Find immediate dominators */ + blocks = ctx->cfg_blocks; + edges = ctx->cfg_edges; + blocks_count = ctx->cfg_blocks_count; + blocks[1].idom = 1; + blocks[1].dom_depth = 0; + + /* Iterating in Reverse Post Order */ + for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + IR_ASSERT(bb->predecessors_count > 0); + uint32_t k = bb->predecessors_count; + uint32_t *p = edges + bb->predecessors; + uint32_t idom = *p; + ir_block *idom_bb; + + if (UNEXPECTED(idom > b)) { + /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. 
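+ * (in the reverse-post-order numbering used here a back-edge predecessor + * has a larger block number than the block itself, hence the idom > b test)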
*/ + ctx->flags &= ~IR_NO_LOOPS; + while (1) { + k--; + p++; + idom = *p; + if (idom < b) { + break; + } + IR_ASSERT(k > 0); + } + } + IR_ASSERT(blocks[idom].idom > 0); + + while (--k > 0) { + uint32_t pred_b = *(++p); + + if (pred_b < b) { + IR_ASSERT(blocks[pred_b].idom > 0); + while (idom != pred_b) { + while (pred_b > idom) { + pred_b = blocks[pred_b].idom; + } + while (idom > pred_b) { + idom = blocks[idom].idom; + } + } + } else { + ctx->flags &= ~IR_NO_LOOPS; + } + } + bb->idom = idom; + idom_bb = &blocks[idom]; + + bb->dom_depth = idom_bb->dom_depth + 1; + /* Sort by block number to traverse children in pre-order */ + if (idom_bb->dom_child == 0) { + idom_bb->dom_child = b; + } else if (b < idom_bb->dom_child) { + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } else { + int child = idom_bb->dom_child; + ir_block *child_bb = &blocks[child]; + + while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { + child = child_bb->dom_next_child; + child_bb = &blocks[child]; + } + bb->dom_next_child = child_bb->dom_next_child; + child_bb->dom_next_child = b; + } + } + + blocks[1].idom = 0; + + return 1; +} +#endif + +static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2) +{ + uint32_t b1_depth = blocks[b1].dom_depth; + const ir_block *bb2 = &blocks[b2]; + + while (bb2->dom_depth > b1_depth) { + b2 = bb2->dom_parent; + bb2 = &blocks[b2]; + } + return b1 == b2; +} + +int ir_find_loops(ir_ctx *ctx) +{ + uint32_t i, j, n, count; + uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1; + ir_block *blocks = ctx->cfg_blocks; + uint32_t *edges = ctx->cfg_edges; + ir_worklist work; + + if (ctx->flags & IR_NO_LOOPS) { + return 1; + } + + /* We don't materialize the DJ spanning tree explicitly, as we are only interested in ancestor + * queries. These are implemented by checking entry/exit times of the DFS search. */ + ir_worklist_init(&work, ctx->cfg_blocks_count + 1); + entry_times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t)); + exit_times = entry_times + ctx->cfg_blocks_count + 1; + sorted_blocks = exit_times + ctx->cfg_blocks_count + 1; + + memset(entry_times, 0, (ctx->cfg_blocks_count + 1) * sizeof(uint32_t)); + + ir_worklist_push(&work, 1); + while (ir_worklist_len(&work)) { + ir_block *bb; + int child; + +next: + i = ir_worklist_peek(&work); + if (!entry_times[i]) { + entry_times[i] = time++; + } + + /* Visit blocks immediately dominated by i. */ + bb = &blocks[i]; + for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) { + if (ir_worklist_push(&work, child)) { + goto next; + } + } + + /* Visit join edges. */ + if (bb->successors_count) { + uint32_t *p = edges + bb->successors; + for (j = 0; j < bb->successors_count; j++,p++) { + uint32_t succ = *p; + + if (blocks[succ].idom == i) { + continue; + } else if (ir_worklist_push(&work, succ)) { + goto next; + } + } + } + exit_times[i] = time++; + ir_worklist_pop(&work); + } + + /* Sort blocks by level, which is the opposite order in which we want to process them */ + sorted_blocks[1] = 1; + j = 1; + n = 2; + while (j != n) { + i = j; + j = n; + for (; i < j; i++) { + int child; + for (child = blocks[sorted_blocks[i]].dom_child; child > 0; child = blocks[child].dom_next_child) { + sorted_blocks[n++] = child; + } + } + } + count = n; + + /* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". 
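+ * Blocks are taken from the deepest dominator-tree level upwards; a back-join + * edge whose target dominates its source marks the target as a loop header, + * and the loop body is gathered by walking predecessor chains.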
*/ + while (n > 1) { + i = sorted_blocks[--n]; + ir_block *bb = &blocks[i]; + + if (bb->predecessors_count > 1) { + bool irreducible = 0; + uint32_t *p = &edges[bb->predecessors]; + + j = bb->predecessors_count; + do { + uint32_t pred = *p; + + /* A join edge is one for which the predecessor does not + immediately dominate the successor. */ + if (bb->idom != pred) { + /* In a loop back-edge (back-join edge), the successor dominates + the predecessor. */ + if (ir_dominates(blocks, i, pred)) { + if (!ir_worklist_len(&work)) { + ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work))); + } + blocks[pred].loop_header = 0; /* support for merged loops */ + ir_worklist_push(&work, pred); + } else { + /* Otherwise it's a cross-join edge. See if it's a branch + to an ancestor on the DJ spanning tree. */ + if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) { + irreducible = 1; + } + } + } + p++; + } while (--j); + + if (UNEXPECTED(irreducible)) { + // TODO: Support for irreducible loops ??? + bb->flags |= IR_BB_IRREDUCIBLE_LOOP; + ctx->flags |= IR_IRREDUCIBLE_CFG; + while (ir_worklist_len(&work)) { + ir_worklist_pop(&work); + } + } else if (ir_worklist_len(&work)) { + bb->flags |= IR_BB_LOOP_HEADER; + ctx->flags |= IR_CFG_HAS_LOOPS; + bb->loop_depth = 1; + while (ir_worklist_len(&work)) { + j = ir_worklist_pop(&work); + while (blocks[j].loop_header > 0) { + j = blocks[j].loop_header; + } + if (j != i) { + ir_block *bb = &blocks[j]; + if (bb->idom == 0 && j != 1) { + /* Ignore blocks that are unreachable or only abnormally reachable. */ + continue; + } + bb->loop_header = i; + if (bb->predecessors_count) { + uint32_t *p = &edges[bb->predecessors]; + j = bb->predecessors_count; + do { + ir_worklist_push(&work, *p); + p++; + } while (--j); + } + } + } + } + } + } + + if (ctx->flags & IR_CFG_HAS_LOOPS) { + for (n = 1; n < count; n++) { + i = sorted_blocks[n]; + ir_block *bb = &blocks[i]; + if (bb->loop_header > 0) { + ir_block *loop = &blocks[bb->loop_header]; + uint32_t loop_depth = loop->loop_depth; + + if (bb->flags & IR_BB_LOOP_HEADER) { + loop_depth++; + } + bb->loop_depth = loop_depth; + if (bb->flags & (IR_BB_ENTRY|IR_BB_LOOP_WITH_ENTRY)) { + loop->flags |= IR_BB_LOOP_WITH_ENTRY; + } + } + } + } + + ir_mem_free(entry_times); + ir_worklist_free(&work); + + return 1; +} + +/* A variation of "Top-down Positioning" algorithm described by + * Karl Pettis and Robert C. 
Hansen "Profile Guided Code Positioning" + * + * TODO: Switch to "Bottom-up Positioning" algorithm + */ +int ir_schedule_blocks(ir_ctx *ctx) +{ + ir_bitqueue blocks; + uint32_t b, best_successor, j, last_non_empty; + ir_block *bb, *best_successor_bb; + ir_insn *insn; + uint32_t *list, *map; + uint32_t count = 0; + bool reorder = 0; + + ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1); + blocks.pos = 0; + list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 1) * 2); + map = list + (ctx->cfg_blocks_count + 1); + for (b = 1; b <= ctx->cfg_blocks_count; b++) { + ir_bitset_incl(blocks.set, b); + } + + while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) { + bb = &ctx->cfg_blocks[b]; + /* Start trace */ + last_non_empty = 0; + do { + if (UNEXPECTED(bb->flags & IR_BB_PREV_EMPTY_ENTRY) && ir_bitqueue_in(&blocks, b - 1)) { + /* Schedule the previous empty ENTRY block before this one */ + uint32_t predecessor = b - 1; + + ir_bitqueue_del(&blocks, predecessor); + count++; + list[count] = predecessor; + map[predecessor] = count; + if (predecessor != count) { + reorder = 1; + } + } + count++; + list[count] = b; + map[b] = count; + if (b != count) { + reorder = 1; + } + if (!(bb->flags & IR_BB_EMPTY)) { + last_non_empty = b; + } + best_successor_bb = NULL; + if (bb->successors_count == 1) { + best_successor = ctx->cfg_edges[bb->successors]; + if (ir_bitqueue_in(&blocks, best_successor)) { + best_successor_bb = &ctx->cfg_blocks[best_successor]; + } + } else if (bb->successors_count > 1) { + uint32_t prob, best_successor_prob; + uint32_t *p, successor; + ir_block *successor_bb; + + for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) { + successor = *p; + if (ir_bitqueue_in(&blocks, successor)) { + successor_bb = &ctx->cfg_blocks[successor]; + insn = &ctx->ir_base[successor_bb->start]; + if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE) { + prob = insn->op2; + if (!prob) { + prob = 100 / bb->successors_count; + if (!(successor_bb->flags & IR_BB_EMPTY)) { + prob++; + } + } + } else if (insn->op == IR_CASE_DEFAULT) { + prob = insn->op2; + if (!prob) { + prob = 100 / bb->successors_count; + } + } else if (insn->op == IR_CASE_VAL) { + prob = insn->op3; + if (!prob) { + prob = 100 / bb->successors_count; + } + } else if (insn->op == IR_ENTRY) { + if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) { + prob = 99; /* prefer empty ENTRY block to go first */ + } else { + prob = 1; + } + } else { + prob = 100 / bb->successors_count; + } + if (!best_successor_bb + || successor_bb->loop_depth > best_successor_bb->loop_depth + || prob > best_successor_prob) { + best_successor = successor; + best_successor_bb = successor_bb; + best_successor_prob = prob; + } + } + } + } + if (!best_successor_bb) { + /* Try to continue trace using the other successor of the last IF */ + if ((bb->flags & IR_BB_EMPTY) && last_non_empty) { + bb = &ctx->cfg_blocks[last_non_empty]; + if (bb->successors_count == 2 && ctx->ir_base[bb->end].op == IR_IF) { + b = ctx->cfg_edges[bb->successors]; + + if (!ir_bitqueue_in(&blocks, b)) { + b = ctx->cfg_edges[bb->successors + 1]; + } + if (ir_bitqueue_in(&blocks, b)) { + bb = &ctx->cfg_blocks[b]; + ir_bitqueue_del(&blocks, b); + continue; + } + } + } + /* End trace */ + break; + } + b = best_successor; + bb = best_successor_bb; + ir_bitqueue_del(&blocks, b); + } while (1); + } + + if (reorder) { + ir_block *cfg_blocks = ir_mem_malloc(sizeof(ir_block) * (ctx->cfg_blocks_count + 1)); + + memset(ctx->cfg_blocks, 0, 
sizeof(ir_block)); + for (b = 1, bb = cfg_blocks + 1; b <= count; b++, bb++) { + *bb = ctx->cfg_blocks[list[b]]; + if (bb->dom_parent > 0) { + bb->dom_parent = map[bb->dom_parent]; + } + if (bb->dom_child > 0) { + bb->dom_child = map[bb->dom_child]; + } + if (bb->dom_next_child > 0) { + bb->dom_next_child = map[bb->dom_next_child]; + } + if (bb->loop_header > 0) { + bb->loop_header = map[bb->loop_header]; + } + } + for (j = 0; j < ctx->cfg_edges_count; j++) { + if (ctx->cfg_edges[j] > 0) { + ctx->cfg_edges[j] = map[ctx->cfg_edges[j]]; + } + } + ir_mem_free(ctx->cfg_blocks); + ctx->cfg_blocks = cfg_blocks; + + if (ctx->osr_entry_loads) { + ir_list *list = (ir_list*)ctx->osr_entry_loads; + uint32_t pos = 0, count; + + while (1) { + b = ir_list_at(list, pos); + if (b == 0) { + break; + } + ir_list_set(list, pos, map[b]); + pos++; + count = ir_list_at(list, pos); + pos += count + 1; + } + } + + if (ctx->cfg_map) { + ir_ref i; + + for (i = IR_UNUSED + 1; i < ctx->insns_count; i++) { + ctx->cfg_map[i] = map[ctx->cfg_map[i]]; + } + } + } + + ir_mem_free(list); + ir_bitqueue_free(&blocks); + + return 1; +} + +/* JMP target optimisation */ +uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b) +{ + ir_block *bb; + + while (1) { + bb = &ctx->cfg_blocks[b]; + + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + b = ctx->cfg_edges[bb->successors]; + } else { + break; + } + } + return b; +} + +uint32_t ir_skip_empty_next_blocks(const ir_ctx *ctx, uint32_t b) +{ + ir_block *bb; + + while (1) { + if (b > ctx->cfg_blocks_count) { + return 0; + } + + bb = &ctx->cfg_blocks[b]; + + if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) { + b++; + } else { + break; + } + } + return b; +} + +void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block, uint32_t *next_block) +{ + ir_block *bb; + uint32_t *p, use_block; + + *true_block = 0; + *false_block = 0; + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(ctx->ir_base[bb->end].op == IR_IF); + IR_ASSERT(bb->successors_count == 2); + p = &ctx->cfg_edges[bb->successors]; + use_block = *p; + if (ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE) { + *true_block = ir_skip_empty_target_blocks(ctx, use_block); + use_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE); + *false_block = ir_skip_empty_target_blocks(ctx, use_block); + } else { + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE); + *false_block = ir_skip_empty_target_blocks(ctx, use_block); + use_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE); + *true_block = ir_skip_empty_target_blocks(ctx, use_block); + } + IR_ASSERT(*true_block && *false_block); + *next_block = b == ctx->cfg_blocks_count ? 0 : ir_skip_empty_next_blocks(ctx, b + 1); +} diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c new file mode 100644 index 0000000000000..1993ee1360565 --- /dev/null +++ b/ext/opcache/jit/ir/ir_check.c @@ -0,0 +1,381 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR verification) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_consistency_check(void) +{ + IR_ASSERT(IR_UNUSED == 0); + IR_ASSERT(IR_NOP == 0); + + IR_ASSERT((int)IR_BOOL == (int)IR_C_BOOL); + IR_ASSERT((int)IR_U8 == (int)IR_C_U8); + IR_ASSERT((int)IR_U16 == (int)IR_C_U16); + IR_ASSERT((int)IR_U32 == (int)IR_C_U32); + IR_ASSERT((int)IR_U64 == (int)IR_C_U64); + IR_ASSERT((int)IR_ADDR == (int)IR_C_ADDR); + IR_ASSERT((int)IR_CHAR == (int)IR_C_CHAR); + IR_ASSERT((int)IR_I8 == (int)IR_C_I8); + IR_ASSERT((int)IR_I16 == (int)IR_C_I16); + IR_ASSERT((int)IR_I32 == (int)IR_C_I32); + IR_ASSERT((int)IR_I64 == (int)IR_C_I64); + IR_ASSERT((int)IR_DOUBLE == (int)IR_C_DOUBLE); + IR_ASSERT((int)IR_FLOAT == (int)IR_C_FLOAT); + + IR_ASSERT((IR_EQ ^ 1) == IR_NE); + IR_ASSERT((IR_LT ^ 3) == IR_GT); + IR_ASSERT((IR_GT ^ 3) == IR_LT); + IR_ASSERT((IR_LE ^ 3) == IR_GE); + IR_ASSERT((IR_GE ^ 3) == IR_LE); + IR_ASSERT((IR_ULT ^ 3) == IR_UGT); + IR_ASSERT((IR_UGT ^ 3) == IR_ULT); + IR_ASSERT((IR_ULE ^ 3) == IR_UGE); + IR_ASSERT((IR_UGE ^ 3) == IR_ULE); + + IR_ASSERT(IR_ADD + 1 == IR_SUB); +} + +static bool ir_check_use_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_ref n, j, *p; + ir_use_list *use_list = &ctx->use_lists[from]; + + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + if (*p == to) { + return 1; + } + } + return 0; +} + +static bool ir_check_input_list(const ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_insn *insn = &ctx->ir_base[to]; + ir_ref n, j, *p; + + n = ir_input_edges_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + if (*p == from) { + return 1; + } + } + return 0; +} + +static bool ir_check_domination(const ir_ctx *ctx, ir_ref def, ir_ref use) +{ + uint32_t b1 = ctx->cfg_map[def]; + uint32_t b2 = ctx->cfg_map[use]; + ir_block *blocks = ctx->cfg_blocks; + uint32_t b1_depth = blocks[b1].dom_depth; + const ir_block *bb2 = &blocks[b2]; + + if (b1 == b2) { + return def < use; + } + while (bb2->dom_depth > b1_depth) { + b2 = bb2->dom_parent; + bb2 = &blocks[b2]; + } + return b1 == b2; +} + +bool ir_check(const ir_ctx *ctx) +{ + ir_ref i, j, n, *p, use; + ir_insn *insn, *use_insn; + ir_type type; + uint32_t flags; + bool ok = 1; + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + n = ir_input_edges_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + use = *p; + if (use != IR_UNUSED) { + if (IR_IS_CONST_REF(use)) { + if (use >= ctx->consts_count) { + fprintf(stderr, "ir_base[%d].ops[%d] constant reference (%d) is out of range\n", i, j, use); + ok = 0; + } + } else { + if (use >= ctx->insns_count) { + fprintf(stderr, "ir_base[%d].ops[%d] insn reference (%d) is out of range\n", i, j, use); + ok = 0; + } + use_insn = &ctx->ir_base[use]; + switch (IR_OPND_KIND(flags, j)) { + case IR_OPND_DATA: + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_DATA)) { + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_MEM) + || use_insn->type == IR_VOID) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be DATA\n", i, j, use); + ok = 0; + } + } + if (use >= i + && !(insn->op == IR_PHI + && (!(ctx->flags & IR_LINEAR) || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN))) { + fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); + ok = 0; + } + if (flags & IR_OP_FLAG_DATA) { + switch (insn->op) { + case IR_COND: + if (j == 1) { + break; + } + IR_FALLTHROUGH; + case IR_ADD: + case IR_SUB: + case IR_MUL: + case IR_DIV: + 
case IR_MOD: + case IR_NEG: + case IR_ABS: + case IR_ADD_OV: + case IR_SUB_OV: + case IR_MUL_OV: + case IR_NOT: + case IR_OR: + case IR_AND: + case IR_XOR: + case IR_SHL: + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + case IR_BSWAP: + case IR_MIN: + case IR_MAX: + case IR_PHI: + case IR_COPY: + case IR_PI: + if (insn->type != use_insn->type) { + if (j == 2 + && (insn->op == IR_SHL + || insn->op == IR_SHR + || insn->op == IR_SAR + || insn->op == IR_ROL + || insn->op == IR_ROR) + && ir_type_size[use_insn->type] < ir_type_size[insn->type]) { + /* second argument of SHIFT may be incompatible with result */ + break; + } + if (insn->op == IR_NOT && insn->type == IR_BOOL) { + /* boolean not */ + break; + } + if (sizeof(void*) == 8) { + if (insn->type == IR_ADDR && (use_insn->type == IR_U64 || use_insn->type == IR_I64)) { + break; + } + } else { + if (insn->type == IR_ADDR && (use_insn->type == IR_U32 || use_insn->type == IR_I32)) { + break; + } + } + fprintf(stderr, "ir_base[%d].ops[%d] (%d) type is incompatible with result type (%d != %d)\n", + i, j, use, use_insn->type, insn->type); + ok = 0; + } + break; + } + } + if ((ctx->flags & IR_LINEAR) + && ctx->cfg_map + && insn->op != IR_PHI + && !ir_check_domination(ctx, use, i)) { + fprintf(stderr, "ir_base[%d].ops[%d] -> %d, %d doesn't dominate %d\n", i, j, use, use, i); + ok = 0; + } + break; + case IR_OPND_CONTROL: + if (flags & IR_OP_FLAG_BB_START) { + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END)) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be BB_END\n", i, j, use); + ok = 0; + } + } else { + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_BB_END) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must not be BB_END\n", i, j, use); + ok = 0; + } + } + break; + case IR_OPND_CONTROL_DEP: + if (use >= i + && !(insn->op == IR_LOOP_BEGIN)) { + fprintf(stderr, "ir_base[%d].ops[%d] invalid forward reference (%d)\n", i, j, use); + ok = 0; + } else if (insn->op == IR_PHI) { + ir_insn *merge_insn = &ctx->ir_base[insn->op1]; + if (merge_insn->op != IR_MERGE && merge_insn->op != IR_LOOP_BEGIN) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be MERGE or LOOP_BEGIN\n", i, j, use); + ok = 0; + } + } + break; + case IR_OPND_CONTROL_REF: + if (!(ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL)) { + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) must be CONTROL\n", i, j, use); + ok = 0; + } + break; + default: + fprintf(stderr, "ir_base[%d].ops[%d] reference (%d) of unsupported kind\n", i, j, use); + ok = 0; + } + } + } else if ((insn->op == IR_RETURN || insn->op == IR_UNREACHABLE) && j == 2) { + /* pass (function returns void) */ + } else if (insn->op == IR_BEGIN && j == 1) { + /* pass (start of unreachable basic block) */ + } else if (IR_OPND_KIND(flags, j) != IR_OPND_CONTROL_REF + && (insn->op != IR_SNAPSHOT || j == 1)) { + fprintf(stderr, "ir_base[%d].ops[%d] missing reference (%d)\n", i, j, use); + ok = 0; + } + if (ctx->use_lists + && use > 0 + && !ir_check_use_list(ctx, use, i)) { + fprintf(stderr, "ir_base[%d].ops[%d] is not in use list (%d)\n", i, j, use); + ok = 0; + } + } + + switch (insn->op) { + case IR_PHI: + if (insn->inputs_count != ctx->ir_base[insn->op1].inputs_count + 1) { + fprintf(stderr, "ir_base[%d] inconsistent PHI inputs_count (%d != %d)\n", + i, insn->inputs_count, ctx->ir_base[insn->op1].inputs_count + 1); + ok = 0; + } + break; + case IR_LOAD: + case IR_STORE: + type = ctx->ir_base[insn->op2].type; + if (type != IR_ADDR + && (!IR_IS_TYPE_INT(type) || ir_type_size[type] != 
ir_type_size[IR_ADDR])) { + fprintf(stderr, "ir_base[%d].op2 must have ADDR type (%s)\n", + i, ir_type_name[type]); + ok = 0; + } + break; + case IR_VLOAD: + case IR_VSTORE: + if (ctx->ir_base[insn->op2].op != IR_VAR) { + fprintf(stderr, "ir_base[%d].op2 must be 'VAR' (%s)\n", + i, ir_op_name[ctx->ir_base[insn->op2].op]); + ok = 0; + } + break; + case IR_RETURN: + if (ctx->ret_type != (insn->op2 ? ctx->ir_base[insn->op2].type : IR_VOID)) { + fprintf(stderr, "ir_base[%d].type incompatible return type\n", i); + ok = 0; + } + break; + case IR_TAILCALL: + if (ctx->ret_type != insn->type) { + fprintf(stderr, "ir_base[%d].type incompatible return type\n", i); + ok = 0; + } + break; + } + + if (ctx->use_lists) { + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref count; + + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) { + use = *p; + if (!ir_check_input_list(ctx, i, use)) { + fprintf(stderr, "ir_base[%d] is in use list of ir_base[%d]\n", use, i); + ok = 0; + } + } + + if ((flags & IR_OP_FLAG_CONTROL) && !(flags & IR_OP_FLAG_MEM)) { + switch (insn->op) { + case IR_SWITCH: + /* may have many successors */ + if (use_list->count < 1) { + fprintf(stderr, "ir_base[%d].op (SWITCH) must have at least 1 successor (%d)\n", i, use_list->count); + ok = 0; + } + break; + case IR_IF: + if (use_list->count != 2) { + fprintf(stderr, "ir_base[%d].op (IF) must have 2 successors (%d)\n", i, use_list->count); + ok = 0; + } + break; + case IR_UNREACHABLE: + case IR_RETURN: + if (use_list->count == 1) { + /* UNREACHABLE and RETURN may be linked with the following ENTRY by a fake edge */ + if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) { + break; + } + } + IR_FALLTHROUGH; + case IR_IJMP: + if (use_list->count != 0) { + fprintf(stderr, "ir_base[%d].op (%s) must not have successors (%d)\n", + i, ir_op_name[insn->op], use_list->count); + ok = 0; + } + break; + default: + /* skip data references */ + count = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < use_list->count; j++, p++) { + use = *p; + if (!(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL)) { + count--; + } + } + if (count != 1) { + if (insn->op == IR_CALL && count == 2) { + /* result of CALL may be used as data in control instruction */ + break; + } + if ((insn->op == IR_LOOP_END || insn->op == IR_END) && count == 2) { + /* LOOP_END/END may be linked with the following ENTRY by a fake edge */ + if (ctx->ir_base[ctx->use_edges[use_list->refs]].op == IR_ENTRY) { + count--; + } + if (ctx->ir_base[ctx->use_edges[use_list->refs + 1]].op == IR_ENTRY) { + count--; + } + if (count == 1) { + break; + } + } + fprintf(stderr, "ir_base[%d].op (%s) must have 1 successor (%d)\n", + i, ir_op_name[insn->op], count); + ok = 0; + } + break; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + +// if (!ok) { +// ir_dump_codegen(ctx, stderr); +// } + IR_ASSERT(ok); + return ok; +} diff --git a/ext/opcache/jit/ir/ir_disasm.c b/ext/opcache/jit/ir/ir_disasm.c new file mode 100644 index 0000000000000..70ee738fd6d7a --- /dev/null +++ b/ext/opcache/jit/ir/ir_disasm.c @@ -0,0 +1,832 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Disassembler based on libcapstone) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#ifndef _WIN32 +# include <dlfcn.h> +# include <unistd.h> +# include +#endif + +#include "ir.h" +#include "ir_private.h" + +#ifndef _WIN32 +# include "ir_elf.h" +#endif + +#include <capstone/capstone.h> +#define HAVE_CAPSTONE_ITER + +typedef struct _ir_sym_node { + uint64_t addr; + uint64_t end; + struct _ir_sym_node *parent; + struct _ir_sym_node *child[2]; + unsigned char info; + char name[1]; +} ir_sym_node; + +static ir_sym_node *_symbols = NULL; + +static void ir_syms_rotateleft(ir_sym_node *p) +{ + ir_sym_node *r = p->child[1]; + p->child[1] = r->child[0]; + if (r->child[0]) { + r->child[0]->parent = p; + } + r->parent = p->parent; + if (p->parent == NULL) { + _symbols = r; + } else if (p->parent->child[0] == p) { + p->parent->child[0] = r; + } else { + p->parent->child[1] = r; + } + r->child[0] = p; + p->parent = r; +} + +static void ir_syms_rotateright(ir_sym_node *p) +{ + ir_sym_node *l = p->child[0]; + p->child[0] = l->child[1]; + if (l->child[1]) { + l->child[1]->parent = p; + } + l->parent = p->parent; + if (p->parent == NULL) { + _symbols = l; + } else if (p->parent->child[1] == p) { + p->parent->child[1] = l; + } else { + p->parent->child[0] = l; + } + l->child[1] = p; + p->parent = l; +} + +void ir_disasm_add_symbol(const char *name, + uint64_t addr, + uint64_t size) +{ + ir_sym_node *sym; + size_t len = strlen(name); + + sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1); + if (!sym) { + return; + } + sym->addr = addr; + sym->end = (addr + size - 1); + memcpy((char*)&sym->name, name, len + 1); + sym->parent = sym->child[0] = sym->child[1] = NULL; + sym->info = 1; + if (_symbols) { + ir_sym_node *node = _symbols; + + /* insert it into rbtree */ + do { + if (sym->addr > node->addr) { + IR_ASSERT(sym->addr > (node->end)); + if (node->child[1]) { + node = node->child[1]; + } else { + node->child[1] = sym; + sym->parent = node; + break; + } + } else if (sym->addr < node->addr) { + if (node->child[0]) { + node = node->child[0]; + } else { + node->child[0] = sym; + sym->parent = node; + break; + } + } else { + IR_ASSERT(sym->addr == node->addr); + if (strcmp(name, node->name) == 0 && sym->end < node->end) { + /* reduce size of the existing symbol */ + node->end = sym->end; + } + ir_mem_pfree(sym); + return; + } + } while (1); + + /* fix rbtree after inserting */ + while (sym && sym != _symbols && sym->parent->info == 1) { + if (sym->parent == sym->parent->parent->child[0]) { + node = sym->parent->parent->child[1]; + if (node && node->info == 1) { + sym->parent->info = 0; + node->info = 0; + sym->parent->parent->info = 1; + sym = sym->parent->parent; + } else { + if (sym == sym->parent->child[1]) { + sym = sym->parent; + ir_syms_rotateleft(sym); + } + sym->parent->info = 0; + sym->parent->parent->info = 1; + ir_syms_rotateright(sym->parent->parent); + } + } else { + node = sym->parent->parent->child[0]; + if (node && node->info == 1) { + sym->parent->info = 0; + node->info = 0; + sym->parent->parent->info = 1; + sym = sym->parent->parent; + } else { + if (sym == sym->parent->child[0]) { + sym = sym->parent; + ir_syms_rotateright(sym); + } + sym->parent->info = 0; + sym->parent->parent->info = 1; + ir_syms_rotateleft(sym->parent->parent); + } + } + } + } else { + _symbols = sym; + } + _symbols->info = 0; +} + +static void ir_disasm_destroy_symbols(ir_sym_node *n) +{ + if (n) { + if (n->child[0]) { + ir_disasm_destroy_symbols(n->child[0]); + } + if (n->child[1]) { + ir_disasm_destroy_symbols(n->child[1]); + } + ir_mem_pfree(n); + } +} + 
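+/* + * Editorial sketch, not part of the upstream file: the red-black tree above + * lets address ranges be registered once and then resolved by name. A JIT + * typically registers its helper symbols before disassembling; the helper + * name, address and size below are hypothetical. + * + * ir_disasm_add_symbol("my_jit_helper", (uint64_t)(uintptr_t)&my_jit_helper, 64); + * ir_disasm("my_func", code_ptr, code_size, 0, NULL, stderr); + * + * ir_disasm() is defined below; addresses that are still unresolved fall + * back to dladdr() via ir_disasm_resolver(). + */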
+const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset) +{ + ir_sym_node *node = _symbols; + while (node) { + if (addr < node->addr) { + node = node->child[0]; + } else if (addr > node->end) { + node = node->child[1]; + } else { + *offset = addr - node->addr; + return node->name; + } + } + return NULL; +} + +static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn) +{ + unsigned int i; + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (cs_insn_group(cs, insn, X86_GRP_JUMP)) { + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_IMM) { + return insn->detail->x86.operands[i].imm; + } + } + } +#elif defined(IR_TARGET_AARCH64) + if (cs_insn_group(cs, insn, ARM64_GRP_JUMP) + || insn->id == ARM64_INS_BL + || insn->id == ARM64_INS_ADR) { + for (i = 0; i < insn->detail->arm64.op_count; i++) { + if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM) + return insn->detail->arm64.operands[i].imm; + } + } +#endif + + return 0; +} + +static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn) +{ +#if defined(IR_TARGET_X86) + unsigned int i; + + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.base == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.scale == 1) { + return (uint32_t)insn->detail->x86.operands[i].mem.disp; + } + } + if (cs_insn_group(cs, insn, X86_GRP_JUMP)) { + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.disp) { + return (uint32_t)insn->detail->x86.operands[i].mem.disp; + } + } + } + if (insn->id == X86_INS_MOV + && insn->detail->x86.op_count == 2 + && insn->detail->x86.operands[0].type == X86_OP_IMM + && insn->detail->x86.operands[0].size == sizeof(void*)) { + return (uint32_t)insn->detail->x86.operands[0].imm; + } +#elif defined(IR_TARGET_X64) + unsigned int i; + + for (i = 0; i < insn->detail->x86.op_count; i++) { + if (insn->detail->x86.operands[i].type == X86_OP_MEM + && insn->detail->x86.operands[i].mem.base == X86_REG_RIP + && insn->detail->x86.operands[i].mem.segment == X86_REG_INVALID + // TODO: support for index and scale + && insn->detail->x86.operands[i].mem.index == X86_REG_INVALID + && insn->detail->x86.operands[i].mem.scale == 1) { + return insn->detail->x86.operands[i].mem.disp + insn->address + insn->size; + } + } +#elif defined(IR_TARGET_AARCH64) + unsigned int i; + + if (insn->id == ARM64_INS_ADR + || insn->id == ARM64_INS_LDRB + || insn->id == ARM64_INS_LDR + || insn->id == ARM64_INS_LDRH + || insn->id == ARM64_INS_LDRSB + || insn->id == ARM64_INS_LDRSH + || insn->id == ARM64_INS_LDRSW + || insn->id == ARM64_INS_STRB + || insn->id == ARM64_INS_STR + || insn->id == ARM64_INS_STRH) { + for (i = 0; i < insn->detail->arm64.op_count; i++) { + if (insn->detail->arm64.operands[i].type == ARM64_OP_IMM) + return insn->detail->arm64.operands[i].imm; + } + } + return 0; +#endif + + return 0; +} + +static const char* ir_disasm_resolver(uint64_t addr, + int64_t *offset) +{ +#ifndef _WIN32 + const char *name; + void *a = (void*)(uintptr_t)(addr); + Dl_info info; + + name = ir_disasm_find_symbol(addr, offset); + if (name) { + return name; + } + + if (dladdr(a, &info) + && info.dli_sname != NULL + && info.dli_saddr == a) { + *offset = 0; + return 
info.dli_sname; + } +#else + const char *name; + name = ir_disasm_find_symbol(addr, offset); + if (name) { + return name; + } +#endif + + return NULL; +} + +int ir_disasm(const char *name, + const void *start, + size_t size, + bool asm_addr, + ir_ctx *ctx, + FILE *f) +{ + size_t orig_size = size; + const void *orig_end = (void *)((char *)start + size); + const void *end; + ir_hashtab labels; + int32_t l, n; + uint64_t addr; + csh cs; + cs_insn *insn; +# ifdef HAVE_CAPSTONE_ITER + const uint8_t *cs_code; + size_t cs_size; + uint64_t cs_addr; +# else + size_t count, i; +# endif + const char *sym; + int64_t offset = 0; + char *p, *q, *r; + uint32_t rodata_offset = 0; + uint32_t jmp_table_offset = 0; + ir_hashtab_bucket *b; + int32_t entry; + cs_err ret; + +# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# ifdef IR_TARGET_X64 + ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } +# else + ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } +# endif + cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON); +# if DISASM_INTEL_SYNTAX + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); +# else + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); +# endif +# elif defined(IR_TARGET_AARCH64) + ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs); + if (ret != CS_ERR_OK) { + fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret)); + return 0; + } + cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON); + cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); +# endif + + if (name) { + fprintf(f, "%s:\n", name); + } + + ir_hashtab_init(&labels, 32); + + if (ctx) { + if (ctx->entries_count) { + int i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + ir_hashtab_add(&labels, insn->op3, insn->op2); + } while (i != 0); + } + + rodata_offset = ctx->rodata_offset; + if (rodata_offset) { + if (size > rodata_offset) { + size = rodata_offset; + } + } + jmp_table_offset = ctx->jmp_table_offset; + if (jmp_table_offset) { + uint32_t n; + uintptr_t *p; + + IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff); + n = (uint32_t)(orig_size - jmp_table_offset); + if (size > jmp_table_offset) { + size = jmp_table_offset; + } + while (n > 0 && IR_ALIGNED_SIZE(n, sizeof(void*)) != n) { + jmp_table_offset++; + n--; + } + IR_ASSERT(n > 0 && n % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0); + p = (uintptr_t*)((char*)start + jmp_table_offset); + while (n > 0) { + if (*p) { + if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) { + ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*p - (uintptr_t)start), -1); + } + } + p++; + n -= sizeof(void*); + } + } + } + end = (void *)((char *)start + size); + +# ifdef HAVE_CAPSTONE_ITER + cs_code = start; + cs_size = (uint8_t*)end - (uint8_t*)start; + cs_addr = (uint64_t)(uintptr_t)cs_code; + insn = cs_malloc(cs); + while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) { + if ((addr = ir_disasm_branch_target(cs, insn)) +# else + count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn); + for (i = 0; i < count; i++) { + if ((addr = ir_disasm_branch_target(cs, &(insn[i]))) +# endif + && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) { + ir_hashtab_add(&labels, 
(uint32_t)((uintptr_t)addr - (uintptr_t)start), -1); +# ifdef HAVE_CAPSTONE_ITER + } else if ((addr = ir_disasm_rodata_reference(cs, insn))) { +# else + } else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) { +# endif + if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) { + ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1); + } + } + } + + ir_hashtab_key_sort(&labels); + + /* renumber labels */ + l = 0; + n = labels.count; + b = labels.data; + while (n > 0) { + if (b->val < 0) { + b->val = --l; + } + b++; + n--; + } + +# ifdef HAVE_CAPSTONE_ITER + cs_code = start; + cs_size = (uint8_t*)end - (uint8_t*)start; + cs_addr = (uint64_t)(uintptr_t)cs_code; + while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start)); +# else + for (i = 0; i < count; i++) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start)); +# endif + if (entry != (ir_ref)IR_INVALID_VAL) { + if (entry >= 0) { + fprintf(f, ".ENTRY_%d:\n", entry); + } else { + fprintf(f, ".L%d:\n", -entry); + } + } + +# ifdef HAVE_CAPSTONE_ITER + if (asm_addr) { + fprintf(f, " %" PRIx64 ":", insn->address); + } + p = insn->op_str; +#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5) + /* Fix capstone MOVD/MOVQ disassembly mismatch */ + if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) { + insn->mnemonic[3] = 'q'; + } +#endif + if (strlen(p) == 0) { + fprintf(f, "\t%s\n", insn->mnemonic); + continue; + } else { + fprintf(f, "\t%s ", insn->mnemonic); + } +# else + if (asm_addr) { + fprintf(f, " %" PRIx64 ":", insn[i].address); + } + p = insn[i].op_str; + if (strlen(p) == 0) { + fprintf(f, "\t%s\n", insn[i].mnemonic); + continue; + } else { + fprintf(f, "\t%s ", insn[i].mnemonic); + } +# endif + /* Try to replace the target addresses with symbols */ +#if defined(IR_TARGET_X64) +# ifdef HAVE_CAPSTONE_ITER + if ((addr = ir_disasm_rodata_reference(cs, insn))) { +# else + if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) { +# endif + if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + r = q = strstr(p, "(%rip)"); + if (r && r > p) { + r--; + while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) { + r--; + } + if (r > p && *r == 'x' && *(r - 1) == '0') { + r -= 2; + } + if (r > p) { + fwrite(p, 1, r - p, f); + } + if (entry >= 0) { + fprintf(f, ".ENTRY_%d%s\n", entry, q); + } else { + fprintf(f, ".L%d%s\n", -entry, q); + } + continue; + } + } + } + } +#endif +#if defined(IR_TARGET_AARCH64) + while ((q = strstr(p, "#0x")) != NULL) { + r = q + 3; +#else + while ((q = strstr(p, "0x")) != NULL) { + r = q + 2; +#endif + addr = 0; + while (1) { + if (*r >= '0' && *r <= '9') { + addr = addr * 16 + (*r - '0'); + } else if (*r >= 'A' && *r <= 'F') { + addr = addr * 16 + (*r - 'A' + 10); + } else if (*r >= 'a' && *r <= 'f') { + addr = addr * 16 + (*r - 'a' + 10); + } else { + break; + } + r++; + } + if (p != q && *(q-1) == '-') { + q--; + addr = (uint32_t)(-(int64_t)addr); + } + if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + fwrite(p, 1, q - p, f); + if (entry >= 0) { + fprintf(f, 
".ENTRY_%d", entry); + } else { + fprintf(f, ".L%d", -entry); + } + } else if (r > p) { + fwrite(p, 1, r - p, f); + } + } else if ((sym = ir_disasm_resolver(addr, &offset))) { +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (offset && p != q && *(q-1) == '$') { + if (r > p) { + fwrite(p, 1, r - p, f); + } + p = r; + continue; + } +#endif + if (q > p) { + fwrite(p, 1, q - p, f); + } + fputs(sym, f); + if (offset != 0) { + if (offset > 0) { + fprintf(f, "+0x%" PRIx64, offset); + } else { + fprintf(f, "-0x%" PRIx64, -offset); + } + } + } else if (r > p) { + fwrite(p, 1, r - p, f); + } + p = r; + } + fprintf(f, "%s\n", p); + } +# ifdef HAVE_CAPSTONE_ITER + cs_free(insn, 1); +# else + cs_free(insn, count); +# endif + + if (rodata_offset || jmp_table_offset) { + fprintf(f, ".rodata\n"); + } + if (rodata_offset) { + const unsigned char *p = (unsigned char*)start + rodata_offset; + uint32_t n = jmp_table_offset ? + (uint32_t)(jmp_table_offset - rodata_offset) : + (uint32_t)(orig_size - rodata_offset); + uint32_t j; + + while (n > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + if (entry >= 0) { + fprintf(f, ".ENTRY_%d:\n", entry); + } else { + fprintf(f, ".L%d:\n", -entry); + } + } + fprintf(f, "\t.db 0x%02x", (int)*p); + p++; + n--; + j = 15; + while (n > 0 && j > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + break; + } + fprintf(f, ", 0x%02x", (int)*p); + p++; + n--; + j--; + } + fprintf(f, "\n"); + } + } + if (jmp_table_offset) { + uintptr_t *p = (uintptr_t*)(unsigned char*)start + jmp_table_offset; + uint32_t n = (uint32_t)(orig_size - jmp_table_offset); + + fprintf(f, ".align %d\n", (int)sizeof(void*)); + + p = (uintptr_t*)((char*)start + jmp_table_offset); + while (n > 0) { + entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start)); + if (entry != (ir_ref)IR_INVALID_VAL) { + if (entry >= 0) { + fprintf(f, ".ENTRY_%d:\n", entry); + } else { + fprintf(f, ".L%d:\n", -entry); + } + } + if (*p) { + if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) { + entry = ir_hashtab_find(&labels, (uint32_t)(*p - (uintptr_t)start)); + IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL); + if (entry >= 0) { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword .ENTRY_%d\n", entry); + } else { + fprintf(f, "\t.dword .ENTRY_%d\n", entry); + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword .L%d\n", -entry); + } else { + fprintf(f, "\t.dword .L%d\n", -entry); + } + } + } else { + int64_t offset; + const char *name = ir_disasm_find_symbol(*p, &offset); + + if (name && offset == 0) { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword %s\n", name); + } else { + fprintf(f, "\t.dword %s\n", name); + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword 0x%0llx\n", (long long)*p); + } else { + fprintf(f, "\t.dword 0x%0x\n", (int)*p); + } + } + } + } else { + if (sizeof(void*) == 8) { + fprintf(f, "\t.qword 0\n"); + } else { + fprintf(f, "\t.dword 0\n"); + } + } + p++; + n -= sizeof(void*); + } + } + + fprintf(f, "\n"); + + ir_hashtab_free(&labels); + + cs_close(&cs); + + return 1; +} + +#ifndef _WIN32 +static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect) +{ + void *s = ir_mem_malloc(sect->size); + + if (lseek(fd, sect->ofs, SEEK_SET) < 0) { + ir_mem_free(s); + return NULL; + } + if (read(fd, s, sect->size) != (ssize_t)sect->size) { + ir_mem_free(s); + return NULL; + } + + return 
s; +} + +static void ir_elf_load_symbols(void) +{ + ir_elf_header hdr; + ir_elf_sectheader sect; + int i; +#if defined(__linux__) + int fd = open("/proc/self/exe", O_RDONLY); +#elif defined(__NetBSD__) + int fd = open("/proc/curproc/exe", O_RDONLY); +#elif defined(__FreeBSD__) || defined(__DragonFly__) + char path[PATH_MAX]; + size_t pathlen = sizeof(path); + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) { + return; + } + int fd = open(path, O_RDONLY); +#elif defined(__sun) + int fd = open("/proc/self/path/a.out", O_RDONLY); +#elif defined(__HAIKU__) + char path[PATH_MAX]; + if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH, + NULL, path, sizeof(path)) != B_OK) { + return; + } + + int fd = open(path, O_RDONLY); +#else + // Other ELF platforms may be added here eventually. + // (macOS is Mach-O rather than ELF.) + int fd = -1; +#endif + + if (fd >= 0) { + if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr) + && hdr.emagic[0] == '\177' + && hdr.emagic[1] == 'E' + && hdr.emagic[2] == 'L' + && hdr.emagic[3] == 'F' + && lseek(fd, hdr.shofs, SEEK_SET) >= 0) { + for (i = 0; i < hdr.shnum; i++) { + if (read(fd, &sect, sizeof(sect)) == sizeof(sect) + && sect.type == ELFSECT_TYPE_SYMTAB) { + uint32_t n, count = sect.size / sizeof(ir_elf_symbol); + ir_elf_symbol *syms = ir_elf_read_sect(fd, &sect); + char *str_tbl; + + if (syms) { + if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0 + && read(fd, &sect, sizeof(sect)) == sizeof(sect) + && (str_tbl = (char*)ir_elf_read_sect(fd, &sect)) != NULL) { + for (n = 0; n < count; n++) { + if (syms[n].name + && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC + /*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/) + && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL + /*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) { + ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size); + } + } + ir_mem_free(str_tbl); + } + ir_mem_free(syms); + } + if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) { + break; + } + } + } + } + close(fd); + } +} +#endif + +int ir_disasm_init(void) +{ +#ifndef _WIN32 + ir_elf_load_symbols(); +#endif + return 1; +} + +void ir_disasm_free(void) +{ + if (_symbols) { + ir_disasm_destroy_symbols(_symbols); + _symbols = NULL; + } +} diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c new file mode 100644 index 0000000000000..06c1bf65f33c1 --- /dev/null +++ b/ext/opcache/jit/ir/ir_dump.c @@ -0,0 +1,713 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (debug dumps) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_dump(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + + for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) { + fprintf(f, "%05d %s %s(", i, ir_op_name[insn->op], ir_type_name[insn->type]); + ir_print_const(ctx, insn, f, true); + fprintf(f, ")\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count; i++, insn++) { + flags = ir_op_flags[insn->op]; + fprintf(f, "%05d %s", i, ir_op_name[insn->op]); + if ((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) { + fprintf(f, " %s", ir_type_name[insn->type]); + } + n = ir_operands_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= 3; j++, p++) { + ref = *p; + if (ref) { + fprintf(f, " %05d", ref); + } + } + if (n > 3) { + n -= 3; + do { + i++; + insn++; + fprintf(f, "\n%05d", i); + for (j = 0; j < 4; j++, p++) { + ref = *p; + if (ref) { + fprintf(f, " %05d", ref); + } + } + n -= 4; + } while (n > 0); + } + fprintf(f, "\n"); + } +} + +void ir_dump_dot(const ir_ctx *ctx, FILE *f) +{ + int DATA_WEIGHT = 0; + int CONTROL_WEIGHT = 5; + int REF_WEIGHT = 4; + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + + fprintf(f, "digraph ir {\n"); + fprintf(f, "\trankdir=TB;\n"); + for (i = 1 - ctx->consts_count, insn = ctx->ir_base + i; i < IR_UNUSED; i++, insn++) { + fprintf(f, "\tc%d [label=\"C%d: CONST %s(", -i, -i, ir_type_name[insn->type]); + /* FIXME(tony): We still cannot handle strings with escaped double quote inside */ + ir_print_const(ctx, insn, f, false); + fprintf(f, ")\",style=filled,fillcolor=yellow];\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (insn->op == IR_START) { + fprintf(f, "\t{rank=min; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (insn->op == IR_ENTRY) { + fprintf(f, "\t{n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (flags & IR_OP_FLAG_TERMINATOR) { + fprintf(f, "\t{rank=max; n%d [label=\"%d: %s\",shape=box,style=\"rounded,filled\",fillcolor=red];}\n", i, i, ir_op_name[insn->op]); + } else if (flags & IR_OP_FLAG_MEM) { + fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=pink];\n", i, i, ir_op_name[insn->op]); + } else { + fprintf(f, "\tn%d [label=\"%d: %s\",shape=box,style=filled,fillcolor=lightcoral];\n", i, i, ir_op_name[insn->op]); + } + } else if (flags & IR_OP_FLAG_DATA) { + if (IR_OPND_KIND(flags, 1) == IR_OPND_DATA) { + /* not a leaf */ + fprintf(f, "\tn%d [label=\"%d: %s\"", i, i, ir_op_name[insn->op]); + fprintf(f, ",shape=diamond,style=filled,fillcolor=deepskyblue];\n"); + } else { + if (insn->op == IR_PARAM) { + fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\",style=filled,fillcolor=lightblue];\n", + i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2)); + } else if (insn->op == IR_VAR) { + fprintf(f, "\tn%d [label=\"%d: %s %s \\\"%s\\\"\"];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type], ir_get_str(ctx, insn->op2)); + } else { + fprintf(f, "\tn%d [label=\"%d: %s %s\",style=filled,fillcolor=deepskyblue];\n", i, i, ir_op_name[insn->op], ir_type_name[insn->type]); + } + } + } + n = ir_operands_count(ctx, insn); + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ref = *p; + 
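+ /* Edge styling: data dependencies are drawn in blue (a PHI input that comes from a LOOP_END becomes a blue back edge so Graphviz keeps the loop body compact), control flow is bold red (dashed for the fake edge feeding an ENTRY), and weak control references are dashed back edges. */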
if (ref) { + switch (IR_OPND_KIND(flags, j)) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "\tc%d -> n%d [color=blue,weight=%d];\n", -ref, i, DATA_WEIGHT); + } else if (insn->op == IR_PHI + && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN + && ctx->ir_base[ir_insn_op(&ctx->ir_base[insn->op1], j - 1)].op == IR_LOOP_END) { + fprintf(f, "\tn%d -> n%d [color=blue,dir=back];\n", i, ref); + } else { + fprintf(f, "\tn%d -> n%d [color=blue,weight=%d];\n", ref, i, DATA_WEIGHT); + } + break; + case IR_OPND_CONTROL: + if (insn->op == IR_LOOP_BEGIN && ctx->ir_base[ref].op == IR_LOOP_END) { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,dir=back];\n", i, ref); + } else if (insn->op == IR_ENTRY) { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,style=dashed,weight=%d];\n", ref, i, CONTROL_WEIGHT); + } else { + fprintf(f, "\tn%d -> n%d [style=bold,color=red,weight=%d];\n", ref, i, CONTROL_WEIGHT); + } + break; + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT); + break; + } + } + } + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + fprintf(f, "}\n"); +} + +void ir_dump_use_lists(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, *p; + ir_use_list *list; + + if (ctx->use_lists) { + fprintf(f, "{ # Use Lists\n"); + for (i = 1, list = &ctx->use_lists[1]; i < ctx->insns_count; i++, list++) { + n = list->count; + if (n > 0) { + p = &ctx->use_edges[list->refs]; + fprintf(f, "%05d(%d): [%05d", i, n, *p); + p++; + for (j = 1; j < n; j++, p++) { + fprintf(f, ", %05d", *p); + } + fprintf(f, "]\n"); + } + } + fprintf(f, "}\n"); + } +} + +static int ir_dump_dessa_move(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + FILE *f = ctx->data; + int8_t reg; + + if (IR_IS_CONST_REF(from)) { + fprintf(f, "\tmov c_%d -> ", -from); + } else if (from) { + fprintf(f, "\tmov R%d", ctx->vregs[from]); + if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[from]]) { + reg = ctx->live_intervals[ctx->vregs[from]]->reg; + if (reg >= 0) { + fprintf(f, " [%%%s]", ir_reg_name(reg, type)); + } + } + fprintf(f, " -> "); + } else { + fprintf(f, "\tmov TMP -> "); + } + + if (to) { + fprintf(f, "R%d", ctx->vregs[to]); + if (ctx->live_intervals && ctx->live_intervals[ctx->vregs[to]]) { + reg = ctx->live_intervals[ctx->vregs[to]]->reg; + if (reg >= 0) { + fprintf(f, " [%%%s]", ir_reg_name(reg, type)); + } + } + fprintf(f, "\n"); + } else { + fprintf(f, "TMP\n"); + } + return 1; +} + +void ir_dump_cfg(ir_ctx *ctx, FILE *f) +{ + if (ctx->cfg_blocks) { + uint32_t b, i, bb_count = ctx->cfg_blocks_count; + ir_block *bb = ctx->cfg_blocks + 1; + + fprintf(f, "{ # CFG\n"); + for (b = 1; b <= bb_count; b++, bb++) { + fprintf(f, "BB%d:\n", b); + fprintf(f, "\tstart=%d\n", bb->start); + fprintf(f, "\tend=%d\n", bb->end); + if (bb->successors_count) { + fprintf(f, "\tsuccessors(%d) [BB%d", bb->successors_count, ctx->cfg_edges[bb->successors]); + for (i = 1; i < bb->successors_count; i++) { + fprintf(f, ", BB%d", ctx->cfg_edges[bb->successors + i]); + } + fprintf(f, "]\n"); + } + if (bb->predecessors_count) { + fprintf(f, "\tpredecessors(%d) [BB%d", bb->predecessors_count, ctx->cfg_edges[bb->predecessors]); + for (i = 1; i < bb->predecessors_count; i++) { + fprintf(f, ", BB%d", ctx->cfg_edges[bb->predecessors + i]); + } + fprintf(f, "]\n"); + } + if (bb->dom_parent > 0) { + fprintf(f, "\tdom_parent=BB%d\n", bb->dom_parent); + } + fprintf(f, "\tdom_depth=%d\n", bb->dom_depth); + if (bb->dom_child > 0) { + int child = 
bb->dom_child; + fprintf(f, "\tdom_children [BB%d", child); + child = ctx->cfg_blocks[child].dom_next_child; + while (child > 0) { + fprintf(f, ", BB%d", child); + child = ctx->cfg_blocks[child].dom_next_child; + } + fprintf(f, "]\n"); + } + if (bb->flags & IR_BB_ENTRY) { + fprintf(f, "\tENTRY\n"); + } + if (bb->flags & IR_BB_UNREACHABLE) { + fprintf(f, "\tUNREACHABLE\n"); + } + if (bb->flags & IR_BB_LOOP_HEADER) { + if (bb->flags & IR_BB_LOOP_WITH_ENTRY) { + fprintf(f, "\tLOOP_HEADER, LOOP_WITH_ENTRY\n"); + } else { + fprintf(f, "\tLOOP_HEADER\n"); + } + } + if (bb->flags & IR_BB_IRREDUCIBLE_LOOP) { + fprintf(f, "\tIRREDUCIBLE_LOOP\n"); + } + if (bb->loop_header > 0) { + fprintf(f, "\tloop_header=BB%d\n", bb->loop_header); + } + if (bb->loop_depth != 0) { + fprintf(f, "\tloop_depth=%d\n", bb->loop_depth); + } + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_list *list = (ir_list*)ctx->osr_entry_loads; + uint32_t pos = 0, i, count; + + IR_ASSERT(list); + while (1) { + i = ir_list_at(list, pos); + if (b == i) { + break; + } + IR_ASSERT(i != 0); /* end marker */ + pos++; + count = ir_list_at(list, pos); + pos += count + 1; + } + pos++; + count = ir_list_at(list, pos); + pos++; + + for (i = 0; i < count; i++, pos++) { + ir_ref ref = ir_list_at(list, pos); + fprintf(f, "\tOSR_ENTRY_LOAD=d_%d\n", ref); + } + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ctx->data = f; + ir_gen_dessa_moves(ctx, b, ir_dump_dessa_move); + } + } + fprintf(f, "}\n"); + } +} + +void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f) +{ + ir_ref i; + uint32_t *_blocks = ctx->cfg_map; + + if (_blocks) { + fprintf(f, "{ # CFG map (insn -> bb)\n"); + for (i = IR_UNUSED + 1; i < ctx->insns_count; i++) { + fprintf(f, "%d -> %d\n", i, _blocks[i]); + } + fprintf(f, "}\n"); + } +} + +void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n; + + if (!ctx->live_intervals) { + return; + } + fprintf(f, "{ # LIVE-RANGES (vregs_count=%d)\n", ctx->vregs_count); + for (i = 0; i <= ctx->vregs_count; i++) { + ir_live_interval *ival = ctx->live_intervals[i]; + + if (ival) { + ir_live_range *p; + ir_use_pos *use_pos; + + if (i == 0) { + fprintf(f, "TMP"); + } else { + for (j = 1; j < ctx->insns_count; j++) { + if (ctx->vregs[j] == (uint32_t)i) { + break; + } + } + fprintf(f, "R%d (d_%d", i, j); + for (j++; j < ctx->insns_count; j++) { + if (ctx->vregs[j] == (uint32_t)i) { + fprintf(f, ", d_%d", j); + } + } + fprintf(f, ")"); + if (ival->stack_spill_pos != -1) { + if (ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + IR_ASSERT(ctx->spill_base >= 0); + fprintf(f, " [SPILL=0x%x(%%%s)]", ival->stack_spill_pos, ir_reg_name(ctx->spill_base, IR_ADDR)); + } else { + fprintf(f, " [SPILL=0x%x]", ival->stack_spill_pos); + } + } + } + if (ival->next) { + fprintf(f, "\n\t"); + } else if (ival->reg != IR_REG_NONE) { + fprintf(f, " "); + } + do { + if (ival->reg != IR_REG_NONE) { + fprintf(f, "[%%%s]", ir_reg_name(ival->reg, ival->type)); + } + p = &ival->range; + fprintf(f, ": [%d.%d-%d.%d)", + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + if (i == 0) { + /* This is a TMP register */ + if (ival->tmp_ref == IR_LIVE_POS_TO_REF(p->start)) { + fprintf(f, "/%d", ival->tmp_op_num); + } else { + fprintf(f, "/%d.%d", ival->tmp_ref, ival->tmp_op_num); + } + } else { + p = p->next; + while (p) { + fprintf(f, ", [%d.%d-%d.%d)", + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + p = p->next; 
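+ /* A split interval is kept as a linked list of ir_live_range nodes; every range after the first is printed on the same line, so a single virtual register may show several [start-end) ranges. */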
} + } + use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->flags & IR_PHI_USE) { + IR_ASSERT(use_pos->op_num > 0); + fprintf(f, ", PHI_USE(%d.%d, phi=d_%d/%d)", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + -use_pos->hint_ref, use_pos->op_num); + } else if (use_pos->flags & IR_FUSED_USE) { + fprintf(f, ", USE(%d.%d/%d.%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + -use_pos->hint_ref, use_pos->op_num); + if (use_pos->hint >= 0) { + fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type)); + } + fprintf(f, ")"); + if (use_pos->flags & IR_USE_MUST_BE_IN_REG) { + fprintf(f, "!"); + } + } else { + if (!use_pos->op_num) { + fprintf(f, ", DEF(%d.%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos)); + } else { + fprintf(f, ", USE(%d.%d/%d", + IR_LIVE_POS_TO_REF(use_pos->pos), IR_LIVE_POS_TO_SUB_REF(use_pos->pos), + use_pos->op_num); + } + if (use_pos->hint >= 0) { + fprintf(f, ", hint=%%%s", ir_reg_name(use_pos->hint, ival->type)); + } + if (use_pos->hint_ref) { + fprintf(f, ", hint=R%d", ctx->vregs[use_pos->hint_ref]); + } + fprintf(f, ")"); + if (use_pos->flags & IR_USE_MUST_BE_IN_REG) { + fprintf(f, "!"); + } + } + use_pos = use_pos->next; + } + if (ival->next) { + fprintf(f, "\n\t"); + } + ival = ival->next; + } while (ival); + fprintf(f, "\n"); + } + } +#if 1 + n = ctx->vregs_count + ir_regs_number() + 2; + for (i = ctx->vregs_count + 1; i <= n; i++) { + ir_live_interval *ival = ctx->live_intervals[i]; + + if (ival) { + ir_live_range *p = &ival->range; + fprintf(f, "[%%%s] : [%d.%d-%d.%d)", + ir_reg_name(ival->reg, ival->type), + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + p = p->next; + while (p) { + fprintf(f, ", [%d.%d-%d.%d)", + IR_LIVE_POS_TO_REF(p->start), IR_LIVE_POS_TO_SUB_REF(p->start), + IR_LIVE_POS_TO_REF(p->end), IR_LIVE_POS_TO_SUB_REF(p->end)); + p = p->next; + } + fprintf(f, "\n"); + } + } +#endif + fprintf(f, "}\n"); +} + +void ir_dump_codegen(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags, b; + ir_block *bb; + bool first; + + fprintf(f, "{\n"); + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i); + if (insn->op == IR_FUNC) { + if (!insn->const_flags) { + fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags); + } + } else if (insn->op == IR_SYM) { + fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32)); + } else if (insn->op == IR_FUNC_ADDR) { + fprintf(f, "func_addr("); + ir_print_const(ctx, insn, f, true); + if (insn->const_flags) { + fprintf(f, ", %d", insn->const_flags); + } + fprintf(f, ")"); + } else { + ir_print_const(ctx, insn, f, true); + } + fprintf(f, ";\n"); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + fprintf(f, "#BB%d:\n", b); + + for (i = bb->start, insn = ctx->ir_base + i; i <= bb->end;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) { + fprintf(f, "\tl_%d = ", i); + } else { + fprintf(f, "\t%s d_%d", ir_type_cname[insn->type], i); + if (ctx->vregs && ctx->vregs[i]) { + fprintf(f, " {R%d}", ctx->vregs[i]); + } + if (ctx->regs) { + int8_t reg = ctx->regs[i][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", 
ir_reg_name(IR_REG_NUM(reg), insn->type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : ""); + } + } + fprintf(f, ", l_%d = ", i); + } + } else { + fprintf(f, "\t"); + if (flags & IR_OP_FLAG_DATA) { + fprintf(f, "%s d_%d", ir_type_cname[insn->type], i); + if (ctx->vregs && ctx->vregs[i]) { + fprintf(f, " {R%d}", ctx->vregs[i]); + } + if (ctx->regs) { + int8_t reg = ctx->regs[i][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), insn->type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? ":store" : ""); + } + } + fprintf(f, " = "); + } + } + fprintf(f, "%s", ir_op_name[insn->op]); + n = ir_operands_count(ctx, insn); + if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) { + fprintf(f, "/%d", n); + } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) { + fprintf(f, "/%d", n - 2); + } else if (insn->op == IR_PHI && n != 3) { + fprintf(f, "/%d", n - 1); + } else if (insn->op == IR_SNAPSHOT) { + fprintf(f, "/%d", n - 1); + } + first = 1; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + uint32_t opnd_kind = IR_OPND_KIND(flags, j); + + ref = *p; + if (ref) { + switch (opnd_kind) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "%sc_%d", first ? "(" : ", ", -ref); + } else { + fprintf(f, "%sd_%d", first ? "(" : ", ", ref); + } + if (ctx->vregs && ref > 0 && ctx->vregs[ref]) { + fprintf(f, " {R%d}", ctx->vregs[ref]); + } + if (ctx->regs) { + int8_t *regs = ctx->regs[i]; + int8_t reg = regs[j]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[ref].type), + (reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : ""); + } + } + first = 0; + break; + case IR_OPND_CONTROL: + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "%sl_%d", first ? "(" : ", ", ref); + first = 0; + break; + case IR_OPND_STR: + fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref)); + first = 0; + break; + case IR_OPND_PROB: + if (ref == 0) { + break; + } + IR_FALLTHROUGH; + case IR_OPND_NUM: + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + break; + } + } else if (opnd_kind == IR_OPND_NUM) { + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + fprintf(f, "%snull", first ? 
"(" : ", "); + first = 0; + } + } + if (first) { + fprintf(f, ";"); + } else { + fprintf(f, ");"); + } + if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) { + ir_ref var = ir_binding_find(ctx, i); + if (var) { + IR_ASSERT(var < 0); + fprintf(f, " # BIND(0x%x);", -var); + } + } + if (ctx->rules) { + uint32_t rule = ctx->rules[i]; + uint32_t id = rule & ~(IR_FUSED|IR_SKIPPED|IR_SIMPLE); + + if (id < IR_LAST_OP) { + fprintf(f, " # RULE(%s", ir_op_name[id]); + } else { + IR_ASSERT(id > IR_LAST_OP /*&& id < IR_LAST_RULE*/); + fprintf(f, " # RULE(%s", ir_rule_name[id - IR_LAST_OP]); + } + if (rule & IR_FUSED) { + fprintf(f, ":FUSED"); + } + if (rule & IR_SKIPPED) { + fprintf(f, ":SKIPPED"); + } + if (rule & IR_SIMPLE) { + fprintf(f, ":SIMPLE"); + } + fprintf(f, ")"); + } + fprintf(f, "\n"); + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + + if (bb->flags & IR_BB_DESSA_MOVES) { + uint32_t succ; + ir_block *succ_bb; + ir_use_list *use_list; + ir_ref k, i, *p, use_ref, input; + ir_insn *use_insn; + + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + k = ir_phi_input_number(ctx, succ_bb, b); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + use_ref = *p; + use_insn = &ctx->ir_base[use_ref]; + if (use_insn->op == IR_PHI) { + input = ir_insn_op(use_insn, k); + if (IR_IS_CONST_REF(input)) { + fprintf(f, "\t# DESSA MOV c_%d", -input); + } else if (ctx->vregs[input] != ctx->vregs[use_ref]) { + fprintf(f, "\t# DESSA MOV d_%d {R%d}", input, ctx->vregs[input]); + } else { + continue; + } + if (ctx->regs) { + int8_t *regs = ctx->regs[use_ref]; + int8_t reg = regs[k]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[input].type), + (reg & (IR_REG_SPILL_LOAD|IR_REG_SPILL_SPECIAL)) ? ":load" : ""); + } + } + fprintf(f, " -> d_%d {R%d}", use_ref, ctx->vregs[use_ref]); + if (ctx->regs) { + int8_t reg = ctx->regs[use_ref][0]; + if (reg != IR_REG_NONE) { + fprintf(f, " {%%%s%s}", ir_reg_name(IR_REG_NUM(reg), ctx->ir_base[use_ref].type), + (reg & (IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL)) ? 
":store" : ""); + } + } + fprintf(f, "\n"); + } + } + } + + insn = &ctx->ir_base[bb->end]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + uint32_t succ; + + if (bb->successors_count == 1) { + succ = ctx->cfg_edges[bb->successors]; + } else { + /* END may have a fake control edge to ENTRY */ + IR_ASSERT(bb->successors_count == 2); + succ = ctx->cfg_edges[bb->successors]; + if (ctx->ir_base[ctx->cfg_blocks[succ].start].op == IR_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; +#ifdef IR_DEBUG + } else { + uint32_t fake_succ = ctx->cfg_edges[bb->successors + 1]; + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[fake_succ].start].op == IR_ENTRY); +#endif + } + } + if (succ != b + 1) { + fprintf(f, "\t# GOTO BB%d\n", succ); + } + } else if (insn->op == IR_IF) { + uint32_t true_block, false_block, *p; + + p = &ctx->cfg_edges[bb->successors]; + true_block = *p; + if (ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE) { + false_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE); + } else { + false_block = true_block; + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[false_block].start].op == IR_IF_FALSE); + true_block = *(p+1); + IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[true_block].start].op == IR_IF_TRUE); + } + fprintf(f, "\t# IF_TRUE BB%d, IF_FALSE BB%d\n", true_block, false_block); + } else if (insn->op == IR_SWITCH) { + fprintf(f, "\t# SWITCH ...\n"); + } + } + fprintf(f, "}\n"); +} diff --git a/ext/opcache/jit/ir/ir_elf.h b/ext/opcache/jit/ir/ir_elf.h new file mode 100644 index 0000000000000..961789a7b4a08 --- /dev/null +++ b/ext/opcache/jit/ir/ir_elf.h @@ -0,0 +1,101 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (ELF header definitions) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_ELF +#define IR_ELF + +#if defined(IR_TARGET_X64) || defined(IR_TARGET_AARCH64) +# define ELF64 +#else +# undef ELF64 +#endif + +typedef struct _ir_elf_header { + uint8_t emagic[4]; + uint8_t eclass; + uint8_t eendian; + uint8_t eversion; + uint8_t eosabi; + uint8_t eabiversion; + uint8_t epad[7]; + uint16_t type; + uint16_t machine; + uint32_t version; + uintptr_t entry; + uintptr_t phofs; + uintptr_t shofs; + uint32_t flags; + uint16_t ehsize; + uint16_t phentsize; + uint16_t phnum; + uint16_t shentsize; + uint16_t shnum; + uint16_t shstridx; +} ir_elf_header; + +typedef struct ir_elf_sectheader { + uint32_t name; + uint32_t type; + uintptr_t flags; + uintptr_t addr; + uintptr_t ofs; + uintptr_t size; + uint32_t link; + uint32_t info; + uintptr_t align; + uintptr_t entsize; +} ir_elf_sectheader; + +#define ELFSECT_IDX_ABS 0xfff1 + +enum { + ELFSECT_TYPE_PROGBITS = 1, + ELFSECT_TYPE_SYMTAB = 2, + ELFSECT_TYPE_STRTAB = 3, + ELFSECT_TYPE_NOBITS = 8, + ELFSECT_TYPE_DYNSYM = 11, +}; + +#define ELFSECT_FLAGS_WRITE (1 << 0) +#define ELFSECT_FLAGS_ALLOC (1 << 1) +#define ELFSECT_FLAGS_EXEC (1 << 2) +#define ELFSECT_FLAGS_TLS (1 << 10) + +typedef struct ir_elf_symbol { +#ifdef ELF64 + uint32_t name; + uint8_t info; + uint8_t other; + uint16_t sectidx; + uintptr_t value; + uint64_t size; +#else + uint32_t name; + uintptr_t value; + uint32_t size; + uint8_t info; + uint8_t other; + uint16_t sectidx; +#endif +} ir_elf_symbol; + +#define ELFSYM_BIND(info) ((info) >> 4) +#define ELFSYM_TYPE(info) ((info) & 0xf) +#define ELFSYM_INFO(bind, type) (((bind) << 4) | (type)) + +enum { + ELFSYM_TYPE_DATA = 2, + ELFSYM_TYPE_FUNC = 2, + ELFSYM_TYPE_FILE = 4, +}; + +enum { + ELFSYM_BIND_LOCAL = 0, + ELFSYM_BIND_GLOBAL = 1, +}; + 
+#endif diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c new file mode 100644 index 0000000000000..d6de65cda70ea --- /dev/null +++ b/ext/opcache/jit/ir/ir_emit.c @@ -0,0 +1,608 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Native code generator based on DynAsm) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#include "ir.h" + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "ir_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "ir_aarch64.h" +#else +# error "Unknown IR target" +#endif + +#include "ir_private.h" +#ifndef _WIN32 +# include <dlfcn.h> +#else +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +# include <psapi.h> +#endif + +#define DASM_M_GROW(ctx, t, p, sz, need) \ + do { \ + size_t _sz = (sz), _need = (need); \ + if (_sz < _need) { \ + if (_sz < 16) _sz = 16; \ + while (_sz < _need) _sz += _sz; \ + (p) = (t *)ir_mem_realloc((p), _sz); \ + (sz) = _sz; \ + } \ + } while(0) + +#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p) + +#if IR_DEBUG +# define DASM_CHECKS +#endif + +typedef struct _ir_copy { + ir_type type; + ir_reg from; + ir_reg to; +} ir_copy; + +typedef struct _ir_delayed_copy { + ir_ref input; + ir_ref output; + ir_type type; + ir_reg from; + ir_reg to; +} ir_delayed_copy; + +#if IR_REG_INT_ARGS +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS]; +#else +static const int8_t *_ir_int_reg_params; +#endif +#if IR_REG_FP_ARGS +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS]; +#else +static const int8_t *_ir_fp_reg_params; +#endif + +#ifdef IR_HAVE_FASTCALL +static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS]; +static const int8_t *_ir_fp_fc_reg_params; + +bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn) +{ + if (sizeof(void*) == 4) { + if (IR_IS_CONST_REF(insn->op2)) { + return (ctx->ir_base[insn->op2].const_flags & IR_CONST_FASTCALL_FUNC) != 0; + } else if (ctx->ir_base[insn->op2].op == IR_BITCAST) { + return (ctx->ir_base[insn->op2].op2 & IR_CONST_FASTCALL_FUNC) != 0; + } + return 0; + } + return 0; +} +#else +bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn) +{ + return 0; +} +#endif + +bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn) +{ + if (IR_IS_CONST_REF(insn->op2)) { + return (ctx->ir_base[insn->op2].const_flags & IR_CONST_VARARG_FUNC) != 0; + } else if (ctx->ir_base[insn->op2].op == IR_BITCAST) { + return (ctx->ir_base[insn->op2].op2 & IR_CONST_VARARG_FUNC) != 0; + } + return 0; +} + +IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref) +{ + IR_ASSERT(!IR_IS_CONST_REF(ref)); + return ctx->rules[ref]; +} + +IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref) +{ + return ref > ctx->bb_start; +} + + +static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref) +{ + ir_use_list *use_list = &ctx->use_lists[1]; + int i; + ir_ref use, *p; + ir_insn *insn; + int int_param = 0; + int fp_param = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + +#ifdef IR_HAVE_FASTCALL + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PARAM) { + if (IR_IS_TYPE_INT(insn->type)) { + if (use 
== ref) { + if (int_param < int_reg_params_count) { + return int_reg_params[int_param]; + } else { + return IR_REG_NONE; + } + } + int_param++; +#ifdef _WIN64 + /* The WIN64 calling convention uses a common counter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); + if (use == ref) { + if (fp_param < fp_reg_params_count) { + return fp_reg_params[fp_param]; + } else { + return IR_REG_NONE; + } + } + fp_param++; +#ifdef _WIN64 + /* The WIN64 calling convention uses a common counter for int and fp registers */ + int_param++; +#endif + } + } + } + return IR_REG_NONE; +} + +static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs) +{ + int j, n; + ir_type type; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + +#ifdef IR_HAVE_FASTCALL + if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + n = insn->inputs_count; + n = IR_MIN(n, IR_MAX_REG_ARGS + 2); + for (j = 3; j <= n; j++) { + type = ctx->ir_base[ir_insn_op(insn, j)].type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + regs[j] = int_reg_params[int_param]; + count = j + 1; + } else { + regs[j] = IR_REG_NONE; + } + int_param++; +#ifdef _WIN64 + /* The WIN64 calling convention uses a common counter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + regs[j] = fp_reg_params[fp_param]; + count = j + 1; + } else { + regs[j] = IR_REG_NONE; + } + fp_param++; +#ifdef _WIN64 + /* The WIN64 calling convention uses a common counter for int and fp registers */ + int_param++; +#endif + } + } + return count; +} + +static bool ir_is_same_mem(const ir_ctx *ctx, ir_ref r1, ir_ref r2) +{ + ir_live_interval *ival1, *ival2; + int32_t o1, o2; + + if (IR_IS_CONST_REF(r1) || IR_IS_CONST_REF(r2)) { + return 0; + } + + IR_ASSERT(ctx->vregs[r1] && ctx->vregs[r2]); + ival1 = ctx->live_intervals[ctx->vregs[r1]]; + ival2 = ctx->live_intervals[ctx->vregs[r2]]; + IR_ASSERT(ival1 && ival2); + o1 = ival1->stack_spill_pos; + o2 = ival2->stack_spill_pos; + IR_ASSERT(o1 != -1 && o2 != -1); + return o1 == o2; +} + +static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset) +{ + ir_live_interval *ival1; + int32_t o1; + + if (IR_IS_CONST_REF(r1)) { + return 0; + } + + IR_ASSERT(ctx->vregs[r1]); + ival1 = ctx->live_intervals[ctx->vregs[r1]]; + IR_ASSERT(ival1); + o1 = ival1->stack_spill_pos; + IR_ASSERT(o1 != -1); + return o1 == offset; +} + +void *ir_resolve_sym_name(const char *name) +{ + void *handle = NULL; + void *addr; + +#ifndef _WIN32 +# ifdef RTLD_DEFAULT + handle = RTLD_DEFAULT; +# endif + addr = dlsym(handle, name); +#else + HMODULE mods[256]; + DWORD cbNeeded; + uint32_t i = 0; + + /* Quick workaround to prevent *.irt test failures */ + // TODO: try to find a general solution ??? 
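+ /* There is no RTLD_DEFAULT-style "search every module" handle on Windows: GetProcAddress() probes one module at a time, so the code below enumerates all modules loaded into the process and queries each in turn. Symbols satisfied from a statically linked CRT (printf here) are not exported by any module, hence the special case that follows. */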
+ if (strcmp(name, "printf") == 0) { + return (void*)printf; + } + + addr = NULL; + + EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded); + + while(i < (cbNeeded / sizeof(HMODULE))) { + addr = GetProcAddress(mods[i], name); + if (addr) { + return addr; + } + i++; + } +#endif + IR_ASSERT(addr != NULL); + return addr; +} + +#ifdef IR_SNAPSHOT_HANDLER_DCL + IR_SNAPSHOT_HANDLER_DCL(); +#endif + +static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn) +{ + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } +#ifdef IR_SNAPSHOT_HANDLER + if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) { + addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr); + } +#endif + return addr; +} + +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +# pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#endif + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "dynasm/dasm_proto.h" +# include "dynasm/dasm_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "dynasm/dasm_proto.h" +static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset); +# define DASM_ADD_VENEER ir_add_veneer +# include "dynasm/dasm_arm64.h" +#else +# error "Unknown IR target" +#endif + +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +/* Forward Declarations */ +static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb); +static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb); + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "ir_emit_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "ir_emit_aarch64.h" +#else +# error "Unknown IR target" +#endif + +static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb) +{ + ir_list *list = (ir_list*)ctx->osr_entry_loads; + int pos = 0, count, i; + ir_ref ref; + + IR_ASSERT(ctx->binding); + IR_ASSERT(list); + while (1) { + i = ir_list_at(list, pos); + if (b == i) { + break; + } + IR_ASSERT(i != 0); /* end marker */ + pos++; + count = ir_list_at(list, pos); + pos += count + 1; + } + pos++; + count = ir_list_at(list, pos); + pos++; + + for (i = 0; i < count; i++, pos++) { + ref = ir_list_at(list, pos); + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) { + /* not spilled */ + ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg; + ir_type type = ctx->ir_base[ref].type; + int32_t offset = -ir_binding_find(ctx, ref); + + IR_ASSERT(offset > 0); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, ctx->spill_base, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, ctx->spill_base, offset); + } + } else { + IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL); + } + } +} + +static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb) +{ + uint32_t succ, k, n = 0, n2 = 0; + ir_block *succ_bb; + ir_use_list *use_list; + ir_ref i, *p; + ir_copy *copies; + ir_delayed_copy *copies2; + ir_reg tmp_reg = ctx->regs[bb->end][0]; + ir_reg 
tmp_fp_reg = ctx->regs[bb->end][1]; + + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + k = ir_phi_input_number(ctx, succ_bb, b); + + copies = ir_mem_malloc(use_list->count * sizeof(ir_copy) + use_list->count * sizeof(ir_delayed_copy)); + copies2 = (ir_delayed_copy*)(copies + use_list->count); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ir_ref ref = *p; + ir_insn *insn = &ctx->ir_base[ref]; + + if (insn->op == IR_PHI) { + ir_ref input = ir_insn_op(insn, k); + ir_reg src = ir_get_alocated_reg(ctx, ref, k); + ir_reg dst = ctx->regs[ref][0]; + + if (dst == IR_REG_NONE) { + /* STORE to memory cannot clobber any input register (do it right now) */ + if (IR_IS_CONST_REF(input)) { + IR_ASSERT(src == IR_REG_NONE); +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (IR_IS_TYPE_INT(insn->type) + && (ir_type_size[insn->type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[input].val.i64))) { + ir_emit_store_imm(ctx, insn->type, ref, ctx->ir_base[input].val.i32); + continue; + } +#endif + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); + } else if (src == IR_REG_NONE) { + if (!ir_is_same_mem(ctx, input, ref)) { + ir_reg tmp = IR_IS_TYPE_INT(insn->type) ? tmp_reg : tmp_fp_reg; + + IR_ASSERT(tmp != IR_REG_NONE); + ir_emit_load(ctx, insn->type, tmp, input); + ir_emit_store(ctx, insn->type, ref, tmp); + } + } else { + if (IR_REG_SPILLED(src)) { + src = IR_REG_NUM(src); + ir_emit_load(ctx, insn->type, src, input); + if (ir_is_same_mem(ctx, input, ref)) { + continue; + } + } + ir_emit_store(ctx, insn->type, ref, src); + } + } else if (src == IR_REG_NONE) { + /* STORE of constant or memory can't be clobbered by parallel reg->reg copies (delay it) */ + copies2[n2].input = input; + copies2[n2].output = ref; + copies2[n2].type = insn->type; + copies2[n2].from = src; + copies2[n2].to = dst; + n2++; + } else { + IR_ASSERT(!IR_IS_CONST_REF(input)); + if (IR_REG_SPILLED(src)) { + ir_emit_load(ctx, insn->type, IR_REG_NUM(src), input); + } + if (IR_REG_SPILLED(dst) && (!IR_REG_SPILLED(src) || !ir_is_same_mem(ctx, input, ref))) { + ir_emit_store(ctx, insn->type, ref, IR_REG_NUM(src)); + } + if (IR_REG_NUM(src) != IR_REG_NUM(dst)) { + /* Schedule parallel reg->reg copy */ + copies[n].type = insn->type; + copies[n].from = IR_REG_NUM(src); + copies[n].to = IR_REG_NUM(dst); + n++; + } + } + } + } + + if (n > 0) { + ir_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg); + } + + for (n = 0; n < n2; n++) { + ir_ref input = copies2[n].input; + ir_ref ref = copies2[n].output; + ir_type type = copies2[n].type; + ir_reg dst = copies2[n].to; + + IR_ASSERT(dst != IR_REG_NONE); + if (IR_IS_CONST_REF(input)) { + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } else { + IR_ASSERT(copies2[n].from == IR_REG_NONE); + if (IR_REG_SPILLED(dst) && ir_is_same_mem(ctx, input, ref)) { + /* avoid LOAD and STORE to the same memory */ + continue; + } + ir_emit_load(ctx, type, IR_REG_NUM(dst), input); + } + if (IR_REG_SPILLED(dst)) { + ir_emit_store(ctx, type, ref, IR_REG_NUM(dst)); + } + } + + ir_mem_free(copies); +} + +int ir_match(ir_ctx *ctx) +{ + uint32_t b; + ir_ref start, ref, *prev_ref; + ir_block *bb; + ir_insn *insn; + uint32_t entries_count = 0; + + ctx->rules = ir_mem_calloc(ctx->insns_count, 
sizeof(uint32_t)); + + prev_ref = ctx->prev_ref; + if (!prev_ref) { + ir_build_prev_refs(ctx); + prev_ref = ctx->prev_ref; + } + + if (ctx->entries_count) { + ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref)); + } + + for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + start = bb->start; + if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) { + IR_ASSERT(entries_count < ctx->entries_count); + insn = &ctx->ir_base[start]; + IR_ASSERT(insn->op == IR_ENTRY); + insn->op3 = entries_count; + ctx->entries[entries_count] = b; + entries_count++; + } + ctx->rules[start] = IR_SKIPPED | IR_NOP; + ref = bb->end; + if (bb->successors_count == 1) { + insn = &ctx->ir_base[ref]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + ctx->rules[ref] = insn->op; + ref = prev_ref[ref]; + if (ref == start) { + if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) { + bb->flags |= IR_BB_EMPTY; + } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) { + bb->flags |= IR_BB_EMPTY; + if (ctx->cfg_edges[bb->successors] == b + 1) { + (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY; + } + } + continue; + } + } + } + + ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */ + + while (ref != start) { + uint32_t rule = ctx->rules[ref]; + + if (!rule) { + ctx->rules[ref] = rule = ir_match_insn(ctx, ref); + } + ir_match_insn2(ctx, ref, rule); + ref = prev_ref[ref]; + } + } + + if (ctx->entries_count) { + ctx->entries_count = entries_count; + if (!entries_count) { + ir_mem_free(ctx->entries); + ctx->entries = NULL; + } + } + + return 1; +} + +int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref) +{ + int32_t offset; + + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + return IR_SPILL_POS_TO_OFFSET(offset); +} diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h new file mode 100644 index 0000000000000..d6053286508c0 --- /dev/null +++ b/ext/opcache/jit/ir/ir_fold.h @@ -0,0 +1,2129 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Folding engine rules) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation for LuaJIT. 
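+ * + * Each IR_FOLD(OP(pattern)) entry registers a folding rule keyed by opcode and constant-operand kinds; consecutive IR_FOLD() lines without a body share the body that follows. Within a body, IR_FOLD_BOOL() and the IR_FOLD_CONST_*() macros replace the instruction with a constant, IR_FOLD_COPY() reuses an existing reference, IR_FOLD_NEXT falls through to the next matching rule, and IR_FOLD_EMIT gives up and emits the instruction as-is (as for division by zero below).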
+ */ + +/* Constant Folding */ +IR_FOLD(EQ(C_BOOL, C_BOOL)) +IR_FOLD(EQ(C_U8, C_U8)) +IR_FOLD(EQ(C_U16, C_U16)) +IR_FOLD(EQ(C_U32, C_U32)) +IR_FOLD(EQ(C_U64, C_U64)) +IR_FOLD(EQ(C_ADDR, C_ADDR)) +IR_FOLD(EQ(C_CHAR, C_CHAR)) +IR_FOLD(EQ(C_I8, C_I8)) +IR_FOLD(EQ(C_I16, C_I16)) +IR_FOLD(EQ(C_I32, C_I32)) +IR_FOLD(EQ(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 == op2_insn->val.u64); +} + +IR_FOLD(EQ(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d == op2_insn->val.d); +} + +IR_FOLD(EQ(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f == op2_insn->val.f); +} + +IR_FOLD(NE(C_BOOL, C_BOOL)) +IR_FOLD(NE(C_U8, C_U8)) +IR_FOLD(NE(C_U16, C_U16)) +IR_FOLD(NE(C_U32, C_U32)) +IR_FOLD(NE(C_U64, C_U64)) +IR_FOLD(NE(C_ADDR, C_ADDR)) +IR_FOLD(NE(C_CHAR, C_CHAR)) +IR_FOLD(NE(C_I8, C_I8)) +IR_FOLD(NE(C_I16, C_I16)) +IR_FOLD(NE(C_I32, C_I32)) +IR_FOLD(NE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 != op2_insn->val.u64); +} + +IR_FOLD(NE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d != op2_insn->val.d); +} + +IR_FOLD(NE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f != op2_insn->val.f); +} + +IR_FOLD(LT(C_BOOL, C_BOOL)) +IR_FOLD(LT(C_U8, C_U8)) +IR_FOLD(LT(C_U16, C_U16)) +IR_FOLD(LT(C_U32, C_U32)) +IR_FOLD(LT(C_U64, C_U64)) +IR_FOLD(LT(C_ADDR, C_ADDR)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 < op2_insn->val.u64); +} + +IR_FOLD(LT(C_CHAR, C_CHAR)) +IR_FOLD(LT(C_I8, C_I8)) +IR_FOLD(LT(C_I16, C_I16)) +IR_FOLD(LT(C_I32, C_I32)) +IR_FOLD(LT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.i64 < op2_insn->val.i64); +} + +IR_FOLD(LT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d < op2_insn->val.d); +} + +IR_FOLD(LT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f < op2_insn->val.f); +} + +IR_FOLD(GE(C_BOOL, C_BOOL)) +IR_FOLD(GE(C_U8, C_U8)) +IR_FOLD(GE(C_U16, C_U16)) +IR_FOLD(GE(C_U32, C_U32)) +IR_FOLD(GE(C_U64, C_U64)) +IR_FOLD(GE(C_ADDR, C_ADDR)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 >= op2_insn->val.u64); +} + +IR_FOLD(GE(C_CHAR, C_CHAR)) +IR_FOLD(GE(C_I8, C_I8)) +IR_FOLD(GE(C_I16, C_I16)) +IR_FOLD(GE(C_I32, C_I32)) +IR_FOLD(GE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.i64 >= op2_insn->val.i64); +} + +IR_FOLD(GE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d >= op2_insn->val.d); +} + +IR_FOLD(GE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f >= op2_insn->val.f); +} + +IR_FOLD(LE(C_BOOL, C_BOOL)) +IR_FOLD(LE(C_U8, C_U8)) +IR_FOLD(LE(C_U16, C_U16)) +IR_FOLD(LE(C_U32, C_U32)) +IR_FOLD(LE(C_U64, C_U64)) +IR_FOLD(LE(C_ADDR, C_ADDR)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 <= op2_insn->val.u64); +} + +IR_FOLD(LE(C_CHAR, C_CHAR)) +IR_FOLD(LE(C_I8, C_I8)) +IR_FOLD(LE(C_I16, C_I16)) +IR_FOLD(LE(C_I32, C_I32)) +IR_FOLD(LE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.i64 <= op2_insn->val.i64); +} + +IR_FOLD(LE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d <= op2_insn->val.d); +} + +IR_FOLD(LE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f <= op2_insn->val.f); +} + +IR_FOLD(GT(C_BOOL, C_BOOL)) +IR_FOLD(GT(C_U8, C_U8)) +IR_FOLD(GT(C_U16, C_U16)) +IR_FOLD(GT(C_U32, C_U32)) +IR_FOLD(GT(C_U64, C_U64)) +IR_FOLD(GT(C_ADDR, C_ADDR)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 > op2_insn->val.u64); +} + +IR_FOLD(GT(C_CHAR, C_CHAR)) +IR_FOLD(GT(C_I8, C_I8)) +IR_FOLD(GT(C_I16, C_I16)) +IR_FOLD(GT(C_I32, C_I32)) +IR_FOLD(GT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.i64 > op2_insn->val.i64); +} + +IR_FOLD(GT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(op1_insn->val.d > op2_insn->val.d); +} + +IR_FOLD(GT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(op1_insn->val.f > 
op2_insn->val.f); +} + +IR_FOLD(ULT(C_BOOL, C_BOOL)) +IR_FOLD(ULT(C_U8, C_U8)) +IR_FOLD(ULT(C_U16, C_U16)) +IR_FOLD(ULT(C_U32, C_U32)) +IR_FOLD(ULT(C_U64, C_U64)) +IR_FOLD(ULT(C_ADDR, C_ADDR)) +IR_FOLD(ULT(C_CHAR, C_CHAR)) +IR_FOLD(ULT(C_I8, C_I8)) +IR_FOLD(ULT(C_I16, C_I16)) +IR_FOLD(ULT(C_I32, C_I32)) +IR_FOLD(ULT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 < op2_insn->val.u64); +} + +IR_FOLD(ULT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d >= op2_insn->val.d)); +} + +IR_FOLD(ULT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f >= op2_insn->val.f)); +} + +IR_FOLD(UGE(C_BOOL, C_BOOL)) +IR_FOLD(UGE(C_U8, C_U8)) +IR_FOLD(UGE(C_U16, C_U16)) +IR_FOLD(UGE(C_U32, C_U32)) +IR_FOLD(UGE(C_U64, C_U64)) +IR_FOLD(UGE(C_ADDR, C_ADDR)) +IR_FOLD(UGE(C_CHAR, C_CHAR)) +IR_FOLD(UGE(C_I8, C_I8)) +IR_FOLD(UGE(C_I16, C_I16)) +IR_FOLD(UGE(C_I32, C_I32)) +IR_FOLD(UGE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 >= op2_insn->val.u64); +} + +IR_FOLD(UGE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d < op2_insn->val.d)); +} + +IR_FOLD(UGE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f < op2_insn->val.f)); +} + +IR_FOLD(ULE(C_BOOL, C_BOOL)) +IR_FOLD(ULE(C_U8, C_U8)) +IR_FOLD(ULE(C_U16, C_U16)) +IR_FOLD(ULE(C_U32, C_U32)) +IR_FOLD(ULE(C_U64, C_U64)) +IR_FOLD(ULE(C_ADDR, C_ADDR)) +IR_FOLD(ULE(C_CHAR, C_CHAR)) +IR_FOLD(ULE(C_I8, C_I8)) +IR_FOLD(ULE(C_I16, C_I16)) +IR_FOLD(ULE(C_I32, C_I32)) +IR_FOLD(ULE(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 <= op2_insn->val.u64); +} + +IR_FOLD(ULE(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d > op2_insn->val.d)); +} + +IR_FOLD(ULE(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f > op2_insn->val.f)); +} + +IR_FOLD(UGT(C_BOOL, C_BOOL)) +IR_FOLD(UGT(C_U8, C_U8)) +IR_FOLD(UGT(C_U16, C_U16)) +IR_FOLD(UGT(C_U32, C_U32)) +IR_FOLD(UGT(C_U64, C_U64)) +IR_FOLD(UGT(C_ADDR, C_ADDR)) +IR_FOLD(UGT(C_CHAR, C_CHAR)) +IR_FOLD(UGT(C_I8, C_I8)) +IR_FOLD(UGT(C_I16, C_I16)) +IR_FOLD(UGT(C_I32, C_I32)) +IR_FOLD(UGT(C_I64, C_I64)) +{ + IR_FOLD_BOOL(op1_insn->val.u64 > op2_insn->val.u64); +} + +IR_FOLD(UGT(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_BOOL(!(op1_insn->val.d <= op2_insn->val.d)); +} + +IR_FOLD(UGT(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_BOOL(!(op1_insn->val.f <= op2_insn->val.f)); +} + +IR_FOLD(ADD(C_U8, C_U8)) +IR_FOLD(ADD(C_U16, C_U16)) +IR_FOLD(ADD(C_U32, C_U32)) +IR_FOLD(ADD(C_U64, C_U64)) +IR_FOLD(ADD(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 + op2_insn->val.u64); +} + +IR_FOLD(ADD(C_I8, C_I8)) +IR_FOLD(ADD(C_I16, C_I16)) +IR_FOLD(ADD(C_I32, C_I32)) +IR_FOLD(ADD(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 + op2_insn->val.i64); +} + +IR_FOLD(ADD(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d + op2_insn->val.d); +} + +IR_FOLD(ADD(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f + op2_insn->val.f); +} + +IR_FOLD(SUB(C_U8, C_U8)) +IR_FOLD(SUB(C_U16, C_U16)) +IR_FOLD(SUB(C_U32, C_U32)) +IR_FOLD(SUB(C_U64, C_U64)) +IR_FOLD(SUB(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 - op2_insn->val.u64); +} + +IR_FOLD(SUB(C_I8, C_I8)) +IR_FOLD(SUB(C_I16, C_I16)) +IR_FOLD(SUB(C_I32, C_I32)) +IR_FOLD(SUB(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 - op2_insn->val.i64); +} + +IR_FOLD(SUB(C_DOUBLE, 
C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d - op2_insn->val.d); +} + +IR_FOLD(SUB(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f - op2_insn->val.f); +} + +IR_FOLD(MUL(C_U8, C_U8)) +IR_FOLD(MUL(C_U16, C_U16)) +IR_FOLD(MUL(C_U32, C_U32)) +IR_FOLD(MUL(C_U64, C_U64)) +IR_FOLD(MUL(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 * op2_insn->val.u64); +} + +IR_FOLD(MUL(C_I8, C_I8)) +IR_FOLD(MUL(C_I16, C_I16)) +IR_FOLD(MUL(C_I32, C_I32)) +IR_FOLD(MUL(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 * op2_insn->val.i64); +} + +IR_FOLD(MUL(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d * op2_insn->val.d); +} + +IR_FOLD(MUL(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f * op2_insn->val.f); +} + +IR_FOLD(DIV(C_U8, C_U8)) +IR_FOLD(DIV(C_U16, C_U16)) +IR_FOLD(DIV(C_U32, C_U32)) +IR_FOLD(DIV(C_U64, C_U64)) +IR_FOLD(DIV(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.u64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 / op2_insn->val.u64); +} + +IR_FOLD(DIV(C_I8, C_I8)) +IR_FOLD(DIV(C_I16, C_I16)) +IR_FOLD(DIV(C_I32, C_I32)) +IR_FOLD(DIV(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.i64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 / op2_insn->val.i64); +} + +IR_FOLD(DIV(C_DOUBLE, C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(op1_insn->val.d / op2_insn->val.d); +} + +IR_FOLD(DIV(C_FLOAT, C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(op1_insn->val.f / op2_insn->val.f); +} + +IR_FOLD(MOD(C_U8, C_U8)) +IR_FOLD(MOD(C_U16, C_U16)) +IR_FOLD(MOD(C_U32, C_U32)) +IR_FOLD(MOD(C_U64, C_U64)) +IR_FOLD(MOD(C_ADDR, C_ADDR)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.u64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 % op2_insn->val.u64); +} + +IR_FOLD(MOD(C_I8, C_I8)) +IR_FOLD(MOD(C_I16, C_I16)) +IR_FOLD(MOD(C_I32, C_I32)) +IR_FOLD(MOD(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.i64 == 0) { + /* division by zero */ + IR_FOLD_EMIT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 % op2_insn->val.i64); +} + +IR_FOLD(NEG(C_I8)) +IR_FOLD(NEG(C_I16)) +IR_FOLD(NEG(C_I32)) +IR_FOLD(NEG(C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(-op1_insn->val.i64); +} + +IR_FOLD(NEG(C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(-op1_insn->val.d); +} + +IR_FOLD(NEG(C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(-op1_insn->val.f); +} + +IR_FOLD(ABS(C_I8)) +IR_FOLD(ABS(C_I16)) +IR_FOLD(ABS(C_I32)) +IR_FOLD(ABS(C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op1_insn->val.i64 >= 0) { + IR_FOLD_COPY(op1); + } else { + IR_FOLD_CONST_I(-op1_insn->val.i64); + } +} + +IR_FOLD(ABS(C_DOUBLE)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_D(fabs(op1_insn->val.d)); +} + +IR_FOLD(ABS(C_FLOAT)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_F(fabsf(op1_insn->val.f)); +} + +IR_FOLD(ADD_OV(C_U8, C_U8)) +IR_FOLD(ADD_OV(C_U16, 
C_U16)) +IR_FOLD(ADD_OV(C_U32, C_U32)) +IR_FOLD(ADD_OV(C_U64, C_U64)) +{ + ir_type type = IR_OPT_TYPE(opt); + uint64_t max = ((uint64_t)0xffffffffffffffff) >> (64 - ir_type_size[type] * 8); + IR_ASSERT(type == op1_insn->type); + if (op1_insn->val.u64 > max - op2_insn->val.u64) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 + op2_insn->val.u64); +} + +IR_FOLD(ADD_OV(C_I8, C_I8)) +IR_FOLD(ADD_OV(C_I16, C_I16)) +IR_FOLD(ADD_OV(C_I32, C_I32)) +IR_FOLD(ADD_OV(C_I64, C_I64)) +{ + ir_type type = IR_OPT_TYPE(opt); + int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8); + int64_t min = - max - 1; + IR_ASSERT(type == op1_insn->type); + if ((op2_insn->val.i64 > 0 && op1_insn->val.i64 > max - op2_insn->val.i64) + || (op2_insn->val.i64 < 0 && op1_insn->val.i64 < min - op2_insn->val.i64)) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 + op2_insn->val.i64); +} + +IR_FOLD(SUB_OV(C_U8, C_U8)) +IR_FOLD(SUB_OV(C_U16, C_U16)) +IR_FOLD(SUB_OV(C_U32, C_U32)) +IR_FOLD(SUB_OV(C_U64, C_U64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op2_insn->val.u64 > op1_insn->val.u64) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_U(op1_insn->val.u64 - op2_insn->val.u64); +} + +IR_FOLD(SUB_OV(C_I8, C_I8)) +IR_FOLD(SUB_OV(C_I16, C_I16)) +IR_FOLD(SUB_OV(C_I32, C_I32)) +IR_FOLD(SUB_OV(C_I64, C_I64)) +{ + ir_type type = IR_OPT_TYPE(opt); + int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8); + int64_t min = - max - 1; + IR_ASSERT(type == op1_insn->type); + if ((op2_insn->val.i64 > 0 && op1_insn->val.i64 < min + op2_insn->val.i64) + || (op2_insn->val.i64 < 0 && op1_insn->val.i64 > max + op2_insn->val.i64)) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_I(op1_insn->val.i64 - op2_insn->val.i64); +} + +IR_FOLD(MUL_OV(C_U8, C_U8)) +IR_FOLD(MUL_OV(C_U16, C_U16)) +IR_FOLD(MUL_OV(C_U32, C_U32)) +IR_FOLD(MUL_OV(C_U64, C_U64)) +{ + ir_type type = IR_OPT_TYPE(opt); + uint64_t max = ((uint64_t)0xffffffffffffffff) >> (64 - ir_type_size[type] * 8); + uint64_t res; + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + res = op1_insn->val.u64 * op2_insn->val.u64; + if (op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64 && res <= max) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_U(res); +} + +IR_FOLD(MUL_OV(C_I8, C_I8)) +IR_FOLD(MUL_OV(C_I16, C_I16)) +IR_FOLD(MUL_OV(C_I32, C_I32)) +IR_FOLD(MUL_OV(C_I64, C_I64)) +{ + ir_type type = IR_OPT_TYPE(opt); + int64_t max = ((uint64_t)0x7fffffffffffffff) >> (64 - ir_type_size[type] * 8); + int64_t min = - max - 1; + int64_t res; + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + res = op1_insn->val.i64 * op2_insn->val.i64; + if (op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64 && res >= min && res <= max) { + IR_FOLD_NEXT; + } + IR_FOLD_CONST_U(res); +} + +IR_FOLD(OVERFLOW(_)) +{ + if (op1_insn->op != IR_ADD_OV && op1_insn->op != IR_SUB_OV && op1_insn->op != IR_MUL_OV) { + IR_FOLD_COPY(IR_FALSE); + } + IR_FOLD_NEXT; +} + +IR_FOLD(NOT(C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(!op1_insn->val.u64); +} + +IR_FOLD(NOT(C_U8)) +IR_FOLD(NOT(C_CHAR)) +IR_FOLD(NOT(C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(~op1_insn->val.u8); +} + +IR_FOLD(NOT(C_U16)) +IR_FOLD(NOT(C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(~op1_insn->val.u16); +} + +IR_FOLD(NOT(C_U32)) +IR_FOLD(NOT(C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(~op1_insn->val.u32); +} + +IR_FOLD(NOT(C_U64)) +IR_FOLD(NOT(C_I64)) +{ + 
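+ /* The bitwise rules in this group fold on the val.u8/u16/u32/u64 member that matches the operand width, so the folded constant is implicitly truncated to the instruction's type before IR_FOLD_CONST_U() materializes it. */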
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(~op1_insn->val.u64); +} + +IR_FOLD(OR(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b || op2_insn->val.b); +} + +IR_FOLD(OR(C_U8, C_U8)) +IR_FOLD(OR(C_CHAR, C_CHAR)) +IR_FOLD(OR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 | op2_insn->val.u8); +} + +IR_FOLD(OR(C_U16, C_U16)) +IR_FOLD(OR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 | op2_insn->val.u16); +} + +IR_FOLD(OR(C_U32, C_U32)) +IR_FOLD(OR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 | op2_insn->val.u32); +} + +IR_FOLD(OR(C_U64, C_U64)) +IR_FOLD(OR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 | op2_insn->val.u64); +} + +IR_FOLD(AND(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b && op2_insn->val.b); +} + +IR_FOLD(AND(C_U8, C_U8)) +IR_FOLD(AND(C_CHAR, C_CHAR)) +IR_FOLD(AND(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 & op2_insn->val.u8); +} + +IR_FOLD(AND(C_U16, C_U16)) +IR_FOLD(AND(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 & op2_insn->val.u16); +} + +IR_FOLD(AND(C_U32, C_U32)) +IR_FOLD(AND(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 & op2_insn->val.u32); +} + +IR_FOLD(AND(C_U64, C_U64)) +IR_FOLD(AND(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 & op2_insn->val.u64); +} + +IR_FOLD(XOR(C_BOOL, C_BOOL)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_BOOL(op1_insn->val.b != op2_insn->val.b); +} + +IR_FOLD(XOR(C_U8, C_U8)) +IR_FOLD(XOR(C_CHAR, C_CHAR)) +IR_FOLD(XOR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 ^ op2_insn->val.u8); +} + +IR_FOLD(XOR(C_U16, C_U16)) +IR_FOLD(XOR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 ^ op2_insn->val.u16); +} + +IR_FOLD(XOR(C_U32, C_U32)) +IR_FOLD(XOR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 ^ op2_insn->val.u32); +} + +IR_FOLD(XOR(C_U64, C_U64)) +IR_FOLD(XOR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 ^ op2_insn->val.u64); +} + +IR_FOLD(SHL(C_U8, C_U8)) +IR_FOLD(SHL(C_CHAR, C_CHAR)) +IR_FOLD(SHL(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 << op2_insn->val.u8); +} + +IR_FOLD(SHL(C_U16, C_U16)) +IR_FOLD(SHL(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 << op2_insn->val.u16); +} + +IR_FOLD(SHL(C_U32, C_U32)) +IR_FOLD(SHL(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 << op2_insn->val.u32); +} + +IR_FOLD(SHL(C_U64, C_U64)) +IR_FOLD(SHL(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 << op2_insn->val.u64); +} + +IR_FOLD(SHR(C_U8, C_U8)) +IR_FOLD(SHR(C_CHAR, C_CHAR)) +IR_FOLD(SHR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u8 >> op2_insn->val.u8); +} + +IR_FOLD(SHR(C_U16, 
C_U16)) +IR_FOLD(SHR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u16 >> op2_insn->val.u16); +} + +IR_FOLD(SHR(C_U32, C_U32)) +IR_FOLD(SHR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u32 >> op2_insn->val.u32); +} + +IR_FOLD(SHR(C_U64, C_U64)) +IR_FOLD(SHR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(op1_insn->val.u64 >> op2_insn->val.u64); +} + +IR_FOLD(SAR(C_U8, C_U8)) +IR_FOLD(SAR(C_CHAR, C_CHAR)) +IR_FOLD(SAR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i8 >> op2_insn->val.i8); +} + +IR_FOLD(SAR(C_U16, C_U16)) +IR_FOLD(SAR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i16 >> op2_insn->val.i16); +} + +IR_FOLD(SAR(C_U32, C_U32)) +IR_FOLD(SAR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i32 >> op2_insn->val.i32); +} + +IR_FOLD(SAR(C_U64, C_U64)) +IR_FOLD(SAR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_I(op1_insn->val.i64 >> op2_insn->val.i64); +} + +IR_FOLD(ROL(C_U8, C_U8)) +IR_FOLD(ROL(C_CHAR, C_CHAR)) +IR_FOLD(ROL(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, op2_insn->val.u8)); +} + +IR_FOLD(ROL(C_U16, C_U16)) +IR_FOLD(ROL(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, op2_insn->val.u16)); +} + +IR_FOLD(ROL(C_U32, C_U32)) +IR_FOLD(ROL(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, op2_insn->val.u32)); +} + +IR_FOLD(ROL(C_U64, C_U64)) +IR_FOLD(ROL(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, op2_insn->val.u64)); +} + +IR_FOLD(ROR(C_U8, C_U8)) +IR_FOLD(ROR(C_CHAR, C_CHAR)) +IR_FOLD(ROR(C_I8, C_I8)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, op2_insn->val.u8)); +} + +IR_FOLD(ROR(C_U16, C_U16)) +IR_FOLD(ROR(C_I16, C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, op2_insn->val.u16)); +} + +IR_FOLD(ROR(C_U32, C_U32)) +IR_FOLD(ROR(C_I32, C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, op2_insn->val.u32)); +} + +IR_FOLD(ROR(C_U64, C_U64)) +IR_FOLD(ROR(C_I64, C_I64)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, op2_insn->val.u64)); +} + +//IR_FOLD(BSWAP(CONST)) +//TODO: bswap + +IR_FOLD(MIN(C_BOOL, C_BOOL)) +IR_FOLD(MIN(C_U8, C_U8)) +IR_FOLD(MIN(C_U16, C_U16)) +IR_FOLD(MIN(C_U32, C_U32)) +IR_FOLD(MIN(C_U64, C_U64)) +IR_FOLD(MIN(C_ADDR, C_ADDR)) +{ + IR_FOLD_COPY(op1_insn->val.u64 <= op2_insn->val.u64 ? op1 : op2); +} + +IR_FOLD(MIN(C_CHAR, C_CHAR)) +IR_FOLD(MIN(C_I8, C_I8)) +IR_FOLD(MIN(C_I16, C_I16)) +IR_FOLD(MIN(C_I32, C_I32)) +IR_FOLD(MIN(C_I64, C_I64)) +{ + IR_FOLD_COPY(op1_insn->val.i64 <= op2_insn->val.i64 ? op1 : op2); +} + +IR_FOLD(MIN(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_COPY(op1_insn->val.d <= op2_insn->val.d ? op1 : op2); +} + +IR_FOLD(MIN(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_COPY(op1_insn->val.f <= op2_insn->val.f ?
op1 : op2); +} + +IR_FOLD(MAX(C_BOOL, C_BOOL)) +IR_FOLD(MAX(C_U8, C_U8)) +IR_FOLD(MAX(C_U16, C_U16)) +IR_FOLD(MAX(C_U32, C_U32)) +IR_FOLD(MAX(C_U64, C_U64)) +IR_FOLD(MAX(C_ADDR, C_ADDR)) +{ + IR_FOLD_COPY(op1_insn->val.u64 >= op2_insn->val.u64 ? op1 : op2); +} + +IR_FOLD(MAX(C_CHAR, C_CHAR)) +IR_FOLD(MAX(C_I8, C_I8)) +IR_FOLD(MAX(C_I16, C_I16)) +IR_FOLD(MAX(C_I32, C_I32)) +IR_FOLD(MAX(C_I64, C_I64)) +{ + IR_FOLD_COPY(op1_insn->val.i64 >= op2_insn->val.i64 ? op1 : op2); +} + +IR_FOLD(MAX(C_DOUBLE, C_DOUBLE)) +{ + IR_FOLD_COPY(op1_insn->val.d >= op2_insn->val.d ? op1 : op2); +} + +IR_FOLD(MAX(C_FLOAT, C_FLOAT)) +{ + IR_FOLD_COPY(op1_insn->val.f >= op2_insn->val.f ? op1 : op2); +} + +IR_FOLD(SEXT(C_I8)) +IR_FOLD(SEXT(C_U8)) +IR_FOLD(SEXT(C_BOOL)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_I((int64_t)op1_insn->val.i8); +} + +IR_FOLD(SEXT(C_I16)) +IR_FOLD(SEXT(C_U16)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_I((int64_t)op1_insn->val.i16); +} + +IR_FOLD(SEXT(C_I32)) +IR_FOLD(SEXT(C_U32)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_I((int64_t)op1_insn->val.i32); +} + +IR_FOLD(ZEXT(C_I8)) +IR_FOLD(ZEXT(C_U8)) +IR_FOLD(ZEXT(C_BOOL)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_U((uint64_t)op1_insn->val.u8); +} + +IR_FOLD(ZEXT(C_I16)) +IR_FOLD(ZEXT(C_U16)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_U((uint64_t)op1_insn->val.u16); +} + +IR_FOLD(ZEXT(C_I32)) +IR_FOLD(ZEXT(C_U32)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] > ir_type_size[op1_insn->type]); + IR_FOLD_CONST_U((uint64_t)op1_insn->val.u32); +} + +IR_FOLD(TRUNC(C_I16)) +IR_FOLD(TRUNC(C_I32)) +IR_FOLD(TRUNC(C_I64)) +IR_FOLD(TRUNC(C_U16)) +IR_FOLD(TRUNC(C_U32)) +IR_FOLD(TRUNC(C_U64)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] < ir_type_size[op1_insn->type]); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I(op1_insn->val.i8); + case IR_I16: + IR_FOLD_CONST_I(op1_insn->val.i16); + case IR_I32: + IR_FOLD_CONST_I(op1_insn->val.i32); + case IR_U8: + IR_FOLD_CONST_U(op1_insn->val.u8); + case IR_U16: + IR_FOLD_CONST_U(op1_insn->val.u16); + case IR_U32: + IR_FOLD_CONST_U(op1_insn->val.u32); + } +} + + +IR_FOLD(BITCAST(C_I8)) +IR_FOLD(BITCAST(C_I16)) +IR_FOLD(BITCAST(C_I32)) +IR_FOLD(BITCAST(C_I64)) +IR_FOLD(BITCAST(C_U8)) +IR_FOLD(BITCAST(C_U16)) +IR_FOLD(BITCAST(C_U32)) +IR_FOLD(BITCAST(C_U64)) +IR_FOLD(BITCAST(C_FLOAT)) +IR_FOLD(BITCAST(C_DOUBLE)) +IR_FOLD(BITCAST(C_BOOL)) +IR_FOLD(BITCAST(C_CHAR)) +IR_FOLD(BITCAST(C_ADDR)) +{ + IR_ASSERT(ir_type_size[IR_OPT_TYPE(opt)] == ir_type_size[op1_insn->type]); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I(op1_insn->val.i8); + case IR_I16: + IR_FOLD_CONST_I(op1_insn->val.i16); + case IR_I32: + IR_FOLD_CONST_I(op1_insn->val.i32); + case IR_I64: + IR_FOLD_CONST_I(op1_insn->val.i64); + case IR_U8: + IR_FOLD_CONST_U(op1_insn->val.u8); + case IR_U16: + IR_FOLD_CONST_U(op1_insn->val.u16); + case IR_U32: + IR_FOLD_CONST_U(op1_insn->val.u32); + case
IR_U64: + IR_FOLD_CONST_U(op1_insn->val.u64); + case IR_FLOAT: + IR_FOLD_CONST_F(op1_insn->val.f); + case IR_DOUBLE: + IR_FOLD_CONST_D(op1_insn->val.d); + case IR_CHAR: + IR_FOLD_CONST_I(op1_insn->val.c); + case IR_ADDR: + IR_FOLD_CONST_U(op1_insn->val.addr); + } +} + +IR_FOLD(INT2FP(C_I8)) +IR_FOLD(INT2FP(C_I16)) +IR_FOLD(INT2FP(C_I32)) +IR_FOLD(INT2FP(C_I64)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.i64); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.i64); + } +} + +IR_FOLD(INT2FP(C_U8)) +IR_FOLD(INT2FP(C_U16)) +IR_FOLD(INT2FP(C_U32)) +IR_FOLD(INT2FP(C_U64)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.u64); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.u64); + } +} + +IR_FOLD(FP2INT(C_FLOAT)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I((int8_t)op1_insn->val.f); + case IR_I16: + IR_FOLD_CONST_I((int16_t)op1_insn->val.f); + case IR_I32: + IR_FOLD_CONST_I((int32_t)op1_insn->val.f); + case IR_I64: + IR_FOLD_CONST_I((int64_t)op1_insn->val.f); + case IR_U8: + IR_FOLD_CONST_U((uint8_t)op1_insn->val.f); + case IR_U16: + IR_FOLD_CONST_U((uint16_t)op1_insn->val.f); + case IR_U32: + IR_FOLD_CONST_U((uint32_t)op1_insn->val.f); + case IR_U64: + IR_FOLD_CONST_U((uint64_t)op1_insn->val.f); + } +} + +IR_FOLD(FP2INT(C_DOUBLE)) +{ + IR_ASSERT(IR_IS_TYPE_INT(IR_OPT_TYPE(opt))); + switch (IR_OPT_TYPE(opt)) { + default: + IR_ASSERT(0); + case IR_I8: + IR_FOLD_CONST_I((int8_t)op1_insn->val.d); + case IR_I16: + IR_FOLD_CONST_I((int16_t)op1_insn->val.d); + case IR_I32: + IR_FOLD_CONST_I((int32_t)op1_insn->val.d); + case IR_I64: + IR_FOLD_CONST_I((int64_t)op1_insn->val.d); + case IR_U8: + IR_FOLD_CONST_U((uint8_t)op1_insn->val.d); + case IR_U16: + IR_FOLD_CONST_U((uint16_t)op1_insn->val.d); + case IR_U32: + IR_FOLD_CONST_U((uint32_t)op1_insn->val.d); + case IR_U64: + IR_FOLD_CONST_U((uint64_t)op1_insn->val.d); + } +} + +IR_FOLD(FP2FP(C_FLOAT)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_CONST_D((double)op1_insn->val.f); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_COPY(op1); + } +} + +IR_FOLD(FP2FP(C_DOUBLE)) +{ + if (IR_OPT_TYPE(opt) == IR_DOUBLE) { + IR_FOLD_COPY(op1); + } else { + IR_ASSERT(IR_OPT_TYPE(opt) == IR_FLOAT); + IR_FOLD_CONST_F((float)op1_insn->val.d); + } +} + +// TODO: constant functions (e.g. 
sin, cos) + +/* Copy Propagation */ +IR_FOLD(COPY(_)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (!op2) { + IR_FOLD_COPY(op1); + } + /* skip CSE */ + IR_FOLD_EMIT; +} + +IR_FOLD(PHI(_, _)) // TODO: PHI(_, _, _) +{ + if (op2 == op3 && op3 != IR_UNUSED) { + IR_FOLD_COPY(op2); + } + /* skip CSE */ + opt = opt | (3 << IR_OPT_INPUTS_SHIFT); + IR_FOLD_EMIT; +} + +IR_FOLD(COND(C_BOOL, _)) // TODO: COND(CONST, _, _) +IR_FOLD(COND(C_U8, _)) +IR_FOLD(COND(C_U16, _)) +IR_FOLD(COND(C_U32, _)) +IR_FOLD(COND(C_U64, _)) +IR_FOLD(COND(C_ADDR, _)) +IR_FOLD(COND(C_CHAR, _)) +IR_FOLD(COND(C_I8, _)) +IR_FOLD(COND(C_I16, _)) +IR_FOLD(COND(C_I32, _)) +IR_FOLD(COND(C_I64, _)) +IR_FOLD(COND(C_DOUBLE, _)) +IR_FOLD(COND(C_FLOAT, _)) +{ + if (ir_const_is_true(op1_insn)) { + IR_FOLD_COPY(op2); + } else { + IR_FOLD_COPY(op3); + } +} + +/* Algebraic simplifications */ +IR_FOLD(ABS(ABS)) +{ + /* abs(x = abs(y)) => x */ + IR_FOLD_COPY(op1); +} + +IR_FOLD(ABS(NEG)) +{ + /* abs(neg(y)) => abs(y) */ + op1 = op1_insn->op1; + IR_FOLD_RESTART; +} + +IR_FOLD(NEG(NEG)) +IR_FOLD(NOT(NOT)) +IR_FOLD(BSWAP(BSWAP)) +{ + /* f(f(y)) => y */ + IR_FOLD_COPY(op1_insn->op1); +} + +IR_FOLD(ADD(_, C_U8)) +IR_FOLD(ADD(_, C_U16)) +IR_FOLD(ADD(_, C_U32)) +IR_FOLD(ADD(_, C_U64)) +IR_FOLD(ADD(_, C_I8)) +IR_FOLD(ADD(_, C_I16)) +IR_FOLD(ADD(_, C_I32)) +IR_FOLD(ADD(_, C_I64)) +IR_FOLD(ADD(_, C_ADDR)) +IR_FOLD(SUB(_, C_U8)) +IR_FOLD(SUB(_, C_U16)) +IR_FOLD(SUB(_, C_U32)) +IR_FOLD(SUB(_, C_U64)) +IR_FOLD(SUB(_, C_I8)) +IR_FOLD(SUB(_, C_I16)) +IR_FOLD(SUB(_, C_I32)) +IR_FOLD(SUB(_, C_I64)) +IR_FOLD(SUB(_, C_ADDR)) +IR_FOLD(ADD_OV(_, C_U8)) +IR_FOLD(ADD_OV(_, C_U16)) +IR_FOLD(ADD_OV(_, C_U32)) +IR_FOLD(ADD_OV(_, C_U64)) +IR_FOLD(ADD_OV(_, C_I8)) +IR_FOLD(ADD_OV(_, C_I16)) +IR_FOLD(ADD_OV(_, C_I32)) +IR_FOLD(ADD_OV(_, C_I64)) +IR_FOLD(ADD_OV(_, C_ADDR)) +IR_FOLD(SUB_OV(_, C_U8)) +IR_FOLD(SUB_OV(_, C_U16)) +IR_FOLD(SUB_OV(_, C_U32)) +IR_FOLD(SUB_OV(_, C_U64)) +IR_FOLD(SUB_OV(_, C_I8)) +IR_FOLD(SUB_OV(_, C_I16)) +IR_FOLD(SUB_OV(_, C_I32)) +IR_FOLD(SUB_OV(_, C_I64)) +IR_FOLD(SUB_OV(_, C_ADDR)) +{ + if (op2_insn->val.u64 == 0) { + /* a +/- 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(C_I8, _)) +IR_FOLD(SUB(C_I16, _)) +IR_FOLD(SUB(C_I32, _)) +IR_FOLD(SUB(C_I64, _)) +{ + if (op1_insn->val.u64 == 0) { + /* 0 - a => -a (invalid for +0.0) */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD(NEG, _)) +{ + /* (-a) + b => b - a */ + opt++; /* ADD -> SUB */ + op1 = op2; + op2 = op1_insn->op1; + IR_FOLD_RESTART; +} + +IR_FOLD(ADD(_, NEG)) +IR_FOLD(SUB(_,NEG)) +{ + /* a + (-b) => a - b, a - (-b) => a + b */ + opt ^= 1; /* ADD <-> SUB */ + op2 = op2_insn->op1; + IR_FOLD_RESTART; +} + +IR_FOLD(ADD(SUB, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op2 == op2) { + /* (a - b) + b => a */ + IR_FOLD_COPY(op1_insn->op1); + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD(_, SUB)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op2_insn->op2 == op1) { + /* a + (b - a) => b */ + IR_FOLD_COPY(op2_insn->op1); + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(ADD, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op1 == op2) { + /* (a + b) - a => b */ + IR_FOLD_COPY(op1_insn->op2); + } else if (op1_insn->op2 == op2) { + /* (a + b) - b => a */ + IR_FOLD_COPY(op1_insn->op1); + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(_, ADD)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op2_insn->op1 == op1) { + /* a - (a + b) => -b */ + opt = IR_NEG | (opt &
IR_OPT_TYPE_MASK); + op1 = op2_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } else if (op2_insn->op2 == op1) { + /* b - (a + b) => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op2_insn->op1; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(SUB, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op1 == op2) { + /* (a - b) - a => -b */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op1 = op1_insn->op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(_, SUB)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op2_insn->op1 == op1) { + /* a - (a - b) => b */ + IR_FOLD_COPY(op2_insn->op2); + } + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB(ADD, ADD)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt))) { + if (op1_insn->op1 == op2_insn->op1) { + /* (a + b) - (a + c) => b - c */ + op1 = op1_insn->op2; + op2 = op2_insn->op2; + IR_FOLD_RESTART; + } else if (op1_insn->op1 == op2_insn->op2) { + /* (a + b) - (c + a) => b - c */ + op1 = op1_insn->op2; + op2 = op2_insn->op1; + IR_FOLD_RESTART; + } else if (op1_insn->op2 == op2_insn->op1) { + /* (a + b) - (b + c) => a - c */ + op1 = op1_insn->op1; + op2 = op2_insn->op2; + IR_FOLD_RESTART; + } else if (op1_insn->op2 == op2_insn->op2) { + /* (a + b) - (c + b) => a - c */ + op1 = op1_insn->op1; + op2 = op2_insn->op1; + IR_FOLD_RESTART; + } + } + IR_FOLD_NEXT; +} + +// IR_FOLD(SUB(NEG, CONST)) TODO: -a - b => -b - a +// IR_FOLD(MUL(NEG, CONST)) TODO: -a * b => a * -b +// IR_FOLD(DIV(NEG, CONST)) TODO: -a / b => a / -b + +IR_FOLD(MUL(_, C_U8)) +IR_FOLD(MUL(_, C_U16)) +IR_FOLD(MUL(_, C_U32)) +IR_FOLD(MUL(_, C_U64)) +{ + if (op2_insn->val.u64 == 0) { + /* a * 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.u64 == 1) { + IR_FOLD_COPY(op1); + } else if (op2_insn->val.u64 == 2) { + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_I8)) +IR_FOLD(MUL(_, C_I16)) +IR_FOLD(MUL(_, C_I32)) +IR_FOLD(MUL(_, C_I64)) +{ + if (op2_insn->val.i64 == 0) { + /* a * 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i64 == 1) { + /* a * 1 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i64 == 2) { + /* a * 2 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.i64 == -1) { + /* a * -1 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_DOUBLE)) +{ + if (op2_insn->val.d == 1.0) { + /* a * 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.d == 2.0) { + /* a * 2.0 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.d == -1.0) { + /* a * -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(_, C_FLOAT)) +{ + if (op2_insn->val.f == 1.0) { + /* a * 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.f == 2.0) { + /* a * 2.0 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } else if (op2_insn->val.f == -1.0) { + /* a * -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_U8)) +IR_FOLD(DIV(_, C_U16)) +IR_FOLD(DIV(_, C_U32)) +IR_FOLD(DIV(_, C_U64)) +{ + if (op2_insn->val.u64 == 1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_I8)) +IR_FOLD(DIV(_, C_I16)) +IR_FOLD(DIV(_, C_I32)) 
+IR_FOLD(DIV(_, C_I64)) +{ + if (op2_insn->val.i64 == 1) { + /* a / 1 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i64 == -1) { + /* a / -1 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_DOUBLE)) +{ + if (op2_insn->val.d == 1.0) { + /* a / 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.d == -1.0) { + /* a / -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(DIV(_, C_FLOAT)) +{ + if (op2_insn->val.f == 1.0) { + /* a / 1.0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.f == -1.0) { + /* a / -1.0 => -a */ + opt = IR_NEG | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(NEG, NEG)) +IR_FOLD(DIV(NEG, NEG)) +{ + op1 = op1_insn->op1; + op2 = op2_insn->op1; + IR_FOLD_RESTART; +} + +IR_FOLD(AND(_, C_BOOL)) +{ + IR_FOLD_COPY(op2_insn->val.b ? op1 : op2); +} + +IR_FOLD(AND(_, C_U8)) +IR_FOLD(AND(_, C_I8)) +IR_FOLD(AND(_, C_CHAR)) +{ + if (op2_insn->val.i8 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i8 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U16)) +IR_FOLD(AND(_, C_I16)) +{ + if (op2_insn->val.i16 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i16 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U32)) +IR_FOLD(AND(_, C_I32)) +{ + if (op2_insn->val.i32 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i32 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(_, C_U64)) +IR_FOLD(AND(_, C_I64)) +{ + if (op2_insn->val.i64 == 0) { + /* a & 0 => 0 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i64 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(_, C_BOOL)) +{ + IR_FOLD_COPY(op2_insn->val.b ? 
op2 : op1); +} + +IR_FOLD(OR(_, C_U8)) +IR_FOLD(OR(_, C_I8)) +IR_FOLD(OR(_, C_CHAR)) +{ + if (op2_insn->val.i8 == -1) { + /* a | -1 => -1 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i8 == 0) { + /* a | 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(_, C_U16)) +IR_FOLD(OR(_, C_I16)) +{ + if (op2_insn->val.i16 == -1) { + /* a | -1 => -1 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i16 == 0) { + /* a | 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(_, C_U32)) +IR_FOLD(OR(_, C_I32)) +{ + if (op2_insn->val.i32 == -1) { + /* a | -1 => -1 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i32 == 0) { + /* a | 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(_, C_U64)) +IR_FOLD(OR(_, C_I64)) +{ + if (op2_insn->val.i64 == -1) { + /* a | -1 => -1 */ + IR_FOLD_COPY(op2); + } else if (op2_insn->val.i64 == 0) { + /* a | 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(_, C_BOOL)) +{ + if (!op2_insn->val.b) { + /* a ^ 0 => a */ + IR_FOLD_COPY(op1); + } else { + /* a ^ 1 => !a */ + opt = IR_NOT | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } +} + +IR_FOLD(XOR(_, C_U8)) +IR_FOLD(XOR(_, C_I8)) +IR_FOLD(XOR(_, C_CHAR)) +{ + if (op2_insn->val.i8 == 0) { + /* a ^ 0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i8 == -1) { + /* a ^ -1 => ~a */ + opt = IR_NOT | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(_, C_U16)) +IR_FOLD(XOR(_, C_I16)) +{ + if (op2_insn->val.i16 == 0) { + /* a ^ 0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i16 == -1) { + /* a ^ -1 => ~a */ + opt = IR_NOT | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(_, C_U32)) +IR_FOLD(XOR(_, C_I32)) +{ + if (op2_insn->val.i32 == 0) { + /* a ^ 0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i32 == -1) { + /* a ^ -1 => ~a */ + opt = IR_NOT | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(_, C_U64)) +IR_FOLD(XOR(_, C_I64)) +{ + if (op2_insn->val.i64 == 0) { + /* a ^ 0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.i64 == -1) { + /* a ^ -1 => ~a */ + opt = IR_NOT | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(SHL(_, C_U8)) +IR_FOLD(SHL(_, C_U16)) +IR_FOLD(SHL(_, C_U32)) +IR_FOLD(SHL(_, C_U64)) +IR_FOLD(SHL(_, C_I8)) +IR_FOLD(SHL(_, C_I16)) +IR_FOLD(SHL(_, C_I32)) +IR_FOLD(SHL(_, C_I64)) +{ + if (op2_insn->val.u64 == 0) { + /* a << 0 => a */ + IR_FOLD_COPY(op1); + } else if (op2_insn->val.u64 == 1) { + /* a << 1 => a + a */ + opt = IR_ADD | (opt & IR_OPT_TYPE_MASK); + op2 = op1; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(SHR(_, C_U8)) +IR_FOLD(SHR(_, C_U16)) +IR_FOLD(SHR(_, C_U32)) +IR_FOLD(SHR(_, C_U64)) +IR_FOLD(SHR(_, C_I8)) +IR_FOLD(SHR(_, C_I16)) +IR_FOLD(SHR(_, C_I32)) +IR_FOLD(SHR(_, C_I64)) +IR_FOLD(SAR(_, C_U8)) +IR_FOLD(SAR(_, C_U16)) +IR_FOLD(SAR(_, C_U32)) +IR_FOLD(SAR(_, C_U64)) +IR_FOLD(SAR(_, C_I8)) +IR_FOLD(SAR(_, C_I16)) +IR_FOLD(SAR(_, C_I32)) +IR_FOLD(SAR(_, C_I64)) +IR_FOLD(ROL(_, C_U8)) +IR_FOLD(ROL(_, C_U16)) +IR_FOLD(ROL(_, C_U32)) +IR_FOLD(ROL(_, C_U64)) +IR_FOLD(ROL(_, C_I8)) +IR_FOLD(ROL(_, C_I16)) +IR_FOLD(ROL(_, C_I32)) +IR_FOLD(ROL(_, C_I64)) +IR_FOLD(ROR(_, C_U8)) +IR_FOLD(ROR(_, C_U16)) +IR_FOLD(ROR(_, C_U32)) +IR_FOLD(ROR(_, C_U64)) +IR_FOLD(ROR(_, C_I8)) +IR_FOLD(ROR(_, C_I16)) +IR_FOLD(ROR(_, C_I32)) +IR_FOLD(ROR(_, C_I64)) +{ + if (op2_insn->val.u64 == 0) { + /* a >> 0 => a */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + 
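The identity rules above encode bit-level equivalences that are easy to sanity-check outside the JIT. Below is a minimal standalone sketch (plain C with asserts; an illustration, not part of the patch and not an IR API) of the rewrites performed by the OR, XOR, SHL, and shift/rotate-by-zero rules:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int32_t a = 0x12345678;     /* arbitrary sample value */
	assert((a | -1) == -1);     /* OR(_, C_I32): a | -1 => -1 */
	assert((a | 0) == a);       /* OR(_, C_I32): a | 0 => a */
	assert((a ^ -1) == ~a);     /* XOR(_, C_I32): rewritten to IR_NOT */
	assert((a << 1) == a + a);  /* SHL(_, C_I32): strength-reduced to IR_ADD */
	assert((a >> 0) == a);      /* SHR/SAR/ROL/ROR by 0: operand copied unchanged */
	return 0;
}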
+IR_FOLD(SHL(C_U8, _)) +IR_FOLD(SHL(C_U16, _)) +IR_FOLD(SHL(C_U32, _)) +IR_FOLD(SHL(C_U64, _)) +IR_FOLD(SHL(C_I8, _)) +IR_FOLD(SHL(C_I16, _)) +IR_FOLD(SHL(C_I32, _)) +IR_FOLD(SHL(C_I64, _)) +IR_FOLD(SHR(C_U8, _)) +IR_FOLD(SHR(C_U16, _)) +IR_FOLD(SHR(C_U32, _)) +IR_FOLD(SHR(C_U64, _)) +IR_FOLD(SHR(C_I8, _)) +IR_FOLD(SHR(C_I16, _)) +IR_FOLD(SHR(C_I32, _)) +IR_FOLD(SHR(C_I64, _)) +{ + if (op1_insn->val.u64 == 0) { + /* 0 << a => 0 */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U8, _)) +IR_FOLD(SAR(C_I8, _)) +IR_FOLD(ROL(C_U8, _)) +IR_FOLD(ROL(C_I8, _)) +IR_FOLD(ROR(C_U8, _)) +IR_FOLD(ROR(C_I8, _)) +{ + if (op1_insn->val.i8 == 0 || op1_insn->val.i8 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U16, _)) +IR_FOLD(SAR(C_I16, _)) +IR_FOLD(ROL(C_U16, _)) +IR_FOLD(ROL(C_I16, _)) +IR_FOLD(ROR(C_U16, _)) +IR_FOLD(ROR(C_I16, _)) +{ + if (op1_insn->val.i16 == 0 || op1_insn->val.i16 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U32, _)) +IR_FOLD(SAR(C_I32, _)) +IR_FOLD(ROL(C_U32, _)) +IR_FOLD(ROL(C_I32, _)) +IR_FOLD(ROR(C_U32, _)) +IR_FOLD(ROR(C_I32, _)) +{ + if (op1_insn->val.i32 == 0 || op1_insn->val.i32 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SAR(C_U64, _)) +IR_FOLD(SAR(C_I64, _)) +IR_FOLD(ROL(C_U64, _)) +IR_FOLD(ROL(C_I64, _)) +IR_FOLD(ROR(C_U64, _)) +IR_FOLD(ROR(C_I64, _)) +{ + if (op1_insn->val.i64 == 0 || op1_insn->val.i64 == -1) { + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +// TODO: conversions + +// TODO: Reassociation +IR_FOLD(ADD(ADD, C_U8)) +IR_FOLD(ADD(ADD, C_U16)) +IR_FOLD(ADD(ADD, C_U32)) +IR_FOLD(ADD(ADD, C_U64)) +IR_FOLD(ADD(ADD, C_ADDR)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x + c1) + c2 => x + (c1 + c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 + op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD(ADD, C_I8)) +IR_FOLD(ADD(ADD, C_I16)) +IR_FOLD(ADD(ADD, C_I32)) +IR_FOLD(ADD(ADD, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x + c1) + c2 => x + (c1 + c2) */ + val.i64 = ctx->ir_base[op1_insn->op2].val.i64 + op2_insn->val.i64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(MUL, C_U8)) +IR_FOLD(MUL(MUL, C_U16)) +IR_FOLD(MUL(MUL, C_U32)) +IR_FOLD(MUL(MUL, C_U64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x * c1) * c2 => x * (c1 * c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 * op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(MUL(MUL, C_I8)) +IR_FOLD(MUL(MUL, C_I16)) +IR_FOLD(MUL(MUL, C_I32)) +IR_FOLD(MUL(MUL, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x * c1) * c2 => x * (c1 * c2) */ + val.i64 = ctx->ir_base[op1_insn->op2].val.i64 * op2_insn->val.i64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(AND, C_U8)) +IR_FOLD(AND(AND, C_U16)) +IR_FOLD(AND(AND, C_U32)) +IR_FOLD(AND(AND, C_U64)) +IR_FOLD(AND(AND, C_I8)) +IR_FOLD(AND(AND, C_I16)) +IR_FOLD(AND(AND, C_I32)) +IR_FOLD(AND(AND, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x & c1) & c2 => x & (c1 & c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 & op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(OR(OR, C_U8)) +IR_FOLD(OR(OR, C_U16)) 
+IR_FOLD(OR(OR, C_U32)) +IR_FOLD(OR(OR, C_U64)) +IR_FOLD(OR(OR, C_I8)) +IR_FOLD(OR(OR, C_I16)) +IR_FOLD(OR(OR, C_I32)) +IR_FOLD(OR(OR, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x | c1) | c2 => x | (c1 | c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 | op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(XOR, C_U8)) +IR_FOLD(XOR(XOR, C_U16)) +IR_FOLD(XOR(XOR, C_U32)) +IR_FOLD(XOR(XOR, C_U64)) +IR_FOLD(XOR(XOR, C_I8)) +IR_FOLD(XOR(XOR, C_I16)) +IR_FOLD(XOR(XOR, C_I32)) +IR_FOLD(XOR(XOR, C_I64)) +{ + if (IR_IS_CONST_REF(op1_insn->op2)) { + /* (x ^ c1) ^ c2 => x ^ (c1 ^ c2) */ + val.u64 = ctx->ir_base[op1_insn->op2].val.u64 ^ op2_insn->val.u64; + op1 = op1_insn->op1; + op2 = ir_const(ctx, val, IR_OPT_TYPE(opt)); + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(AND(AND, _)) +IR_FOLD(OR(OR, _)) +IR_FOLD(MIN(MIN, _)) +IR_FOLD(MAX(MAX, _)) +{ + if (op1_insn->op1 == op2 || op1_insn->op2 == op2) { + /* (a & b) & a => a & b (likewise for |, MIN, MAX) */ + IR_FOLD_COPY(op1); + } + IR_FOLD_NEXT; +} + +IR_FOLD(XOR(XOR, _)) +{ + if (op1_insn->op1 == op2) { + IR_FOLD_COPY(op1_insn->op2); + } else if (op1_insn->op2 == op2) { + IR_FOLD_COPY(op1_insn->op1); + } + IR_FOLD_NEXT; +} + +/* Swap operands (move lower ref to op2) for better CSE */ +IR_FOLD(ADD(_, _)) +IR_FOLD(MUL(_, _)) +IR_FOLD_NAMED(swap_ops) +{ + if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ADD_OV(_, _)) +IR_FOLD(MUL_OV(_, _)) +{ + if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + IR_FOLD_RESTART; + } + /* skip CSE ??? */ + IR_FOLD_EMIT; +} + +IR_FOLD(SUB(_, _)) +{ + if (IR_IS_TYPE_INT(IR_OPT_TYPE(opt)) && op1 == op2) { + IR_FOLD_CONST_U(0); + } + IR_FOLD_NEXT; +} + +IR_FOLD(SUB_OV(_, _)) +{ + if (op1 == op2) { + IR_FOLD_CONST_U(0); + } + /* skip CSE ???
*/ + IR_FOLD_EMIT; +} + +/* Binary operations with op1 == op2 */ +IR_FOLD(AND(_,_)) +IR_FOLD(OR(_,_)) +IR_FOLD(MIN(_, _)) +IR_FOLD(MAX(_, _)) +{ + /* a & a => a */ + if (op1 == op2) { + IR_FOLD_COPY(op1); + } + IR_FOLD_DO_NAMED(swap_ops); +} + +IR_FOLD(XOR(_,_)) +{ + /* a xor a => 0 */ + if (op1 == op2) { + IR_FOLD_CONST_U(0); + } + IR_FOLD_DO_NAMED(swap_ops); +} + +IR_FOLD(EQ(_, _)) +IR_FOLD(NE(_, _)) +{ + if (op1 != op2) { + IR_FOLD_DO_NAMED(swap_ops); + } else if (IR_IS_TYPE_INT(op1_insn->type)) { + /* a == a => true */ + IR_FOLD_BOOL((opt & IR_OPT_OP_MASK) == IR_EQ); + } + IR_FOLD_NEXT; +} + +IR_FOLD(LT(_, _)) +IR_FOLD(GE(_, _)) +IR_FOLD(LE(_, _)) +IR_FOLD(GT(_, _)) +{ + if (op1 == op2) { + if (IR_IS_TYPE_INT(op1_insn->type)) { + /* a >= a => true (the two low opcode bits differ) */ + IR_FOLD_BOOL((opt ^ (opt >> 1)) & 1); + } + } else if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + opt ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */ + IR_FOLD_RESTART; + } + IR_FOLD_NEXT; +} + +IR_FOLD(ULT(_, _)) +IR_FOLD(UGE(_, _)) +IR_FOLD(ULE(_, _)) +IR_FOLD(UGT(_, _)) +{ + if (op1 == op2) { + /* a >= a => true (the two low opcode bits differ) */ + IR_FOLD_BOOL((opt ^ (opt >> 1)) & 1); + } else if (op1 < op2) { /* move lower ref to op2 */ + ir_ref tmp = op1; + op1 = op2; + op2 = tmp; + opt ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */ + } + IR_FOLD_NEXT; +} + +IR_FOLD(COND(_, _)) // TODO: COND(_, _, _) +{ + if (op2 == op3) { + IR_FOLD_COPY(op2); + } + IR_FOLD_NEXT; +} diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c new file mode 100644 index 0000000000000..694271a57c212 --- /dev/null +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -0,0 +1,897 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (GCM - Global Code Motion and Scheduler) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * The GCM algorithm is based on Cliff Click's publication + * See: C. Click. "Global Code Motion / Global Value Numbering", PLDI'95. + */ + +#include "ir.h" +#include "ir_private.h" + +static int32_t ir_gcm_schedule_early(ir_ctx *ctx, int32_t *_blocks, ir_ref ref, ir_list *queue_rest) +{ + ir_ref n, *p, input; + ir_insn *insn; + uint32_t dom_depth; + int32_t b, result; + bool reschedule_late = 1; + + insn = &ctx->ir_base[ref]; + + IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); + IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); + + result = 1; + dom_depth = 0; + + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, n--) { + input = *p; + if (input > 0) { + b = _blocks[input]; + if (b == 0) { + b = ir_gcm_schedule_early(ctx, _blocks, input, queue_rest); + } else if (b < 0) { + b = -b; + } + if (dom_depth < ctx->cfg_blocks[b].dom_depth) { + dom_depth = ctx->cfg_blocks[b].dom_depth; + result = b; + } + reschedule_late = 0; + } + } + _blocks[ref] = -result; + + if (UNEXPECTED(reschedule_late)) { + /* Floating nodes that don't depend on other nodes + * (e.g. they depend only on constants) have to be scheduled to the + * last common ancestor. Otherwise they would always go to the + * first block.
+ */ + ir_list_push_unchecked(queue_rest, ref); + } + return result; +} + +/* Last Common Ancestor */ +static int32_t ir_gcm_find_lca(ir_ctx *ctx, int32_t b1, int32_t b2) +{ + uint32_t dom_depth; + + dom_depth = ctx->cfg_blocks[b2].dom_depth; + while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { + b1 = ctx->cfg_blocks[b1].dom_parent; + } + dom_depth = ctx->cfg_blocks[b1].dom_depth; + while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { + b2 = ctx->cfg_blocks[b2].dom_parent; + } + while (b1 != b2) { + b1 = ctx->cfg_blocks[b1].dom_parent; + b2 = ctx->cfg_blocks[b2].dom_parent; + } + return b2; +} + +static void ir_gcm_schedule_late(ir_ctx *ctx, int32_t *_blocks, ir_ref ref) +{ + ir_ref n, *p, use; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(_blocks[ref] < 0); + _blocks[ref] = -_blocks[ref]; + use_list = &ctx->use_lists[ref]; + n = use_list->count; + if (n) { + int32_t lca, b; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); + IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); + + lca = 0; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + b = _blocks[use]; + if (!b) { + continue; + } else if (b < 0) { + ir_gcm_schedule_late(ctx, _blocks, use); + b = _blocks[use]; + IR_ASSERT(b != 0); + } + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + ir_ref n = insn->inputs_count - 1; + + for (;n > 0; p++, q++, n--) { + if (*p == ref) { + b = _blocks[*q]; + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + } else { + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + IR_ASSERT(lca != 0 && "No Common Ancestor"); + b = lca; + + if (b != _blocks[ref]) { + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t loop_depth = bb->loop_depth; + + if (loop_depth) { + uint32_t flags; + + use_list = &ctx->use_lists[ref]; + if (use_list->count == 1) { + use = ctx->use_edges[use_list->refs]; + insn = &ctx->ir_base[use]; + if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { + _blocks[ref] = b; + return; + } + } + + flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; + if ((flags & IR_BB_LOOP_WITH_ENTRY) + && !(ctx->binding && ir_binding_find(ctx, ref))) { + /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */ + } else { + do { + lca = bb->dom_parent; + bb = &ctx->cfg_blocks[lca]; + if (bb->loop_depth < loop_depth) { + if (!bb->loop_depth) { + b = lca; + break; + } + flags = (bb->flags & IR_BB_LOOP_HEADER) ? 
bb->flags : ctx->cfg_blocks[bb->loop_header].flags; + if ((flags & IR_BB_LOOP_WITH_ENTRY) + && !(ctx->binding && ir_binding_find(ctx, ref))) { + break; + } + loop_depth = bb->loop_depth; + b = lca; + } + } while (lca != _blocks[ref]); + } + } + _blocks[ref] = b; + if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) { + /* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */ + _blocks[ref + 1] = b; + } + } + } +} + +static void ir_gcm_schedule_rest(ir_ctx *ctx, int32_t *_blocks, ir_ref ref) +{ + ir_ref n, *p, use; + ir_insn *insn; + + IR_ASSERT(_blocks[ref] < 0); + _blocks[ref] = -_blocks[ref]; + n = ctx->use_lists[ref].count; + if (n) { + uint32_t lca; + int32_t b; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); + IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); + + lca = 0; + for (p = &ctx->use_edges[ctx->use_lists[ref].refs]; n > 0; p++, n--) { + use = *p; + b = _blocks[use]; + if (!b) { + continue; + } else if (b < 0) { + ir_gcm_schedule_late(ctx, _blocks, use); + b = _blocks[use]; + IR_ASSERT(b != 0); + } + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + + ir_ref n = insn->inputs_count - 1; + + for (;n > 0; p++, q++, n--) { + if (*p == ref) { + b = _blocks[*q]; + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + } else { + lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); + } + } + IR_ASSERT(lca != 0 && "No Common Ancestor"); + b = lca; + _blocks[ref] = b; + if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) { + /* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */ + _blocks[ref + 1] = b; + } + } +} + +int ir_gcm(ir_ctx *ctx) +{ + ir_ref k, n, *p, ref; + ir_block *bb; + ir_list queue_early; + ir_list queue_late; + ir_list queue_rest; + int32_t *_blocks, b; + ir_insn *insn, *use_insn; + ir_use_list *use_list; + + IR_ASSERT(ctx->cfg_map); + _blocks = (int32_t*)ctx->cfg_map; + + ir_list_init(&queue_early, ctx->insns_count); + + if (ctx->cfg_blocks_count == 1) { + ref = ctx->cfg_blocks[1].end; + do { + insn = &ctx->ir_base[ref]; + _blocks[ref] = 1; /* pin to block */ + if (insn->inputs_count > 1) { + /* insn has input data edges */ + ir_list_push_unchecked(&queue_early, ref); + } + ref = insn->op1; /* control predecessor */ + } while (ref != 1); /* IR_START */ + _blocks[1] = 1; /* pin to block */ + + use_list = &ctx->use_lists[1]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { + ref = *p; + use_insn = &ctx->ir_base[ref]; + if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) { + ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? 
IR_BB_HAS_PARAM : IR_BB_HAS_VAR; + _blocks[ref] = 1; /* pin to block */ + } + } + + /* Place all live nodes into the first block */ + while (ir_list_len(&queue_early)) { + ref = ir_list_pop(&queue_early); + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + for (p = insn->ops + 1; n > 0; p++, n--) { + ref = *p; + if (ref > 0 && _blocks[ref] == 0) { + _blocks[ref] = 1; + ir_list_push_unchecked(&queue_early, ref); + } + } + } + + ir_list_free(&queue_early); + + return 1; + } + + ir_list_init(&queue_late, ctx->insns_count); + + /* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */ + b = ctx->cfg_blocks_count; + for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + ref = bb->end; + + /* process the last instruction of the block */ + insn = &ctx->ir_base[ref]; + _blocks[ref] = b; /* pin to block */ + if (insn->inputs_count > 1) { + /* insn has input data edges */ + ir_list_push_unchecked(&queue_early, ref); + } + ref = insn->op1; /* control predecessor */ + + while (ref != bb->start) { + insn = &ctx->ir_base[ref]; + _blocks[ref] = b; /* pin to block */ + if (insn->inputs_count > 1) { + /* insn has input data edges */ + ir_list_push_unchecked(&queue_early, ref); + } + if (insn->type != IR_VOID) { + IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM); + ir_list_push_unchecked(&queue_late, ref); + } + ref = insn->op1; /* control predecessor */ + } + + /* process the first instruction of the block */ + _blocks[ref] = b; /* pin to block */ + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + if (n > 1) { + for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { + ref = *p; + use_insn = &ctx->ir_base[ref]; + if (use_insn->op == IR_PHI || use_insn->op == IR_PI) { + bb->flags |= (use_insn->op == IR_PHI) ?
IR_BB_HAS_PHI : IR_BB_HAS_PI; + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + _blocks[ref] = b; /* pin to block */ + ir_list_push_unchecked(&queue_early, ref); + ir_list_push_unchecked(&queue_late, ref); + } + } else if (use_insn->op == IR_PARAM) { + bb->flags |= IR_BB_HAS_PARAM; + _blocks[ref] = b; /* pin to block */ + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + ir_list_push_unchecked(&queue_late, ref); + } + } else if (use_insn->op == IR_VAR) { + bb->flags |= IR_BB_HAS_VAR; + _blocks[ref] = b; /* pin to block */ + if (EXPECTED(ctx->use_lists[ref].count != 0)) { + /* This is necessary only for VADDR */ + ir_list_push_unchecked(&queue_late, ref); + } + } + } + } + } + + ir_list_init(&queue_rest, ctx->insns_count); + + n = ir_list_len(&queue_early); + while (n > 0) { + n--; + ref = ir_list_at(&queue_early, n); + insn = &ctx->ir_base[ref]; + k = insn->inputs_count - 1; + for (p = insn->ops + 2; k > 0; p++, k--) { + ref = *p; + if (ref > 0 && _blocks[ref] == 0) { + ir_gcm_schedule_early(ctx, _blocks, ref, &queue_rest); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_GCM) { + fprintf(stderr, "GCM Schedule Early\n"); + for (n = 1; n < ctx->insns_count; n++) { + fprintf(stderr, "%d -> %d\n", n, _blocks[n]); + } + } +#endif + + n = ir_list_len(&queue_late); + while (n > 0) { + n--; + ref = ir_list_at(&queue_late, n); + use_list = &ctx->use_lists[ref]; + k = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) { + ref = *p; + if (_blocks[ref] < 0) { + ir_gcm_schedule_late(ctx, _blocks, ref); + } + } + } + + n = ir_list_len(&queue_rest); + while (n > 0) { + n--; + ref = ir_list_at(&queue_rest, n); + ir_gcm_schedule_rest(ctx, _blocks, ref); + } + + ir_list_free(&queue_early); + ir_list_free(&queue_late); + ir_list_free(&queue_rest); + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_GCM) { + fprintf(stderr, "GCM Schedule Late\n"); + for (n = 1; n < ctx->insns_count; n++) { + fprintf(stderr, "%d -> %d\n", n, _blocks[n]); + } + } +#endif + + return 1; +} + +static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) +{ + uint32_t n1, n2, pos; + ir_ref key; + ir_hashtab_bucket *b1, *b2; + ir_hashtab *binding = ctx->binding; + uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask); + + memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); + n1 = binding->count; + n2 = 0; + pos = 0; + b1 = binding->data; + b2 = binding->data; + while (n1 > 0) { + key = b1->key; + IR_ASSERT(key < ctx->insns_count); + if (_xlat[key]) { + key = _xlat[key]; + b2->key = key; + if (b1->val > 0) { + IR_ASSERT(_xlat[b1->val]); + b2->val = _xlat[b1->val]; + } else { + b2->val = b1->val; + } + key |= binding->mask; + b2->next = ((uint32_t*)binding->data)[key]; + ((uint32_t*)binding->data)[key] = pos; + pos += sizeof(ir_hashtab_bucket); + b2++; + n2++; + } + b1++; + n1--; + } + binding->count = n2; +} + +IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref) +{ + if (!_xlat[ref]) { + _xlat[ref] = ref; /* this is only a "used constant" marker */ + return 1; + } + return 0; +} + +int ir_schedule(ir_ctx *ctx) +{ + ir_ctx new_ctx; + ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count; + ir_ref *_xlat; + ir_ref *edges; + uint32_t b, prev_b; + uint32_t *_blocks = ctx->cfg_map; + ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + ir_ref _move_down = 0; + ir_block *bb; + ir_insn *insn, *new_insn; + ir_use_list *lists, 
*use_list, *new_list; + + /* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */ + prev_b = _blocks[1]; + IR_ASSERT(prev_b); + _prev[1] = 0; + _prev[ctx->cfg_blocks[1].end] = 0; + for (i = 2, j = 1; i < ctx->insns_count; i++) { + b = _blocks[i]; + IR_ASSERT((int32_t)b >= 0); + if (b == prev_b) { + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else if (b > prev_b) { + bb = &ctx->cfg_blocks[b]; + if (i == bb->start) { + IR_ASSERT(bb->end > bb->start); + prev_b = b; + _prev[bb->end] = 0; + /* add to the end of the list */ + _next[j] = i; + _prev[i] = j; + j = i; + } else { + IR_ASSERT(i != bb->end); + /* move down late (see the following loop) */ + _next[i] = _move_down; + _move_down = i; + } + } else if (b) { + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(i != bb->start); + if (_prev[bb->end]) { + /* move up, insert before the end of the already scheduled BB */ + k = bb->end; + } else { + /* move up, insert at the end of the block */ + k = ctx->cfg_blocks[b + 1].start; + } + /* insert before "k" */ + _prev[i] = _prev[k]; + _next[i] = k; + _next[_prev[k]] = i; + _prev[k] = i; + } + } + _next[j] = 0; + + while (_move_down) { + i = _move_down; + _move_down = _next[i]; + b = _blocks[i]; + bb = &ctx->cfg_blocks[b]; + k = _next[bb->start]; + + if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { + /* insert after the start of the block and all PARAM, VAR, PI, PHI */ + insn = &ctx->ir_base[k]; + while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { + k = _next[k]; + insn = &ctx->ir_base[k]; + } + } + + /* insert before "k" */ + _prev[i] = _prev[k]; + _next[i] = k; + _next[_prev[k]] = i; + _prev[k] = i; + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "Before Schedule\n"); + for (i = 1; i != 0; i = _next[i]) { + fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + } + } +#endif + + _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); + _xlat += ctx->consts_count; + _xlat[IR_TRUE] = IR_TRUE; + _xlat[IR_FALSE] = IR_FALSE; + _xlat[IR_NULL] = IR_NULL; + _xlat[IR_UNUSED] = IR_UNUSED; + insns_count = 1; + consts_count = -(IR_TRUE - 1); + + /* Topological sort according to dependencies inside each basic block */ + for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + /* Schedule BB start */ + i = bb->start; + _xlat[i] = bb->start = insns_count; + insn = &ctx->ir_base[i]; + if (insn->op == IR_CASE_VAL) { + IR_ASSERT(insn->op2 < IR_TRUE); + consts_count += ir_count_constant(_xlat, insn->op2); + } + n = insn->inputs_count; + insns_count += ir_insn_inputs_to_len(n); + i = _next[i]; + insn = &ctx->ir_base[i]; + if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { + /* Schedule PARAM, VAR, PI */ + while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { + _xlat[i] = insns_count; + insns_count += 1; + i = _next[i]; + insn = &ctx->ir_base[i]; + } + /* Schedule PHIs */ + while (insn->op == IR_PHI) { + ir_ref j, *p, input; + + _xlat[i] = insns_count; + /* Reuse "n" from MERGE and skip the first input */ + insns_count += ir_insn_inputs_to_len(n + 1); + for (j = n, p = insn->ops + 2; j > 0; p++, j--) { + input = *p; + if (input < IR_TRUE) { + consts_count += ir_count_constant(_xlat, input); + } + } + i = _next[i]; + insn = &ctx->ir_base[i]; + } + } + while (i != bb->end) { + ir_ref n, j, *p, input; + +restart: + n =
insn->inputs_count; + for (j = n, p = insn->ops + 1; j > 0; p++, j--) { + input = *p; + if (!_xlat[input]) { + /* input is not scheduled yet */ + if (input > 0) { + if (_blocks[input] == b) { + /* "input" should be before "i" to satisfy dependency */ +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i); + } +#endif + /* remove "input" */ + _prev[_next[input]] = _prev[input]; + _next[_prev[input]] = _next[input]; + /* insert before "i" */ + _prev[input] = _prev[i]; + _next[input] = i; + _next[_prev[i]] = input; + _prev[i] = input; + /* restart from "input" */ + i = input; + insn = &ctx->ir_base[i]; + goto restart; + } + } else if (input < IR_TRUE) { + consts_count += ir_count_constant(_xlat, input); + } + } + } + _xlat[i] = insns_count; + insns_count += ir_insn_inputs_to_len(n); + i = _next[i]; + insn = &ctx->ir_base[i]; + } + /* Schedule BB end */ + _xlat[i] = bb->end = insns_count; + insns_count++; + if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) { + if (insn->op2 < IR_TRUE) { + consts_count += ir_count_constant(_xlat, insn->op2); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCHEDULE) { + fprintf(stderr, "After Schedule\n"); + for (i = 1; i != 0; i = _next[i]) { + fprintf(stderr, "%d -> %d\n", i, _blocks[i]); + } + } +#endif + +#if 1 + /* Check if scheduling didn't make any modifications */ + if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { + bool changed = 0; + + for (i = 1; i != 0; i = _next[i]) { + if (_xlat[i] != i) { + changed = 1; + break; + } + } + if (!changed) { + _xlat -= ctx->consts_count; + ir_mem_free(_xlat); + ir_mem_free(_next); + + ctx->prev_ref = _prev; + ctx->flags |= IR_LINEAR; + ir_truncate(ctx); + + return 1; + } + } +#endif + + ir_mem_free(_prev); + + ir_init(&new_ctx, ctx->flags, consts_count, insns_count); + new_ctx.insns_count = insns_count; + new_ctx.ret_type = ctx->ret_type; + new_ctx.mflags = ctx->mflags; + new_ctx.spill_base = ctx->spill_base; + new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; + new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size; + new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size; + new_ctx.fixed_regset = ctx->fixed_regset; + new_ctx.fixed_save_regset = ctx->fixed_save_regset; + new_ctx.entries_count = ctx->entries_count; +#if defined(IR_TARGET_AARCH64) + new_ctx.deoptimization_exits = ctx->deoptimization_exits; + new_ctx.get_exit_addr = ctx->get_exit_addr; + new_ctx.get_veneer = ctx->get_veneer; + new_ctx.set_veneer = ctx->set_veneer; +#endif + new_ctx.loader = ctx->loader; + + /* Copy constants */ + if (consts_count == ctx->consts_count) { + new_ctx.consts_count = consts_count; + ref = 1 - consts_count; + insn = &ctx->ir_base[ref]; + new_insn = &new_ctx.ir_base[ref]; + + memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref)); + if (ctx->strtab.data) { + while (ref != IR_TRUE) { + if (new_insn->op == IR_FUNC || new_insn->op == IR_SYM || new_insn->op == IR_STR) { + new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.i32)); + } + new_insn++; + ref++; + } + } + } else { + new_ref = -new_ctx.consts_count; + new_insn = &new_ctx.ir_base[new_ref]; + for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) { + if (!_xlat[ref]) { + continue; + } + new_insn->optx = insn->optx; + new_insn->prev_const = 0; + if (insn->op == IR_FUNC || insn->op == IR_SYM || insn->op == IR_STR) { + new_insn->val.addr = ir_str(&new_ctx, ir_get_str(ctx, insn->val.i32)); + } else 
{ + new_insn->val.u64 = insn->val.u64; + } + _xlat[ref] = new_ref; + new_ref--; + new_insn--; + } + new_ctx.consts_count = -new_ref; + } + + new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); + new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref)); + new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list)); + new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref)); + + /* Copy instructions, use lists and use edges */ + prev_ref = 0; + use_edges_count = 0; + for (i = 1; i != 0; i = _next[i]) { + new_ref = _xlat[i]; + new_ctx.cfg_map[new_ref] = _blocks[i]; + _prev[new_ref] = prev_ref; + prev_ref = new_ref; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + k = 0; + if (n == 1) { + ref = ctx->use_edges[use_list->refs]; + if (_xlat[ref]) { + *edges = _xlat[ref]; + edges++; + k = 1; + } + } else { + p = &ctx->use_edges[use_list->refs]; + while (n--) { + ref = *p; + if (_xlat[ref]) { + *edges = _xlat[ref]; + edges++; + k++; + } + p++; + } + } + new_list = &lists[new_ref]; + new_list->refs = use_edges_count; + use_edges_count += k; + new_list->count = k; + + insn = &ctx->ir_base[i]; + new_insn = &new_ctx.ir_base[new_ref]; + + new_insn->optx = insn->optx; + n = new_insn->inputs_count; + switch (n) { + case 0: + new_insn->op1 = insn->op1; + new_insn->op2 = insn->op2; + new_insn->op3 = insn->op3; + break; + case 1: + new_insn->op1 = _xlat[insn->op1]; + if (new_insn->op == IR_PARAM || insn->op == IR_VAR) { + new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2)); + } else { + new_insn->op2 = insn->op2; + } + new_insn->op3 = insn->op3; + break; + case 2: + new_insn->op1 = _xlat[insn->op1]; + new_insn->op2 = _xlat[insn->op2]; + new_insn->op3 = insn->op3; + break; + case 3: + new_insn->op1 = _xlat[insn->op1]; + new_insn->op2 = _xlat[insn->op2]; + new_insn->op3 = _xlat[insn->op3]; + break; + default: + for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) { + *q = _xlat[*p]; + } + break; + } + } + + /* Update list of terminators (IR_OPND_CONTROL_REF) */ + insn = &new_ctx.ir_base[1]; + ref = insn->op1; + if (ref) { + insn->op1 = ref = _xlat[ref]; + while (1) { + insn = &new_ctx.ir_base[ref]; + ref = insn->op3; + if (!ref) { + break; + } + insn->op3 = ref = _xlat[ref]; + } + } + + IR_ASSERT(ctx->use_edges_count >= use_edges_count); + new_ctx.use_edges_count = use_edges_count; + new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref)); + + if (ctx->binding) { + ir_xlat_binding(ctx, _xlat); + new_ctx.binding = ctx->binding; + ctx->binding = NULL; + } + + _xlat -= ctx->consts_count; + ir_mem_free(_xlat); + + new_ctx.cfg_blocks_count = ctx->cfg_blocks_count; + new_ctx.cfg_edges_count = ctx->cfg_edges_count; + new_ctx.cfg_blocks = ctx->cfg_blocks; + new_ctx.cfg_edges = ctx->cfg_edges; + ctx->cfg_blocks = NULL; + ctx->cfg_edges = NULL; + + ir_free(ctx); + IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); + IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); + memcpy(ctx, &new_ctx, sizeof(ir_ctx)); + ctx->flags |= IR_LINEAR; + + ir_mem_free(_next); + + return 1; +} + +void ir_build_prev_refs(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_ref i, n, prev; + ir_insn *insn; + + ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + prev = 0; + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { + 
ctx->prev_ref[i] = prev; + n = ir_insn_len(insn); + prev = i; + i += n; + insn += n; + } + ctx->prev_ref[i] = prev; + } +} diff --git a/ext/opcache/jit/ir/ir_gdb.c b/ext/opcache/jit/ir/ir_gdb.c new file mode 100644 index 0000000000000..8c2781d603868 --- /dev/null +++ b/ext/opcache/jit/ir/ir_gdb.c @@ -0,0 +1,642 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (GDB interface) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation of GDB interface for LuaJIT. + */ + +#include +#include +#include +#include + +#ifdef __FreeBSD__ +# include +# include +# include +# include +#endif + +#include "ir.h" +#include "ir_private.h" +#include "ir_elf.h" + +/* DWARF definitions. */ +#define DW_CIE_VERSION 1 + +/* CFA (Canonical frame address) */ +enum { + DW_CFA_nop = 0x0, + DW_CFA_offset_extended = 0x5, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80 +}; + +enum { + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_textrel = 0x20 +}; + +enum { + DW_TAG_compile_unit = 0x11 +}; + +enum { + DW_children_no = 0, + DW_children_yes = 1 +}; + +enum { + DW_AT_name = 0x03, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12 +}; + +enum { + DW_FORM_addr = 0x01, + DW_FORM_data4 = 0x06, + DW_FORM_string = 0x08 +}; + +enum { + DW_LNS_extended_op = 0, + DW_LNS_copy = 1, + DW_LNS_advance_pc = 2, + DW_LNS_advance_line = 3 +}; + +enum { + DW_LNE_end_sequence = 1, + DW_LNE_set_address = 2 +}; + +enum { +#if defined(IR_TARGET_X86) + DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX, + DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI, + DW_REG_RA, +#elif defined(IR_TARGET_X64) + /* Yes, the order is strange, but correct. */ + DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX, + DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP, + DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11, + DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15, + DW_REG_RA, +#elif defined(IR_TARGET_AARCH64) + DW_REG_SP = 31, + DW_REG_RA = 30, + DW_REG_X29 = 29, +#else +#error "Unsupported target architecture" +#endif +}; + +enum { + GDBJIT_SECT_NULL, + GDBJIT_SECT_text, + GDBJIT_SECT_eh_frame, + GDBJIT_SECT_shstrtab, + GDBJIT_SECT_strtab, + GDBJIT_SECT_symtab, + GDBJIT_SECT_debug_info, + GDBJIT_SECT_debug_abbrev, + GDBJIT_SECT_debug_line, + GDBJIT_SECT__MAX +}; + +enum { + GDBJIT_SYM_UNDEF, + GDBJIT_SYM_FILE, + GDBJIT_SYM_FUNC, + GDBJIT_SYM__MAX +}; + +typedef struct _ir_gdbjit_obj { + ir_elf_header hdr; + ir_elf_sectheader sect[GDBJIT_SECT__MAX]; + ir_elf_symbol sym[GDBJIT_SYM__MAX]; + uint8_t space[4096]; +} ir_gdbjit_obj; + +static const ir_elf_header ir_elfhdr_template = { + .emagic = { 0x7f, 'E', 'L', 'F' }, +#ifdef ELF64 + .eclass = 2, +#else + .eclass = 1, +#endif +#ifdef WORDS_BIGENDIAN + .eendian = 2, +#else + .eendian = 1, +#endif + .eversion = 1, +#if defined(Linux) + .eosabi = 0, /* TODO: Nope, it's not 3. ??? 
*/ +#elif defined(__FreeBSD__) + .eosabi = 9, +#elif defined(__OpenBSD__) + .eosabi = 12, +#elif defined(__NetBSD__) + .eosabi = 2, +#elif defined(__DragonFly__) + .eosabi = 0, +#elif (defined(__sun__) && defined(__svr4__)) + .eosabi = 6, +#else + .eosabi = 0, +#endif + .eabiversion = 0, + .epad = { 0, 0, 0, 0, 0, 0, 0 }, + .type = 1, +#if defined(IR_TARGET_X86) + .machine = 3, +#elif defined(IR_TARGET_X64) + .machine = 62, +#elif defined(IR_TARGET_AARCH64) + .machine = 183, +#else +# error "Unsupported target architecture" +#endif + .version = 1, + .entry = 0, + .phofs = 0, + .shofs = offsetof(ir_gdbjit_obj, sect), + .flags = 0, + .ehsize = sizeof(ir_elf_header), + .phentsize = 0, + .phnum = 0, + .shentsize = sizeof(ir_elf_sectheader), + .shnum = GDBJIT_SECT__MAX, + .shstridx = GDBJIT_SECT_shstrtab +}; + +/* Context for generating the ELF object for the GDB JIT API. */ +typedef struct _ir_gdbjit_ctx { + uint8_t *p; /* Pointer to next address in obj.space. */ + uint8_t *startp; /* Pointer to start address in obj.space. */ + uintptr_t mcaddr; /* Machine code address. */ + uint32_t szmcode; /* Size of machine code. */ + int32_t lineno; /* Starting line number. */ + const char *name; /* JIT function name */ + const char *filename; /* Starting file name. */ + size_t objsize; /* Final size of ELF object. */ + ir_gdbjit_obj obj; /* In-memory ELF object. */ +} ir_gdbjit_ctx; + +/* Add a zero-terminated string */ +static uint32_t ir_gdbjit_strz(ir_gdbjit_ctx *ctx, const char *str) +{ + uint8_t *p = ctx->p; + uint32_t ofs = (uint32_t)(p - ctx->startp); + do { + *p++ = (uint8_t)*str; + } while (*str++); + ctx->p = p; + return ofs; +} + +/* Add a ULEB128 value */ +static void ir_gdbjit_uleb128(ir_gdbjit_ctx *ctx, uint32_t v) +{ + uint8_t *p = ctx->p; + for (; v >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)v; + ctx->p = p; +} + +/* Add a SLEB128 value */ +static void ir_gdbjit_sleb128(ir_gdbjit_ctx *ctx, int32_t v) +{ + uint8_t *p = ctx->p; + for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) + *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)(v & 0x7f); + ctx->p = p; +} + +static void ir_gdbjit_secthdr(ir_gdbjit_ctx *ctx) +{ + ir_elf_sectheader *sect; + + *ctx->p++ = '\0'; + +#define SECTDEF(id, tp, al) \ + sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ + sect->name = ir_gdbjit_strz(ctx, "." 
#id); \ + sect->type = ELFSECT_TYPE_##tp; \ + sect->align = (al) + + SECTDEF(text, NOBITS, 16); + sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; + sect->addr = ctx->mcaddr; + sect->ofs = 0; + sect->size = ctx->szmcode; + + SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); + sect->flags = ELFSECT_FLAGS_ALLOC; + + SECTDEF(shstrtab, STRTAB, 1); + SECTDEF(strtab, STRTAB, 1); + + SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); + sect->ofs = offsetof(ir_gdbjit_obj, sym); + sect->size = sizeof(ctx->obj.sym); + sect->link = GDBJIT_SECT_strtab; + sect->entsize = sizeof(ir_elf_symbol); + sect->info = GDBJIT_SYM_FUNC; + + SECTDEF(debug_info, PROGBITS, 1); + SECTDEF(debug_abbrev, PROGBITS, 1); + SECTDEF(debug_line, PROGBITS, 1); + +#undef SECTDEF +} + +static void ir_gdbjit_symtab(ir_gdbjit_ctx *ctx) +{ + ir_elf_symbol *sym; + + *ctx->p++ = '\0'; + + sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; + sym->name = ir_gdbjit_strz(ctx, "JIT code"); + sym->sectidx = ELFSECT_IDX_ABS; + sym->info = ELFSYM_INFO(ELFSYM_BIND_LOCAL, ELFSYM_TYPE_FILE); + + sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; + sym->name = ir_gdbjit_strz(ctx, ctx->name); + sym->sectidx = GDBJIT_SECT_text; + sym->value = 0; + sym->size = ctx->szmcode; + sym->info = ELFSYM_INFO(ELFSYM_BIND_GLOBAL, ELFSYM_TYPE_FUNC); +} + +typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t); +typedef IR_SET_ALIGNED(1, uint32_t unaligned_uint32_t); +typedef IR_SET_ALIGNED(1, uintptr_t unaligned_uintptr_t); + +#define SECTALIGN(p, a) \ + ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) + +/* Shortcuts to generate DWARF structures. */ +#define DB(x) (*p++ = (x)) +#define DI8(x) (*(int8_t *)p = (x), p++) +#define DU16(x) (*(unaligned_uint16_t *)p = (x), p += 2) +#define DU32(x) (*(unaligned_uint32_t *)p = (x), p += 4) +#define DADDR(x) (*(unaligned_uintptr_t *)p = (x), p += sizeof(uintptr_t)) +#define DUV(x) (ctx->p = p, ir_gdbjit_uleb128(ctx, (x)), p = ctx->p) +#define DSV(x) (ctx->p = p, ir_gdbjit_sleb128(ctx, (x)), p = ctx->p) +#define DSTR(str) (ctx->p = p, ir_gdbjit_strz(ctx, (str)), p = ctx->p) +#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop +#define DSECT(name, stmt) \ + { unaligned_uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \ + *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } + +static void ir_gdbjit_ehframe(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment) +{ + uint8_t *p = ctx->p; + uint8_t *framep = p; + + /* DWARF EH CIE (Common Information Entry) */ + DSECT(CIE, + DU32(0); /* CIE ID. */ + DB(DW_CIE_VERSION); /* Version */ + DSTR("zR"); /* Augmentation String. */ + DUV(1); /* Code alignment factor. */ + DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */ + DB(DW_REG_RA); /* Return address register. */ + DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */ +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV(1); +#elif defined(IR_TARGET_AARCH64) + DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(0); +#endif + DALIGNNOP(sizeof(uintptr_t)); + ) + + /* DWARF EH FDE (Frame Description Entry). */ + DSECT(FDE, + DU32((uint32_t)(p-framep)); /* Offset to CIE Pointer. */ + DU32(0); /* Machine code offset relative to .text. */ + DU32(ctx->szmcode); /* Machine code length. */ + DB(0); /* Augmentation data. 
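(a single ULEB128 length byte of zero: this FDE carries no augmentation payload.)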
*/ + DB(DW_CFA_def_cfa_offset); DUV(sp_offset); +#if defined(IR_TARGET_AARCH64) + if (sp_offset) { + if (sp_adjustment && sp_adjustment < sp_offset) { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1); + } else { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_offset / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_offset / sizeof(uintptr_t)) - 1); + } + } +#endif + if (sp_adjustment && sp_adjustment > sp_offset) { + DB(DW_CFA_advance_loc|1); DB(DW_CFA_def_cfa_offset); DUV(sp_adjustment); +#if defined(IR_TARGET_AARCH64) + if (!sp_offset) { + DB(DW_CFA_offset|DW_REG_X29); DUV(sp_adjustment / sizeof(uintptr_t)); + DB(DW_CFA_offset|DW_REG_RA); DUV((sp_adjustment / sizeof(uintptr_t)) - 1); + } +#endif + } + DALIGNNOP(sizeof(uintptr_t)); + ) + + ctx->p = p; +} + +static void ir_gdbjit_debuginfo(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(info, + DU16(2); /* DWARF version. */ + DU32(0); /* Abbrev offset. */ + DB(sizeof(uintptr_t)); /* Pointer size. */ + + DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */ + DSTR(ctx->filename); /* DW_AT_name. */ + DADDR(ctx->mcaddr); /* DW_AT_low_pc. */ + DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */ + DU32(0); /* DW_AT_stmt_list. */ + ); + + ctx->p = p; +} + +static void ir_gdbjit_debugabbrev(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + /* Abbrev #1: DW_TAG_compile_unit. */ + DUV(1); + DUV(DW_TAG_compile_unit); + DB(DW_children_no); + DUV(DW_AT_name); + DUV(DW_FORM_string); + DUV(DW_AT_low_pc); + DUV(DW_FORM_addr); + DUV(DW_AT_high_pc); + DUV(DW_FORM_addr); + DUV(DW_AT_stmt_list); + DUV(DW_FORM_data4); + DB(0); + DB(0); + + ctx->p = p; +} + +#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op))) + +static void ir_gdbjit_debugline(ir_gdbjit_ctx *ctx) +{ + uint8_t *p = ctx->p; + + DSECT(line, + DU16(2); /* DWARF version. */ + DSECT(header, + DB(1); /* Minimum instruction length. */ + DB(1); /* is_stmt. */ + DI8(0); /* Line base for special opcodes. */ + DB(2); /* Line range for special opcodes. */ + DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */ + DB(0); DB(1); DB(1); /* Standard opcode lengths. */ + /* Directory table. */ + DB(0); + /* File name table. */ + DSTR(ctx->filename); DUV(0); DUV(0); DUV(0); + DB(0); + ); + DLNE(DW_LNE_set_address, sizeof(uintptr_t)); + DADDR(ctx->mcaddr); + if (ctx->lineno) (DB(DW_LNS_advance_line), DSV(ctx->lineno-1)); + DB(DW_LNS_copy); + DB(DW_LNS_advance_pc); DUV(ctx->szmcode); + DLNE(DW_LNE_end_sequence, 0); + ); + + ctx->p = p; +} + + +#undef DLNE + +/* Undef shortcuts. */ +#undef DB +#undef DI8 +#undef DU16 +#undef DU32 +#undef DADDR +#undef DUV +#undef DSV +#undef DSTR +#undef DALIGNNOP +#undef DSECT + +typedef void (*ir_gdbjit_initf) (ir_gdbjit_ctx *ctx); + +static void ir_gdbjit_initsect(ir_gdbjit_ctx *ctx, int sect) +{ + ctx->startp = ctx->p; + ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); +} + +static void ir_gdbjit_initsect_done(ir_gdbjit_ctx *ctx, int sect) +{ + ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); +} + +static void ir_gdbjit_buildobj(ir_gdbjit_ctx *ctx, uint32_t sp_offset, uint32_t sp_adjustment) +{ + ir_gdbjit_obj *obj = &ctx->obj; + + /* Fill in ELF header and clear structures. */ + memcpy(&obj->hdr, &ir_elfhdr_template, sizeof(ir_elf_header)); + memset(&obj->sect, 0, sizeof(ir_elf_sectheader) * GDBJIT_SECT__MAX); + memset(&obj->sym, 0, sizeof(ir_elf_symbol) * GDBJIT_SYM__MAX); + + /* Initialize sections. 
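Each emitter below appends its payload to obj.space: ir_gdbjit_initsect() records a section's offset from the write cursor (ctx->p) and ir_gdbjit_initsect_done() its resulting size.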
*/
+	ctx->p = obj->space;
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab); ir_gdbjit_secthdr(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_shstrtab);
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_strtab); ir_gdbjit_symtab(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_strtab);
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_info); ir_gdbjit_debuginfo(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_info);
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev); ir_gdbjit_debugabbrev(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_abbrev);
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_debug_line); ir_gdbjit_debugline(ctx); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_debug_line);
+	SECTALIGN(ctx->p, sizeof(uintptr_t));
+	ir_gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame); ir_gdbjit_ehframe(ctx, sp_offset, sp_adjustment); ir_gdbjit_initsect_done(ctx, GDBJIT_SECT_eh_frame);
+	ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
+
+	IR_ASSERT(ctx->objsize < sizeof(ir_gdbjit_obj));
+}
+
+enum {
+	IR_GDBJIT_NOACTION,
+	IR_GDBJIT_REGISTER,
+	IR_GDBJIT_UNREGISTER
+};
+
+typedef struct _ir_gdbjit_code_entry {
+	struct _ir_gdbjit_code_entry *next_entry;
+	struct _ir_gdbjit_code_entry *prev_entry;
+	const char *symfile_addr;
+	uint64_t symfile_size;
+} ir_gdbjit_code_entry;
+
+typedef struct _ir_gdbjit_descriptor {
+	uint32_t version;
+	uint32_t action_flag;
+	struct _ir_gdbjit_code_entry *relevant_entry;
+	struct _ir_gdbjit_code_entry *first_entry;
+} ir_gdbjit_descriptor;
+
+ir_gdbjit_descriptor __jit_debug_descriptor = {
+	1, IR_GDBJIT_NOACTION, NULL, NULL
+};
+
+#ifdef IR_EXTERNAL_GDB_ENTRY
+void __jit_debug_register_code(void);
+#else
+IR_NEVER_INLINE void __jit_debug_register_code(void)
+{
+	__asm__ __volatile__("");
+}
+#endif
+
+static bool ir_gdb_register_code(const void *object, size_t size)
+{
+	ir_gdbjit_code_entry *entry;
+
+	entry = malloc(sizeof(ir_gdbjit_code_entry) + size);
+	if (entry == NULL) {
+		return 0;
+	}
+
+	entry->symfile_addr = ((char*)entry) + sizeof(ir_gdbjit_code_entry);
+	entry->symfile_size = size;
+
+	memcpy((char *)entry->symfile_addr, object, size);
+
+	entry->prev_entry = NULL;
+	entry->next_entry = __jit_debug_descriptor.first_entry;
+
+	if (entry->next_entry) {
+		entry->next_entry->prev_entry = entry;
+	}
+	__jit_debug_descriptor.first_entry = entry;
+
+	/* Notify GDB */
+	__jit_debug_descriptor.relevant_entry = entry;
+	__jit_debug_descriptor.action_flag = IR_GDBJIT_REGISTER;
+	__jit_debug_register_code();
+
+	return 1;
+}
+
+void ir_gdb_unregister_all(void)
+{
+	ir_gdbjit_code_entry *entry;
+
+	__jit_debug_descriptor.action_flag = IR_GDBJIT_UNREGISTER;
+	while ((entry = __jit_debug_descriptor.first_entry)) {
+		__jit_debug_descriptor.first_entry = entry->next_entry;
+		if (entry->next_entry) {
+			entry->next_entry->prev_entry = NULL;
+		}
+		/* Notify GDB */
+		__jit_debug_descriptor.relevant_entry = entry;
+		__jit_debug_register_code();
+
+		free(entry);
+	}
+}
+
+bool ir_gdb_present(void)
+{
+	bool ret = 0;
+#if defined(__linux__) /* netbsd while having this procfs part, does not hold the tracer pid */
+	int fd = open("/proc/self/status", O_RDONLY);
+
+	if (fd > 0) {
+		char buf[1024];
+		ssize_t n = read(fd, buf, sizeof(buf) - 1);
+		char *s;
+		pid_t pid;
+
+		if (n > 0) {
+			buf[n] = 0;
+			s = strstr(buf, "TracerPid:");
+			if (s) {
+				s += sizeof("TracerPid:") - 1;
+				while (*s == ' ' || *s == '\t') {
+					s++;
+				}
+				pid = atoi(s);
+				if (pid) {
+					char out[1024];
+					ssize_t len;
+
+					sprintf(buf, "/proc/%d/exe", (int)pid);
+					len = readlink(buf, out, sizeof(out) - 1);
+					if (len > 0) {
+						out[len] = 0; /* readlink() does not NUL-terminate */
+						if (strstr(out, "gdb")) {
+							ret = 1;
+						}
+					}
+
} + } + } + } + + close(fd); + } +#elif defined(__FreeBSD__) + struct kinfo_proc *proc = kinfo_getproc(getpid()); + + if (proc) { + if ((proc->ki_flag & P_TRACED) != 0) { + struct kinfo_proc *dbg = kinfo_getproc(proc->ki_tracer); + + ret = (dbg && strstr(dbg->ki_comm, "gdb")); + } + } +#endif + + return ret; +} + +int ir_gdb_register(const char *name, + const void *start, + size_t size, + uint32_t sp_offset, + uint32_t sp_adjustment) +{ + ir_gdbjit_ctx ctx; + + ctx.mcaddr = (uintptr_t)start; + ctx.szmcode = (uint32_t)size; + ctx.name = name; + ctx.filename = "unknown"; + ctx.lineno = 0; + + ir_gdbjit_buildobj(&ctx, sp_offset, sp_adjustment); + + return ir_gdb_register_code(&ctx.obj, ctx.objsize); +} + +void ir_gdb_init(void) +{ + /* This might enable registration of all JIT-ed code, but unfortunately, + * in case of many functions, this takes enormous time. */ + if (ir_gdb_present()) { +#if 0 + _debug |= IR_DEBUG_GDB; +#endif + } +} diff --git a/ext/opcache/jit/ir/ir_patch.c b/ext/opcache/jit/ir/ir_patch.c new file mode 100644 index 0000000000000..39e08eb46a552 --- /dev/null +++ b/ext/opcache/jit/ir/ir_patch.c @@ -0,0 +1,270 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (Native code patcher) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * Based on Mike Pall's implementation for LuaJIT. + */ + +#include "ir.h" +#include "ir_private.h" + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +static uint32_t _asm_x86_inslen(const uint8_t* p) +{ + static const uint8_t map_op1[256] = { + 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, + 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, + 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, + 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, +#ifdef IR_TARGET_X64 + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, +#else + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, +#endif + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, + 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, + 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, +#ifdef IR_TARGET_X64 + 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#else + 0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, +#endif + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, + 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, + 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, + 0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 + }; + static const uint8_t map_op2[256] = { + 0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, + 
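/* NB: each entry packs a decode action in the high nibble (0 done, 1 prefix, 2 0F map, 3/8/9 ModR/M forms, 4/5 immediate, 6 group 3, 7 VEX) and a base length in the low nibble - cf. the switch below. */
+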
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, + 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 + }; + uint32_t result = 0; + uint32_t prefixes = 0; + uint32_t x = map_op1[*p]; + + for (;;) { + switch (x >> 4) { + case 0: + return result + x + (prefixes & 4); + case 1: + prefixes |= x; + x = map_op1[*++p]; + result++; + break; + case 2: + x = map_op2[*++p]; + break; + case 3: + p++; + goto mrm; + case 4: + result -= (prefixes & 2); + /* fallthrough */ + case 5: + return result + (x & 15); + case 6: /* Group 3. */ + if (p[1] & 0x38) { + x = 2; + } else if ((prefixes & 2) && (x == 0x66)) { + x = 4; + } + goto mrm; + case 7: /* VEX c4/c5. */ +#ifdef IR_TARGET_X86 + if (p[1] < 0xc0) { + x = 2; + goto mrm; + } +#endif + if (x == 0x70) { + x = *++p & 0x1f; + result++; + if (x >= 2) { + p += 2; + result += 2; + goto mrm; + } + } + p++; + result++; + x = map_op2[*++p]; + break; + case 8: + result -= (prefixes & 2); + /* fallthrough */ + case 9: +mrm: + /* ModR/M and possibly SIB. 
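mod 01 adds disp8 and mod 10 adds disp32; mod 00 with rm 101 is disp32-only; rm 100 means a SIB byte follows, and with mod 00 a SIB base of 101 adds another disp32.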
*/ + result += (x & 15); + x = *++p; + switch (x >> 6) { + case 0: + if ((x & 7) == 5) { + return result + 4; + } + break; + case 1: + result++; + break; + case 2: + result += 4; + break; + case 3: + return result; + } + if ((x & 7) == 4) { + result++; + if (x < 0x40 && (p[1] & 7) == 5) { + result += 4; + } + } + return result; + } + } +} + +typedef IR_SET_ALIGNED(1, uint16_t unaligned_uint16_t); +typedef IR_SET_ALIGNED(1, int32_t unaligned_int32_t); + +static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + uint8_t *p, *end; + + p = (uint8_t*)code; + end = p + size - 4; + while (p < end) { + if ((*(unaligned_uint16_t*)p & 0xf0ff) == 0x800f && p + *(unaligned_int32_t*)(p+2) == (uint8_t*)from_addr - 6) { + *(unaligned_int32_t*)(p+2) = ((uint8_t*)to_addr - (p + 6)); + ret++; + } else if (*p == 0xe9 && p + *(unaligned_int32_t*)(p+1) == (uint8_t*)from_addr - 5) { + *(unaligned_int32_t*)(p+1) = ((uint8_t*)to_addr - (p + 5)); + ret++; + } + p += _asm_x86_inslen(p); + } + if (ret) { + ir_mem_flush((void*)code, size); + } + return ret; +} + +#elif defined(IR_TARGET_AARCH64) + +static int ir_patch_code(const void *code, size_t size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + uint8_t *p, *end; + const void *veneer = NULL; + ptrdiff_t delta; + + end = (uint8_t*)code; + p = end + size; + while (p > end) { + uint32_t *ins_ptr; + uint32_t ins; + + p -= 4; + ins_ptr = (uint32_t*)p; + ins = *ins_ptr; + if ((ins & 0xfc000000u) == 0x14000000u) { + // B (imm26:0..25) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ (uint32_t)delta) & 0x01ffffffu) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x02000000) >> 26) != 0) { + abort(); // branch target out of range + } + *ins_ptr = (ins & 0xfc000000u) | ((uint32_t)delta & 0x03ffffffu); + ret++; + if (!veneer) { + veneer = p; + } + } + } else if ((ins & 0xff000000u) == 0x54000000u || + (ins & 0x7e000000u) == 0x34000000u) { + // B.cond, CBZ, CBNZ (imm19:5..23) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ ((uint32_t)delta << 5)) & 0x00ffffe0u) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x40000) >> 19) != 0) { + if (veneer) { + delta = (uint32_t*)veneer - ins_ptr; + if (((delta + 0x40000) >> 19) != 0) { + abort(); // branch target out of range + } + } else { + abort(); // branch target out of range + } + } + *ins_ptr = (ins & 0xff00001fu) | (((uint32_t)delta & 0x7ffffu) << 5); + ret++; + } + } else if ((ins & 0x7e000000u) == 0x36000000u) { + // TBZ, TBNZ (imm14:5..18) + delta = (uint32_t*)from_addr - ins_ptr; + if (((ins ^ ((uint32_t)delta << 5)) & 0x0007ffe0u) == 0) { + delta = (uint32_t*)to_addr - ins_ptr; + if (((delta + 0x2000) >> 14) != 0) { + if (veneer) { + delta = (uint32_t*)veneer - ins_ptr; + if (((delta + 0x2000) >> 14) != 0) { + abort(); // branch target out of range + } + } else { + abort(); // branch target out of range + } + } + *ins_ptr = (ins & 0xfff8001fu) | (((uint32_t)delta & 0x3fffu) << 5); + ret++; + } + } + } + + if (ret) { + ir_mem_flush((void*)code, size); + } + + return ret; +} +#endif + +int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr) +{ + int ret = 0; + + if (jmp_table_size) { + const void **jmp_slot = (const void **)((char*)code + IR_ALIGNED_SIZE(size, sizeof(void*))); + + do { + if (*jmp_slot == from_addr) { + *jmp_slot = to_addr; + ret++; + } + jmp_slot++; + } while (--jmp_table_size); + } + + ret += ir_patch_code(code, size, 
from_addr, to_addr);
+
+	return ret;
+}
diff --git a/ext/opcache/jit/ir/ir_perf.c b/ext/opcache/jit/ir/ir_perf.c
new file mode 100644
index 0000000000000..5eac3006e2aeb
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_perf.c
@@ -0,0 +1,266 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Linux perf interface)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ *
+ * 1) Profile using perf-<pid>.map
+ *    perf record ./prog
+ *    perf report
+ *
+ * 2) Profile using jit-<pid>.dump
+ *    perf record -k 1 ./prog
+ *    perf inject -j -i perf.data -o perf.data.jitted
+ *    perf report -i perf.data.jitted
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/mman.h>
+
+#if defined(__linux__)
+#include <sys/syscall.h>
+#elif defined(__darwin__)
+# include <pthread.h>
+#elif defined(__FreeBSD__)
+# include <sys/sysctl.h>
+# include <sys/thr.h>
+#elif defined(__NetBSD__)
+# include <lwp.h>
+#elif defined(__DragonFly__)
+# include <sys/lwp.h>
+# include <sys/sysctl.h>
+#elif defined(__sun)
+// avoiding thread.h inclusion as it conflicts with vtunes types.
+extern unsigned int thr_self(void);
+#elif defined(__HAIKU__)
+#include <FindDirectory.h>
+#endif
+
+#include "ir.h"
+#include "ir_elf.h"
+
+#define IR_PERF_JITDUMP_HEADER_MAGIC 0x4A695444
+#define IR_PERF_JITDUMP_HEADER_VERSION 1
+
+#define IR_PERF_JITDUMP_RECORD_LOAD 0
+#define IR_PERF_JITDUMP_RECORD_MOVE 1
+#define IR_PERF_JITDUMP_RECORD_DEBUG_INFO 2
+#define IR_PERF_JITDUMP_RECORD_CLOSE 3
+#define IR_PERF_JITDUMP_UNWINDING_UNFO 4
+
+#define ALIGN8(size) (((size) + 7) & ~7)
+#define PADDING8(size) (ALIGN8(size) - (size))
+
+typedef struct ir_perf_jitdump_header {
+	uint32_t magic;
+	uint32_t version;
+	uint32_t size;
+	uint32_t elf_mach_target;
+	uint32_t reserved;
+	uint32_t process_id;
+	uint64_t time_stamp;
+	uint64_t flags;
+} ir_perf_jitdump_header;
+
+typedef struct _ir_perf_jitdump_record {
+	uint32_t event;
+	uint32_t size;
+	uint64_t time_stamp;
+} ir_perf_jitdump_record;
+
+typedef struct _ir_perf_jitdump_load_record {
+	ir_perf_jitdump_record hdr;
+	uint32_t process_id;
+	uint32_t thread_id;
+	uint64_t vma;
+	uint64_t code_address;
+	uint64_t code_size;
+	uint64_t code_id;
+} ir_perf_jitdump_load_record;
+
+static int jitdump_fd = -1;
+static void *jitdump_mem = MAP_FAILED;
+
+static uint64_t ir_perf_timestamp(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
+		return 0;
+	}
+	return ((uint64_t)ts.tv_sec * 1000000000) + ts.tv_nsec;
+}
+
+int ir_perf_jitdump_open(void)
+{
+	char filename[64];
+	int fd, ret;
+	ir_elf_header elf_hdr;
+	ir_perf_jitdump_header jit_hdr;
+
+	sprintf(filename, "/tmp/jit-%d.dump", getpid());
+	if (!ir_perf_timestamp()) {
+		return 0;
+	}
+
+#if defined(__linux__)
+	fd = open("/proc/self/exe", O_RDONLY);
+#elif defined(__NetBSD__)
+	fd = open("/proc/curproc/exe", O_RDONLY);
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
+	char path[PATH_MAX];
+	size_t pathlen = sizeof(path);
+	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
+		return 0;
+	}
+	fd = open(path, O_RDONLY);
+#elif defined(__sun)
+	fd = open("/proc/self/path/a.out", O_RDONLY);
+#elif defined(__HAIKU__)
+	char path[PATH_MAX];
+	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
+		NULL, path, sizeof(path)) != B_OK) {
+		return 0;
+	}
+
+	fd = open(path, O_RDONLY);
+#else
+	fd = -1;
+#endif
+	if (fd < 0) {
+		return 0;
+	}
+
+	ret = read(fd, &elf_hdr, sizeof(elf_hdr));
+	close(fd);
+
+	if (ret != sizeof(elf_hdr) ||
+	    elf_hdr.emagic[0] != 0x7f ||
+	    elf_hdr.emagic[1] != 'E' ||
+	    elf_hdr.emagic[2] != 'L' ||
+	    elf_hdr.emagic[3]
!= 'F') { + return 0; + } + + jitdump_fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (jitdump_fd < 0) { + return 0; + } + + jitdump_mem = mmap(NULL, + sysconf(_SC_PAGESIZE), + PROT_READ|PROT_EXEC, + MAP_PRIVATE, jitdump_fd, 0); + + if (jitdump_mem == MAP_FAILED) { + close(jitdump_fd); + jitdump_fd = -1; + return 0; + } + + memset(&jit_hdr, 0, sizeof(jit_hdr)); + jit_hdr.magic = IR_PERF_JITDUMP_HEADER_MAGIC; + jit_hdr.version = IR_PERF_JITDUMP_HEADER_VERSION; + jit_hdr.size = sizeof(jit_hdr); + jit_hdr.elf_mach_target = elf_hdr.machine; + jit_hdr.process_id = getpid(); + jit_hdr.time_stamp = ir_perf_timestamp(); + jit_hdr.flags = 0; + if (write(jitdump_fd, &jit_hdr, sizeof(jit_hdr)) != sizeof(jit_hdr)) { + return 0; + } + return 1; +} + +int ir_perf_jitdump_close(void) +{ + int ret = 1; + + if (jitdump_fd >= 0) { + ir_perf_jitdump_record rec; + + rec.event = IR_PERF_JITDUMP_RECORD_CLOSE; + rec.size = sizeof(rec); + rec.time_stamp = ir_perf_timestamp(); + if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec)) { + ret = 0; + } + close(jitdump_fd); + + if (jitdump_mem != MAP_FAILED) { + munmap(jitdump_mem, sysconf(_SC_PAGESIZE)); + } + } + return ret; +} + +int ir_perf_jitdump_register(const char *name, const void *start, size_t size) +{ + if (jitdump_fd >= 0) { + static uint64_t id = 1; + ir_perf_jitdump_load_record rec; + size_t len = strlen(name); + uint32_t thread_id = 0; +#if defined(__linux__) + thread_id = syscall(SYS_gettid); +#elif defined(__darwin__) + uint64_t thread_id_u64; + pthread_threadid_np(NULL, &thread_id_u64); + thread_id = (uint32_t) thread_id_u64; +#elif defined(__FreeBSD__) + long tid; + thr_self(&tid); + thread_id = (uint32_t)tid; +#elif defined(__OpenBSD__) + thread_id = getthrid(); +#elif defined(__NetBSD__) + thread_id = _lwp_self(); +#elif defined(__DragonFly__) + thread_id = lwp_gettid(); +#elif defined(__sun) + thread_id = thr_self(); +#endif + + memset(&rec, 0, sizeof(rec)); + rec.hdr.event = IR_PERF_JITDUMP_RECORD_LOAD; + rec.hdr.size = sizeof(rec) + len + 1 + size; + rec.hdr.time_stamp = ir_perf_timestamp(); + rec.process_id = getpid(); + rec.thread_id = thread_id; + rec.vma = (uint64_t)(uintptr_t)start; + rec.code_address = (uint64_t)(uintptr_t)start; + rec.code_size = (uint64_t)size; + rec.code_id = id++; + + if (write(jitdump_fd, &rec, sizeof(rec)) != sizeof(rec) + || write(jitdump_fd, name, len + 1) < 0 + || write(jitdump_fd, start, size) < 0) { + return 0; + } + } + return 1; +} + +void ir_perf_map_register(const char *name, const void *start, size_t size) +{ + static FILE *fp = NULL; + + if (!fp) { + char filename[64]; + + sprintf(filename, "/tmp/perf-%d.map", getpid()); + fp = fopen(filename, "w"); + if (!fp) { + return; + } + setlinebuf(fp); + } + fprintf(fp, "%zx %zx %s\n", (size_t)(uintptr_t)start, size, name); +} diff --git a/ext/opcache/jit/ir/ir_php.h b/ext/opcache/jit/ir/ir_php.h new file mode 100644 index 0000000000000..d26f78c99bc7a --- /dev/null +++ b/ext/opcache/jit/ir/ir_php.h @@ -0,0 +1,37 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR/PHP integration) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_PHP_H
+#define IR_PHP_H
+
+#define IR_PHP_OPS(_)
+
+
+#define IR_SNAPSHOT_HANDLER_DCL() \
+	void *zend_jit_snapshot_handler(ir_ctx *ctx, ir_ref snapshot_ref, ir_insn *snapshot, void *addr)
+
+#define IR_SNAPSHOT_HANDLER(ctx, ref, insn, addr) \
+	zend_jit_snapshot_handler(ctx, ref, insn, addr)
+
+#ifndef IR_PHP_MM
+# define IR_PHP_MM 1
+#endif
+
+#if IR_PHP_MM
+# include "zend.h"
+
+# define ir_mem_malloc emalloc
+# define ir_mem_calloc ecalloc
+# define ir_mem_realloc erealloc
+# define ir_mem_free efree
+#endif
+
+#if defined(IR_TARGET_AARCH64)
+# define IR_EXTERNAL_GDB_ENTRY
+#endif
+
+#endif /* IR_PHP_H */
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h
new file mode 100644
index 0000000000000..0f6267bd58532
--- /dev/null
+++ b/ext/opcache/jit/ir/ir_private.h
@@ -0,0 +1,1206 @@
+/*
+ * IR - Lightweight JIT Compilation Framework
+ * (Common data structures and non public definitions)
+ * Copyright (C) 2022 Zend by Perforce.
+ * Authors: Dmitry Stogov
+ */
+
+#ifndef IR_PRIVATE_H
+#define IR_PRIVATE_H
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef IR_DEBUG
+# include <assert.h>
+# define IR_ASSERT(x) assert(x)
+#else
+# define IR_ASSERT(x)
+#endif
+
+#ifdef _WIN32
+# include <intrin.h>
+# ifdef _M_X64
+# pragma intrinsic(_BitScanForward64)
+# pragma intrinsic(_BitScanReverse64)
+# endif
+# pragma intrinsic(_BitScanForward)
+# pragma intrinsic(_BitScanReverse)
+#endif
+
+#ifdef __has_builtin
+# if __has_builtin(__builtin_expect)
+# define EXPECTED(condition) __builtin_expect(!!(condition), 1)
+# define UNEXPECTED(condition) __builtin_expect(!!(condition), 0)
+# endif
+# if __has_attribute(__aligned__)
+# define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment)))
+# endif
+# if __has_attribute(__fallthrough__)
+# define IR_FALLTHROUGH __attribute__((__fallthrough__))
+# endif
+#elif defined(_WIN32)
+# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl
+#else /* GCC prior to 10 or non-clang/msvc compilers */
+#define __has_builtin(x) 0
+#endif
+#ifndef EXPECTED
+# define EXPECTED(condition) (condition)
+# define UNEXPECTED(condition) (condition)
+#endif
+#ifndef IR_SET_ALIGNED
+# define IR_SET_ALIGNED(alignment, decl) decl
+#endif
+#ifndef IR_FALLTHROUGH
+# define IR_FALLTHROUGH ((void)0)
+#endif
+
+/*** Helper routines ***/
+
+#define IR_ALIGNED_SIZE(size, alignment) \
+	(((size) + ((alignment) - 1)) & ~((alignment) - 1))
+
+#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define IR_MIN(a, b) (((a) < (b)) ?
(a) : (b)) + +#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1))) + +#define IR_LOG2(x) ir_ntzl(x) + +IR_ALWAYS_INLINE uint8_t ir_rol8(uint8_t op1, uint8_t op2) +{ + return (op1 << op2) | (op1 >> (8 - op2)); +} + +IR_ALWAYS_INLINE uint16_t ir_rol16(uint16_t op1, uint16_t op2) +{ + return (op1 << op2) | (op1 >> (16 - op2)); +} + +IR_ALWAYS_INLINE uint32_t ir_rol32(uint32_t op1, uint32_t op2) +{ + return (op1 << op2) | (op1 >> (32 - op2)); +} + +IR_ALWAYS_INLINE uint64_t ir_rol64(uint64_t op1, uint64_t op2) +{ + return (op1 << op2) | (op1 >> (64 - op2)); +} + +IR_ALWAYS_INLINE uint8_t ir_ror8(uint8_t op1, uint8_t op2) +{ + return (op1 >> op2) | (op1 << (8 - op2)); +} + +IR_ALWAYS_INLINE uint16_t ir_ror16(uint16_t op1, uint16_t op2) +{ + return (op1 >> op2) | (op1 << (16 - op2)); +} + +IR_ALWAYS_INLINE uint32_t ir_ror32(uint32_t op1, uint32_t op2) +{ + return (op1 >> op2) | (op1 << (32 - op2)); +} + +IR_ALWAYS_INLINE uint64_t ir_ror64(uint64_t op1, uint64_t op2) +{ + return (op1 >> op2) | (op1 << (64 - op2)); +} + +/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */ +IR_ALWAYS_INLINE uint32_t ir_ntz(uint32_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_ctz)) + return __builtin_ctz(num); +#elif defined(_WIN32) + uint32_t index; + + if (!_BitScanForward(&index, num)) { + /* undefined behavior */ + return 32; + } + + return index; +#else + int n; + + if (num == 0) return 32; + + n = 1; + if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;} + if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;} + if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;} + if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;} + return n - (num & 1); +#endif +} + +/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */ +IR_ALWAYS_INLINE uint32_t ir_ntzl(uint64_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_ctzl)) + return __builtin_ctzl(num); +#elif defined(_WIN64) + unsigned long index; + + if (!_BitScanForward64(&index, num)) { + /* undefined behavior */ + return 64; + } + + return (uint32_t) index; +#else + uint32_t n; + + if (num == 0) return 64; + + n = 1; + if ((num & 0xffffffff) == 0) {n += 32; num = num >> 32;} + if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;} + if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;} + if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;} + if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;} + return n - (uint32_t)(num & 1); +#endif +} + +/* Number of leading zero bits (Undefined for zero) */ +IR_ALWAYS_INLINE int ir_nlz(uint32_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_clz)) + return __builtin_clz(num); +#elif defined(_WIN32) + uint32_t index; + + if (!_BitScanReverse(&index, num)) { + /* undefined behavior */ + return 32; + } + + return (int) (32 - 1) - index; +#else + uint32_t x; + uint32_t n; + + n = 32; + x = num >> 16; if (x != 0) {n -= 16; num = x;} + x = num >> 8; if (x != 0) {n -= 8; num = x;} + x = num >> 4; if (x != 0) {n -= 4; num = x;} + x = num >> 2; if (x != 0) {n -= 2; num = x;} + x = num >> 1; if (x != 0) return n - 2; + return n - num; +#endif +} + +IR_ALWAYS_INLINE int ir_nlzl(uint64_t num) +{ +#if (defined(__GNUC__) || __has_builtin(__builtin_clzll)) + return __builtin_clzll(num); +#elif defined(_WIN64) + unsigned long index; + + if (!_BitScanReverse64(&index, num)) { + /* undefined behavior */ + return 64; + } + + return (int) (64 - 1) - index; +#else + uint64_t x; + uint32_t n; + + n = 64; + x = num >> 32; if (x != 0) {n -= 32; num = x;} + x = num >> 16; if 
(x != 0) {n -= 16; num = x;} + x = num >> 8; if (x != 0) {n -= 8; num = x;} + x = num >> 4; if (x != 0) {n -= 4; num = x;} + x = num >> 2; if (x != 0) {n -= 2; num = x;} + x = num >> 1; if (x != 0) return n - 2; + return n - (uint32_t)num; +#endif +} + +/*** Helper data types ***/ + +/* Arena */ +struct _ir_arena { + char *ptr; + char *end; + ir_arena *prev; +}; + +IR_ALWAYS_INLINE ir_arena* ir_arena_create(size_t size) +{ + ir_arena *arena; + + IR_ASSERT(size >= IR_ALIGNED_SIZE(sizeof(ir_arena), 8)); + arena = (ir_arena*)ir_mem_malloc(size); + arena->ptr = (char*) arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); + arena->end = (char*) arena + size; + arena->prev = NULL; + return arena; +} + +IR_ALWAYS_INLINE void ir_arena_free(ir_arena *arena) +{ + do { + ir_arena *prev = arena->prev; + ir_mem_free(arena); + arena = prev; + } while (arena); +} + +IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size) +{ + ir_arena *arena = *arena_ptr; + char *ptr = arena->ptr; + + size = IR_ALIGNED_SIZE(size, 8); + + if (EXPECTED(size <= (size_t)(arena->end - ptr))) { + arena->ptr = ptr + size; + } else { + size_t arena_size = + UNEXPECTED((size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) > (size_t)(arena->end - (char*) arena)) ? + (size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) : + (size_t)(arena->end - (char*) arena); + ir_arena *new_arena = (ir_arena*)ir_mem_malloc(arena_size); + + ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8); + new_arena->ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8) + size; + new_arena->end = (char*) new_arena + arena_size; + new_arena->prev = arena; + *arena_ptr = new_arena; + } + + return (void*) ptr; +} + +IR_ALWAYS_INLINE void* ir_arena_checkpoint(ir_arena *arena) +{ + return arena->ptr; +} + +IR_ALWAYS_INLINE void ir_release(ir_arena **arena_ptr, void *checkpoint) +{ + ir_arena *arena = *arena_ptr; + + while (UNEXPECTED((char*)checkpoint > arena->end) || + UNEXPECTED((char*)checkpoint <= (char*)arena)) { + ir_arena *prev = arena->prev; + ir_mem_free(arena); + *arena_ptr = arena = prev; + } + IR_ASSERT((char*)checkpoint > (char*)arena && (char*)checkpoint <= arena->end); + arena->ptr = (char*)checkpoint; +} + +/* Bitsets */ +#if defined(IR_TARGET_X86) +# define IR_BITSET_BITS 32 +# define IR_BITSET_ONE 1U +# define ir_bitset_base_t uint32_t +# define ir_bitset_ntz ir_ntz +#else +# define IR_BITSET_BITS 64 +# ifdef _M_X64 /* MSVC*/ +# define IR_BITSET_ONE 1ui64 +# else +# define IR_BITSET_ONE 1UL +# endif +# define ir_bitset_base_t uint64_t +# define ir_bitset_ntz ir_ntzl +#endif + +typedef ir_bitset_base_t *ir_bitset; + +IR_ALWAYS_INLINE uint32_t ir_bitset_len(uint32_t n) +{ + return (n + (IR_BITSET_BITS - 1)) / IR_BITSET_BITS; +} + +IR_ALWAYS_INLINE ir_bitset ir_bitset_malloc(uint32_t n) +{ + return ir_mem_calloc(ir_bitset_len(n), IR_BITSET_BITS / 8); +} + +IR_ALWAYS_INLINE void ir_bitset_incl(ir_bitset set, uint32_t n) +{ + set[n / IR_BITSET_BITS] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); +} + +IR_ALWAYS_INLINE void ir_bitset_excl(ir_bitset set, uint32_t n) +{ + set[n / IR_BITSET_BITS] &= ~(IR_BITSET_ONE << (n % IR_BITSET_BITS)); +} + +IR_ALWAYS_INLINE bool ir_bitset_in(const ir_bitset set, uint32_t n) +{ + return (set[(n / IR_BITSET_BITS)] & (IR_BITSET_ONE << (n % IR_BITSET_BITS))) != 0; +} + +IR_ALWAYS_INLINE void ir_bitset_clear(ir_bitset set, uint32_t len) +{ + memset(set, 0, len * (IR_BITSET_BITS / 8)); +} + +IR_ALWAYS_INLINE void ir_bitset_fill(ir_bitset set, uint32_t len) +{ + memset(set, 0xff, len * (IR_BITSET_BITS / 8)); +} + 
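/* Usage sketch (illustrative, hypothetical names): a bitset over n items is
+ * an array of machine words; item k maps to word k / IR_BITSET_BITS,
+ * bit k % IR_BITSET_BITS:
+ *
+ *   ir_bitset live = ir_bitset_malloc(count); // count bits, zero-filled
+ *   ir_bitset_incl(live, ref);
+ *   if (ir_bitset_in(live, ref)) { ... }
+ *   ir_mem_free(live);
+ */
+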
+IR_ALWAYS_INLINE bool ir_bitset_empty(const ir_bitset set, uint32_t len) +{ + uint32_t i; + for (i = 0; i < len; i++) { + if (set[i]) { + return 0; + } + } + return 1; +} + +IR_ALWAYS_INLINE bool ir_bitset_equal(const ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + return memcmp(set1, set2, len * (IR_BITSET_BITS / 8)) == 0; +} + +IR_ALWAYS_INLINE void ir_bitset_copy(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + memcpy(set1, set2, len * (IR_BITSET_BITS / 8)); +} + +IR_ALWAYS_INLINE void ir_bitset_intersection(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] &= set2[i]; + } +} + +IR_ALWAYS_INLINE void ir_bitset_union(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] |= set2[i]; + } +} + +IR_ALWAYS_INLINE void ir_bitset_difference(ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + set1[i] = set1[i] & ~set2[i]; + } +} + +IR_ALWAYS_INLINE bool ir_bitset_is_subset(const ir_bitset set1, const ir_bitset set2, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + if (set1[i] & ~set2[i]) { + return 0; + } + } + return 1; +} + +IR_ALWAYS_INLINE int ir_bitset_first(const ir_bitset set, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + if (set[i]) { + return IR_BITSET_BITS * i + ir_bitset_ntz(set[i]); + } + } + return -1; /* empty set */ +} + +IR_ALWAYS_INLINE int ir_bitset_last(const ir_bitset set, uint32_t len) +{ + uint32_t i = len; + + while (i > 0) { + i--; + if (set[i]) { + uint32_t j = IR_BITSET_BITS * i - 1; + ir_bitset_base_t x = set[i]; + do { + x = x >> 1; + j++; + } while (x != 0); + return j; + } + } + return -1; /* empty set */ +} + +IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + ir_bitset_base_t x = set[i]; + + if (x) { + int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); + set[i] = x & (x - 1); + return bit; + } + } + return -1; /* empty set */ +} + +#define IR_BITSET_FOREACH(set, len, bit) do { \ + ir_bitset _set = (set); \ + uint32_t _i, _len = (len); \ + for (_i = 0; _i < _len; _set++, _i++) { \ + ir_bitset_base_t _x = *_set; \ + while (_x) { \ + (bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \ + _x &= _x - 1; + +#define IR_BITSET_FOREACH_DIFFERENCE(set1, set2, len, bit) do { \ + ir_bitset _set1 = (set1); \ + ir_bitset _set2 = (set2); \ + uint32_t _i, _len = (len); \ + for (_i = 0; _i < _len; _i++) { \ + ir_bitset_base_t _x = _set1[_i] & ~_set2[_i]; \ + while (_x) { \ + (bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \ + _x &= _x - 1; + +#define IR_BITSET_FOREACH_END() \ + } \ + } \ +} while (0) + +/* Bit Queue */ +typedef struct _ir_bitqueue { + uint32_t len; + uint32_t pos; + ir_bitset set; +} ir_bitqueue; + +IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n) +{ + q->len = ir_bitset_len(n); + q->pos = q->len - 1; + q->set = ir_bitset_malloc(n); +} + +IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q) +{ + ir_mem_free(q->set); +} + +IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q) +{ + q->pos = q->len - 1; + ir_bitset_clear(q->set, q->len); +} + +IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q) +{ + uint32_t i = q->pos; + ir_bitset_base_t x, *p = q->set + i; + do { + x = *p; + if (x) { + int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x); + *p = x & (x - 1); + q->pos = i; + return bit; + } + p++; + i++; + } while (i < q->len); + q->pos = q->len - 1; + return -1; 
/* empty set */ +} + +IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n) +{ + uint32_t i = n / IR_BITSET_BITS; + q->set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS); + if (i < q->pos) { + q->pos = i; + } +} + +IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n) +{ + ir_bitset_excl(q->set, n); +} + +IR_ALWAYS_INLINE bool ir_bitqueue_in(const ir_bitqueue *q, uint32_t n) +{ + return ir_bitset_in(q->set, n); +} + +/* Dynamic array of numeric references */ +typedef struct _ir_array { + ir_ref *refs; + uint32_t size; +} ir_array; + +void ir_array_grow(ir_array *a, uint32_t size); +void ir_array_insert(ir_array *a, uint32_t i, ir_ref val); +void ir_array_remove(ir_array *a, uint32_t i); + +IR_ALWAYS_INLINE void ir_array_init(ir_array *a, uint32_t size) +{ + a->refs = ir_mem_malloc(size * sizeof(ir_ref)); + a->size = size; +} + +IR_ALWAYS_INLINE void ir_array_free(ir_array *a) +{ + ir_mem_free(a->refs); + a->refs = NULL; + a->size = 0; +} + +IR_ALWAYS_INLINE uint32_t ir_array_size(const ir_array *a) +{ + return a->size; +} + +IR_ALWAYS_INLINE ir_ref ir_array_get(const ir_array *a, uint32_t i) +{ + return (i < a->size) ? a->refs[i] : IR_UNUSED; +} + +IR_ALWAYS_INLINE ir_ref ir_array_at(const ir_array *a, uint32_t i) +{ + IR_ASSERT(i < a->size); + return a->refs[i]; +} + +IR_ALWAYS_INLINE void ir_array_set(ir_array *a, uint32_t i, ir_ref val) +{ + if (i >= a->size) { + ir_array_grow(a, i + 1); + } + a->refs[i] = val; +} + +IR_ALWAYS_INLINE void ir_array_set_unchecked(ir_array *a, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < a->size); + a->refs[i] = val; +} + +/* List/Stack of numeric references */ +typedef struct _ir_list { + ir_array a; + uint32_t len; +} ir_list; + +bool ir_list_contains(const ir_list *l, ir_ref val); +void ir_list_insert(ir_list *l, uint32_t i, ir_ref val); +void ir_list_remove(ir_list *l, uint32_t i); + +IR_ALWAYS_INLINE void ir_list_init(ir_list *l, uint32_t size) +{ + ir_array_init(&l->a, size); + l->len = 0; +} + +IR_ALWAYS_INLINE void ir_list_free(ir_list *l) +{ + ir_array_free(&l->a); + l->len = 0; +} + +IR_ALWAYS_INLINE void ir_list_clear(ir_list *l) +{ + l->len = 0; +} + +IR_ALWAYS_INLINE uint32_t ir_list_len(const ir_list *l) +{ + return l->len; +} + +IR_ALWAYS_INLINE uint32_t ir_list_capasity(const ir_list *l) +{ + return ir_array_size(&l->a); +} + +IR_ALWAYS_INLINE void ir_list_push(ir_list *l, ir_ref val) +{ + ir_array_set(&l->a, l->len++, val); +} + +IR_ALWAYS_INLINE void ir_list_push_unchecked(ir_list *l, ir_ref val) +{ + ir_array_set_unchecked(&l->a, l->len++, val); +} + +IR_ALWAYS_INLINE ir_ref ir_list_pop(ir_list *l) +{ + IR_ASSERT(l->len > 0); + return ir_array_at(&l->a, --l->len); +} + +IR_ALWAYS_INLINE ir_ref ir_list_peek(const ir_list *l) +{ + IR_ASSERT(l->len > 0); + return ir_array_at(&l->a, l->len - 1); +} + +IR_ALWAYS_INLINE ir_ref ir_list_at(const ir_list *l, uint32_t i) +{ + IR_ASSERT(i < l->len); + return ir_array_at(&l->a, i); +} + +IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val) +{ + IR_ASSERT(i < l->len); + ir_array_set_unchecked(&l->a, i, val); +} + +/* Worklist (unique list) */ +typedef struct _ir_worklist { + ir_list l; + ir_bitset visited; +} ir_worklist; + +IR_ALWAYS_INLINE void ir_worklist_init(ir_worklist *w, uint32_t size) +{ + ir_list_init(&w->l, size); + w->visited = ir_bitset_malloc(size); +} + +IR_ALWAYS_INLINE void ir_worklist_free(ir_worklist *w) +{ + ir_list_free(&w->l); + ir_mem_free(w->visited); +} + +IR_ALWAYS_INLINE uint32_t ir_worklist_len(const ir_worklist *w) +{ + return 
ir_list_len(&w->l); +} + +IR_ALWAYS_INLINE uint32_t ir_worklist_capasity(const ir_worklist *w) +{ + return ir_list_capasity(&w->l); +} + +IR_ALWAYS_INLINE void ir_worklist_clear(ir_worklist *w) +{ + ir_list_clear(&w->l); + ir_bitset_clear(w->visited, ir_bitset_len(ir_worklist_capasity(w))); +} + +IR_ALWAYS_INLINE bool ir_worklist_push(ir_worklist *w, ir_ref val) +{ + IR_ASSERT(val >= 0 && (uint32_t)val < ir_worklist_capasity(w)); + if (ir_bitset_in(w->visited, val)) { + return 0; + } + ir_bitset_incl(w->visited, val); + IR_ASSERT(ir_list_len(&w->l) < ir_list_capasity(&w->l)); + ir_list_push_unchecked(&w->l, val); + return 1; +} + +IR_ALWAYS_INLINE ir_ref ir_worklist_pop(ir_worklist *w) +{ + return ir_list_pop(&w->l); +} + +IR_ALWAYS_INLINE ir_ref ir_worklist_peek(const ir_worklist *w) +{ + return ir_list_peek(&w->l); +} + +/* IR Hash Table */ +#define IR_INVALID_IDX 0xffffffff +#define IR_INVALID_VAL 0x80000000 + +typedef struct _ir_hashtab_bucket { + uint32_t key; + ir_ref val; + uint32_t next; +} ir_hashtab_bucket; + +typedef struct _ir_hashtab { + void *data; + uint32_t mask; + uint32_t size; + uint32_t count; + uint32_t pos; +} ir_hashtab; + +void ir_hashtab_init(ir_hashtab *tab, uint32_t size); +void ir_hashtab_free(ir_hashtab *tab); +ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key); +bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val); +void ir_hashtab_key_sort(ir_hashtab *tab); + +/* IR Addr Table */ +typedef struct _ir_addrtab_bucket { + uint64_t key; + ir_ref val; + uint32_t next; +} ir_addrtab_bucket; + +void ir_addrtab_init(ir_hashtab *tab, uint32_t size); +void ir_addrtab_free(ir_hashtab *tab); +ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key); +bool ir_addrtab_add(ir_hashtab *tab, uint64_t key, ir_ref val); + +/*** IR OP info ***/ +extern const uint8_t ir_type_flags[IR_LAST_TYPE]; +extern const char *ir_type_name[IR_LAST_TYPE]; +extern const char *ir_type_cname[IR_LAST_TYPE]; +extern const uint8_t ir_type_size[IR_LAST_TYPE]; +extern const uint32_t ir_op_flags[IR_LAST_OP]; +extern const char *ir_op_name[IR_LAST_OP]; + +#define IR_IS_CONST_OP(op) ((op) > IR_NOP && (op) <= IR_C_FLOAT) +#define IR_IS_FOLDABLE_OP(op) ((op) <= IR_LAST_FOLDABLE_OP) + +IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v) +{ + + if (v->type == IR_BOOL) { + return v->val.b; + } else if (IR_IS_TYPE_INT(v->type)) { + return v->val.i64 != 0; + } else if (v->type == IR_DOUBLE) { + return v->val.d != 0.0; + } else { + IR_ASSERT(v->type == IR_FLOAT); + return v->val.f != 0.0; + } + return 0; +} + +IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref) +{ + if (ref == IR_TRUE) { + return 1; + } else if (ref == IR_FALSE) { + return 0; + } else { + IR_ASSERT(IR_IS_CONST_REF(ref)); + return ir_const_is_true(&ctx->ir_base[ref]); + } +} + +/* IR OP flags */ +#define IR_OP_FLAG_OPERANDS_SHIFT 3 + +#define IR_OP_FLAG_EDGES_MASK 0x03 +#define IR_OP_FLAG_VAR_INPUTS 0x04 +#define IR_OP_FLAG_OPERANDS_MASK 0x18 +#define IR_OP_FLAG_MEM_MASK ((1<<6)|(1<<7)) + +#define IR_OP_FLAG_DATA (1<<8) +#define IR_OP_FLAG_CONTROL (1<<9) +#define IR_OP_FLAG_MEM (1<<10) +#define IR_OP_FLAG_COMMUTATIVE (1<<11) +#define IR_OP_FLAG_BB_START (1<<12) +#define IR_OP_FLAG_BB_END (1<<13) +#define IR_OP_FLAG_TERMINATOR (1<<14) +#define IR_OP_FLAG_PINNED (1<<15) + +#define IR_OP_FLAG_MEM_LOAD ((0<<6)|(0<<7)) +#define IR_OP_FLAG_MEM_STORE ((0<<6)|(1<<7)) +#define IR_OP_FLAG_MEM_CALL ((1<<6)|(0<<7)) +#define IR_OP_FLAG_MEM_ALLOC ((1<<6)|(1<<7)) +#define IR_OP_FLAG_MEM_MASK ((1<<6)|(1<<7)) + +#define 
IR_OPND_UNUSED 0x0 +#define IR_OPND_DATA 0x1 +#define IR_OPND_CONTROL 0x2 +#define IR_OPND_CONTROL_DEP 0x3 +#define IR_OPND_CONTROL_REF 0x4 +#define IR_OPND_STR 0x5 +#define IR_OPND_NUM 0x6 +#define IR_OPND_PROB 0x7 + +#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \ + ((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28)) + +#define IR_INPUT_EDGES_COUNT(flags) (flags & IR_OP_FLAG_EDGES_MASK) +#define IR_OPERANDS_COUNT(flags) ((flags & IR_OP_FLAG_OPERANDS_MASK) >> IR_OP_FLAG_OPERANDS_SHIFT) + +#define IR_OP_HAS_VAR_INPUTS(flags) ((flags) & IR_OP_FLAG_VAR_INPUTS) + +#define IR_OPND_KIND(flags, i) \ + (((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf) + +#define IR_IS_REF_OPND_KIND(kind) \ + ((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF) + +IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn) +{ + uint32_t flags = ir_op_flags[insn->op]; + uint32_t n = IR_OPERANDS_COUNT(flags); + + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + /* MERGE, PHI, CALL, etc */ + n = insn->inputs_count; + } + return n; +} + +IR_ALWAYS_INLINE ir_ref ir_input_edges_count(const ir_ctx *ctx, const ir_insn *insn) +{ + uint32_t flags = ir_op_flags[insn->op]; + uint32_t n = IR_INPUT_EDGES_COUNT(flags); + if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) { + /* MERGE, PHI, CALL, etc */ + n = insn->inputs_count; + } + return n; +} + +IR_ALWAYS_INLINE uint32_t ir_insn_inputs_to_len(uint32_t inputs_count) +{ + return 1 + (inputs_count >> 2); +} + +IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) +{ + return ir_insn_inputs_to_len(insn->inputs_count); +} + +/*** IR Binding ***/ +IR_ALWAYS_INLINE ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref) +{ + ir_ref var = ir_hashtab_find(ctx->binding, ref); + return (var != (ir_ref)IR_INVALID_VAL) ? 
var : 0;
+}
+
+/*** IR Use Lists ***/
+struct _ir_use_list {
+	ir_ref refs; /* index in ir_ctx->use_edges[] array */
+	ir_ref count;
+};
+
+/*** IR Basic Blocks info ***/
+#define IR_IS_BB_START(op) \
+	((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0)
+
+#define IR_IS_BB_MERGE(op) \
+	((op) == IR_MERGE || (op) == IR_LOOP_BEGIN)
+
+#define IR_IS_BB_END(op) \
+	((ir_op_flags[op] & IR_OP_FLAG_BB_END) != 0)
+
+#define IR_BB_UNREACHABLE (1<<0)
+#define IR_BB_START (1<<1)
+#define IR_BB_ENTRY (1<<2)
+#define IR_BB_LOOP_HEADER (1<<3)
+#define IR_BB_IRREDUCIBLE_LOOP (1<<4)
+#define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */
+#define IR_BB_EMPTY (1<<6)
+#define IR_BB_PREV_EMPTY_ENTRY (1<<7)
+#define IR_BB_OSR_ENTRY_LOADS (1<<8) /* OSR Entry-point with register LOADs */
+#define IR_BB_LOOP_WITH_ENTRY (1<<9) /* set together with LOOP_HEADER if there is an ENTRY in the loop */
+
+/* The following flags are set by GCM */
+#define IR_BB_HAS_PHI (1<<10)
+#define IR_BB_HAS_PI (1<<11)
+#define IR_BB_HAS_PARAM (1<<12)
+#define IR_BB_HAS_VAR (1<<13)
+
+
+struct _ir_block {
+	uint32_t flags;
+	ir_ref start; /* index of first instruction */
+	ir_ref end; /* index of last instruction */
+	uint32_t successors; /* index in ir_ctx->cfg_edges[] array */
+	uint32_t successors_count;
+	uint32_t predecessors; /* index in ir_ctx->cfg_edges[] array */
+	uint32_t predecessors_count;
+	union {
+		uint32_t dom_parent; /* immediate dominator block */
+		uint32_t idom; /* immediate dominator block */
+	};
+	union {
+		uint32_t dom_depth; /* depth from the root of the dominators tree */
+		uint32_t postnum; /* used temporarily during tree construction */
+	};
+	uint32_t dom_child; /* first dominated block */
+	uint32_t dom_next_child; /* next dominated block (linked list) */
+	uint32_t loop_header;
+	uint32_t loop_depth;
+};
+
+uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b);
+uint32_t ir_skip_empty_next_blocks(const ir_ctx *ctx, uint32_t b);
+void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block, uint32_t *next_block);
+
+IR_ALWAYS_INLINE uint32_t ir_phi_input_number(const ir_ctx *ctx, const ir_block *bb, uint32_t from)
+{
+	uint32_t n, *p;
+
+	for (n = 0, p = &ctx->cfg_edges[bb->predecessors]; n < bb->predecessors_count; p++, n++) {
+		if (*p == from) {
+			return n + 2; /* first input is a reference to MERGE */
+		}
+	}
+	IR_ASSERT(0);
+	return 0;
+}
+
+/*** Folding Engine (see ir.c and ir_fold.h) ***/
+typedef enum _ir_fold_action {
+	IR_FOLD_DO_RESTART,
+	IR_FOLD_DO_CSE,
+	IR_FOLD_DO_EMIT,
+	IR_FOLD_DO_COPY,
+	IR_FOLD_DO_CONST
+} ir_fold_action;
+
+ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
+
+/*** IR Live Info ***/
+typedef ir_ref ir_live_pos;
+typedef struct _ir_use_pos ir_use_pos;
+
+#define IR_SUB_REFS_COUNT 4
+
+#define IR_LOAD_SUB_REF 0
+#define IR_USE_SUB_REF 1
+#define IR_DEF_SUB_REF 2
+#define IR_SAVE_SUB_REF 3
+
+#define IR_LIVE_POS_TO_REF(pos) ((pos) / IR_SUB_REFS_COUNT)
+#define IR_LIVE_POS_TO_SUB_REF(pos) ((pos) % IR_SUB_REFS_COUNT)
+
+#define IR_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)
+
+#define IR_START_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)
+#define IR_LOAD_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_LOAD_SUB_REF)
+#define IR_USE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_USE_SUB_REF)
+#define IR_DEF_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_DEF_SUB_REF)
+#define IR_SAVE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SAVE_SUB_REF)
+#define IR_END_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SUB_REFS_COUNT)
+
+/* ir_use_pos.flags bits */
+#define IR_USE_MUST_BE_IN_REG (1<<0)
+#define IR_USE_SHOULD_BE_IN_REG (1<<1)
+#define IR_DEF_REUSES_OP1_REG (1<<2)
+#define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)
+
+#define IR_FUSED_USE (1<<6)
+#define IR_PHI_USE (1<<7)
+
+#define IR_OP1_MUST_BE_IN_REG (1<<8)
+#define IR_OP1_SHOULD_BE_IN_REG (1<<9)
+#define IR_OP2_MUST_BE_IN_REG (1<<10)
+#define IR_OP2_SHOULD_BE_IN_REG (1<<11)
+#define IR_OP3_MUST_BE_IN_REG (1<<12)
+#define IR_OP3_SHOULD_BE_IN_REG (1<<13)
+
+#define IR_USE_FLAGS(def_flags, op_num) (((def_flags) >> (6 + (IR_MIN((op_num), 3) * 2))) & 3)
+
+struct _ir_use_pos {
+	uint16_t op_num; /* 0 - means result */
+	int8_t hint;
+	uint8_t flags;
+	ir_ref hint_ref; /* negative references are used for FUSION and PHI */
+	ir_live_pos pos;
+	ir_use_pos *next;
+};
+
+struct _ir_live_range {
+	ir_live_pos start; /* inclusive */
+	ir_live_pos end; /* exclusive */
+	ir_live_range *next;
+};
+
+/* ir_live_interval.flags bits (two low bits are reserved for temporary register number) */
+#define IR_LIVE_INTERVAL_FIXED (1<<0)
+#define IR_LIVE_INTERVAL_TEMP (1<<1)
+#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2)
+#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3)
+#define IR_LIVE_INTERVAL_MEM_PARAM (1<<4)
+#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5)
+#define IR_LIVE_INTERVAL_COALESCED (1<<6)
+#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
+#define IR_LIVE_INTERVAL_SPILLED (1<<8)
+#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9)
+
+struct _ir_live_interval {
+	uint8_t type;
+	int8_t reg;
+	uint16_t flags;
+	union {
+		int32_t vreg;
+		int32_t tmp_ref;
+	};
+	union {
+		int32_t stack_spill_pos;
+		ir_ref tmp_op_num;
+	};
+	ir_live_pos end; /* end of the last live range (cache of ival.range.{next->}end) */
+	ir_live_range range;
+	ir_live_range *current_range;
+	ir_use_pos *use_pos;
+	ir_live_interval *next;
+	ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */
+};
+
+typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to);
+
+int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy);
+
+#if defined(IR_REGSET_64BIT)
+
+/*typedef enum _ir_reg ir_reg;*/
+typedef int8_t ir_reg;
+
+/*** Register Sets ***/
+#if IR_REGSET_64BIT
+typedef uint64_t ir_regset;
+#else
+typedef uint32_t ir_regset;
+#endif
+
+#define IR_REGSET_EMPTY 0
+
+#define IR_REGSET_IS_EMPTY(regset) \
+	(regset == IR_REGSET_EMPTY)
+
+#define IR_REGSET_IS_SINGLETON(regset) \
+	(regset && !(regset & (regset - 1)))
+
+#if IR_REGSET_64BIT
+# define IR_REGSET(reg) \
+	(1ull << (reg))
+#else
+# define IR_REGSET(reg) \
+	(1u << (reg))
+#endif
+
+#if IR_REGSET_64BIT
+# define IR_REGSET_INTERVAL(reg1, reg2) \
+	(((1ull << ((reg2) - (reg1) + 1)) - 1) << (reg1))
+#else
+# define IR_REGSET_INTERVAL(reg1, reg2) \
+	(((1u << ((reg2) - (reg1) + 1)) - 1) << (reg1))
+#endif
+
+#define IR_REGSET_IN(regset, reg) \
+	(((regset) & IR_REGSET(reg)) != 0)
+
+#define IR_REGSET_INCL(regset, reg) \
+	(regset) |= IR_REGSET(reg)
+
+#define IR_REGSET_EXCL(regset, reg) \
+	(regset) &= ~IR_REGSET(reg)
+
+#define IR_REGSET_UNION(set1, set2) \
+	((set1) | (set2))
+
+#define IR_REGSET_INTERSECTION(set1, set2) \
+	((set1) & (set2))
+
+#define IR_REGSET_DIFFERENCE(set1, set2) \
+	((set1) & ~(set2))
+
+#if IR_REGSET_64BIT
+# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntzl(set))
IR_REGSET_FIRST(set) ((ir_reg)ir_ntzl(set)) +# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlzl(set)^63)) +#else +# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntz(set)) +# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlz(set)^31)) +#endif + +IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set) +{ + ir_reg reg; + + IR_ASSERT(!IR_REGSET_IS_EMPTY(*set)); + reg = IR_REGSET_FIRST(*set); + *set = (*set) & ((*set) - 1); + return reg; +} + +#define IR_REGSET_FOREACH(set, reg) \ + do { \ + ir_regset _tmp = (set); \ + while (!IR_REGSET_IS_EMPTY(_tmp)) { \ + reg = ir_regset_pop_first(&_tmp); + +#define IR_REGSET_FOREACH_END() \ + } \ + } while (0) + +#endif /* defined(IR_REGSET_64BIT) */ + +/*** IR Register Allocation ***/ +/* Flags for ctx->regs[][] (low bits are used for register number itself) */ +typedef struct _ir_reg_alloc_data { + int32_t unused_slot_4; + int32_t unused_slot_2; + int32_t unused_slot_1; + ir_live_interval **handled; +} ir_reg_alloc_data; + +int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data); + +IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg) +{ + int8_t *regs = ctx->regs[ref]; + + if (op_num > 0) { + /* regs[] is not limited by the declared boundary 4, the real boundary is checked below */ + IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count)); + } + regs[op_num] = reg; +} + +IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int op_num) +{ + int8_t *regs = ctx->regs[ref]; + + /* regs[] is not limited by the declared boundary 4, the real boundary is checked below */ + IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count)); + return regs[op_num]; +} + +/*** IR Target Interface ***/ + +/* ctx->rules[] flags */ +#define IR_FUSED (1U<<31) /* Insn is fused into others (code is generated as part of the fusion root) */ +#define IR_SKIPPED (1U<<30) /* Insn is skipped (code is not generated) */ +#define IR_SIMPLE (1U<<29) /* Insn doesn't have any target constraints */ + +#define IR_RULE_MASK 0xff + +extern const char *ir_rule_name[]; + +typedef struct _ir_target_constraints ir_target_constraints; + +#define IR_TMP_REG(_num, _type, _start, _end) \ + (ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)} +#define IR_SCRATCH_REG(_reg, _start, _end) \ + (ir_tmp_reg){.reg=(_reg), .type=IR_VOID, .start=(_start), .end=(_end)} + +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints); + +void ir_fix_stack_frame(ir_ctx *ctx); + +/* Utility */ +ir_type ir_get_return_type(ir_ctx *ctx); +bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn); +bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn); + +//#define IR_BITSET_LIVENESS + +#endif /* IR_PRIVATE_H */ diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c new file mode 100644 index 0000000000000..d3b9ac134a91c --- /dev/null +++ b/ext/opcache/jit/ir/ir_ra.c @@ -0,0 +1,3870 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + * + * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and + * Michael Franz, CGO'10 (2010) + * See: "Optimized Interval Splitting in a Linear Scan Register Allocator", + * Christian Wimmer VEE'05 (2005) + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include <stdlib.h> +#include "ir.h" + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "ir_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "ir_aarch64.h" +#else +# error "Unknown IR target" +#endif + +#include "ir_private.h" + +int ir_regs_number(void) +{ + return IR_REG_NUM; +} + +bool ir_reg_is_int(int32_t reg) +{ + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST; +} + +static int ir_assign_virtual_registers_slow(ir_ctx *ctx) +{ + uint32_t *vregs; + uint32_t vregs_count = 0; + uint32_t b; + ir_ref i, n; + ir_block *bb; + ir_insn *insn; + uint32_t flags; + + /* Assign unique virtual register to each data node */ + vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref)); + n = 1; + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + i = bb->start; + + /* skip first instruction */ + insn = ctx->ir_base + i; + n = ir_insn_len(insn); + i += n; + insn += n; + while (i < bb->end) { + flags = ir_op_flags[insn->op]; + if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0)) + || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) { + if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) { + vregs[i] = ++vregs_count; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + } + ctx->vregs_count = vregs_count; + ctx->vregs = vregs; + + return 1; +} + +int ir_assign_virtual_registers(ir_ctx *ctx) +{ + uint32_t *vregs; + uint32_t vregs_count = 0; + ir_ref i; + ir_insn *insn; + + if (!ctx->rules) { + return ir_assign_virtual_registers_slow(ctx); + } + + /* Assign unique virtual register to each rule that needs it */ + vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); + + for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) { + uint32_t v = 0; + + if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) { + uint32_t flags = ir_op_flags[insn->op]; + + if ((flags & IR_OP_FLAG_DATA) + || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) { + v = ++vregs_count; + } + } + vregs[i] = v; + } + + ctx->vregs_count = vregs_count; + ctx->vregs = vregs; + + return 1; +} + +/* Lifetime intervals construction */ + +static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end) +{ + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + + ival->type = IR_VOID; + ival->reg = IR_REG_NONE; + ival->flags = 0; + ival->vreg = v; + ival->stack_spill_pos = -1; // not allocated + ival->range.start = start; + ival->range.end = ival->end = end; + ival->range.next = NULL; + ival->use_pos = NULL; + ival->next = NULL; + + ctx->live_intervals[v] = ival; + return ival; +} + +static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end) +{ + ir_live_interval *ival = ctx->live_intervals[v]; + ir_live_range *p, *q; + + if (!ival) { + return ir_new_live_range(ctx, v, start, end); + } + + p = &ival->range; + if (end >= p->start) { + ir_live_range *prev = NULL; + + do { + if (p->end >= start) { + if (start < p->start) { + p->start = start; + } + if (end > p->end) { + /* merge with next */ 
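+ /* The extended range may now run into one or more of the following ranges: the loop below absorbs every overlapped successor and parks the emptied ir_live_range nodes on ctx->unused_ranges so later calls can recycle them instead of allocating new ones from the arena. */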
+ ir_live_range *next = p->next; + + p->end = end; + while (next && p->end >= next->start) { + if (next->end > p->end) { + p->end = next->end; + } + p->next = next->next; + /* remember in the "unused_ranges" list */ + next->next = ctx->unused_ranges; + ctx->unused_ranges = next; + next = p->next; + } + if (!p->next) { + ival->end = p->end; + } + } + return ival; + } + prev = p; + p = prev->next; + } while (p && end >= p->start); + if (!p) { + ival->end = end; + } + if (prev) { + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + prev->next = q; + q->start = start; + q->end = end; + q->next = p; + return ival; + } + } + + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + q->start = p->start; + q->end = p->end; + q->next = p->next; + p->start = start; + p->end = end; + p->next = q; + return ival; +} + +IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end) +{ + ir_live_interval *ival = ctx->live_intervals[v]; + + if (ival && ival->range.start == end) { + ival->range.start = start; + return ival; + } + return ir_add_live_range(ctx, v, start, end); +} + +static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end) +{ + int v = ctx->vregs_count + 1 + reg; + ir_live_interval *ival = ctx->live_intervals[v]; + ir_live_range *q; + + if (!ival) { + ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + ival->type = IR_VOID; + ival->reg = reg; + ival->flags = IR_LIVE_INTERVAL_FIXED; + ival->vreg = v; + ival->stack_spill_pos = -1; // not allocated + ival->range.start = start; + ival->range.end = ival->end = end; + ival->range.next = NULL; + ival->use_pos = NULL; + ival->next = NULL; + + ctx->live_intervals[v] = ival; + } else if (EXPECTED(end < ival->range.start)) { + if (ctx->unused_ranges) { + /* reuse */ + q = ctx->unused_ranges; + ctx->unused_ranges = q->next; + } else { + q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range)); + } + + q->start = ival->range.start; + q->end = ival->range.end; + q->next = ival->range.next; + ival->range.start = start; + ival->range.end = end; + ival->range.next = q; + } else if (end == ival->range.start) { + ival->range.start = start; + } else { + ir_add_live_range(ctx, v, start, end); + } +} + +static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg) +{ + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + + ival->type = tmp_reg.type; + ival->reg = IR_REG_NONE; + ival->flags = IR_LIVE_INTERVAL_TEMP; + ival->tmp_ref = tmp_ref; + ival->tmp_op_num = tmp_op_num; + ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start; + ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end; + ival->range.next = NULL; + ival->use_pos = NULL; + + if (!ctx->live_intervals[0]) { + ival->next = NULL; + ctx->live_intervals[0] = ival; + } else if (ival->range.start >= ctx->live_intervals[0]->range.start) { + ir_live_interval *prev = ctx->live_intervals[0]; + + while (prev->next && ival->range.start >= prev->next->range.start) { + prev = prev->next; + } + ival->next = prev->next; + prev->next = ival; + } else { + ir_live_interval *next = ctx->live_intervals[0]; + + ival->next = next; + ctx->live_intervals[0] = ival; + } + return; +} + 
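+/* ctx->live_intervals[0] holds the TEMP intervals created by ir_add_tmp() above, kept sorted by range start position; ir_has_tmp() below relies on this ordering to stop scanning as soon as an interval starts past the given instruction. */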
+static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num) +{ + ir_live_interval *ival = ctx->live_intervals[0]; + + if (ival) { + while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) { + if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) { + return 1; + } + ival = ival->next; + } + } + return 0; +} + +static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start) +{ + ir_live_interval *ival = ctx->live_intervals[v]; + ir_live_range *p = &ival->range; + +#if 0 + while (p && p->start < old_start) { + p = p->next; + } +#endif + IR_ASSERT(ival && p->start == old_start); + p->start = new_start; + return ival; +} + +static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos) +{ + ir_use_pos *p = ival->use_pos; + + if (EXPECTED(!p || p->pos > use_pos->pos)) { + use_pos->next = p; + ival->use_pos = use_pos; + } else { + ir_use_pos *prev; + + do { + prev = p; + p = p->next; + } while (p && p->pos < use_pos->pos); + + use_pos->next = prev->next; + prev->next = use_pos; + } +} + + +IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref) +{ + ir_use_pos *use_pos; + + use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos)); + use_pos->op_num = op_num; + use_pos->hint = hint; + use_pos->flags = use_flags; + use_pos->hint_ref = hint_ref; + use_pos->pos = pos; + + if (hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + + ir_add_use_pos(ctx, ival, use_pos); +} + +static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref) +{ + ir_use_pos *use_pos; + + IR_ASSERT(phi_ref > 0); + use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos)); + use_pos->op_num = op_num; + use_pos->hint = IR_REG_NONE; + use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ??? 
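+ /* Negative hint_ref values mark a reference to the PHI instruction itself rather than an ordinary hint reference (see the hint_ref comment in struct _ir_use_pos). */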
+ use_pos->hint_ref = -phi_ref; + use_pos->pos = pos; + + ir_add_use_pos(ctx, ival, use_pos); +} + +static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint) +{ + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]]; + + if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) { + ir_use_pos *use_pos = ival->use_pos; + + while (use_pos) { + if (use_pos->pos == pos) { + if (use_pos->hint == IR_REG_NONE) { + use_pos->hint = hint; + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + } + use_pos = use_pos->next; + } + } +} + +static void ir_hint_propagation(ir_ctx *ctx) +{ + int i; + ir_live_interval *ival; + ir_use_pos *use_pos; + ir_use_pos *hint_use_pos; + + for (i = ctx->vregs_count; i > 0; i--) { + ival = ctx->live_intervals[i]; + if (ival + && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + use_pos = ival->use_pos; + hint_use_pos = NULL; + while (use_pos) { + if (use_pos->op_num == 0) { + if (use_pos->hint_ref > 0) { + hint_use_pos = use_pos; + } + } else if (use_pos->hint != IR_REG_NONE) { + if (hint_use_pos) { + ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint); + hint_use_pos = NULL; + } + } + use_pos = use_pos->next; + } + } + } +} + +#ifdef IR_BITSET_LIVENESS +/* DFS + Loop-Forest liveness for SSA using bitset(s) */ +static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b) +{ + bool ok = 1; + int count = 0; + ir_list *list = (ir_list*)ctx->osr_entry_loads; + ir_ref i; + + IR_BITSET_FOREACH(live, len, i) { + /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */ + ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos; + ir_ref ref = (use_pos->hint_ref < 0) ? 
-use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + + if (use_pos->op_num) { + ir_ref *ops = ctx->ir_base[ref].ops; + ref = ops[use_pos->op_num]; + } + + if (ctx->ir_base[ref].op == IR_PARAM) { + continue; + } + if (ctx->binding) { + ir_ref var = ir_binding_find(ctx, ref); + if (var < 0) { + /* We may load the value at OSR entry-point */ + if (!count) { + bb->flags &= ~IR_BB_EMPTY; + bb->flags |= IR_BB_OSR_ENTRY_LOADS; + if (!ctx->osr_entry_loads) { + list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list)); + ir_list_init(list, 16); + } + ir_list_push(list, b); + ir_list_push(list, 0); + } + ir_list_push(list, ref); + count++; + continue; + } + } + fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref); + ok = 0; + } IR_BITSET_FOREACH_END(); + + if (!ok) { + IR_ASSERT(0); + } + if (count) { + ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count); + +#if 0 + /* ENTRY "clobbers" all registers */ + ir_ref ref = ctx->ir_base[bb->start].op1; + ir_add_fixed_live_range(ctx, IR_REG_ALL, + IR_DEF_LIVE_POS_FROM_REF(ref), + IR_SAVE_LIVE_POS_FROM_REF(ref)); +#endif + } +} + +static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live) +{ + ir_ref stack[4]; + int stack_pos = 0; + ir_target_constraints constraints; + ir_insn *insn; + uint32_t j, n, flags, def_flags; + ir_ref *p, child; + uint8_t use_flags; + ir_reg reg; + ir_live_pos use_pos; + ir_live_interval *ival; + + while (1) { + IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED); + + if (!(ctx->rules[input] & IR_SIMPLE)) { + def_flags = ir_get_target_constraints(ctx, input, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[input]; + flags = ir_op_flags[insn->op]; + n = IR_INPUT_EDGES_COUNT(flags); + j = 1; + p = insn->ops + j; + if (flags & IR_OP_FLAG_CONTROL) { + j++; + p++; + } + for (; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + child = *p; + if (child > 0) { + uint32_t v = ctx->vregs[child]; + + if (v) { + use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j); + reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + if (EXPECTED(reg == IR_REG_NONE)) { + use_pos += IR_USE_SUB_REF; + } + + if (!ir_bitset_in(live, v)) { + /* live.add(opd) */ + ir_bitset_incl(live, v); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input); + } else if (ctx->rules[child] & IR_FUSED) { + IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos))); + stack[stack_pos++] = child; + } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2); + } + } + } + if (!stack_pos) { + break; + } + input = stack[--stack_pos]; + } +} + +int ir_compute_live_ranges(ir_ctx *ctx) +{ + uint32_t b, i, j, k, n, succ, *p; + ir_ref ref; + uint32_t len; + ir_insn *insn; + ir_block *bb, *succ_bb; +#ifdef IR_DEBUG + ir_bitset visited; +#endif + ir_bitset live, bb_live; + ir_bitset loops = NULL; + ir_bitqueue queue; + ir_live_interval *ival; + + if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) { + return 0; + } + + if (ctx->rules) { + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + } + + /* Root of the list of IR_VARs */ + ctx->vars = IR_UNUSED; + + /* Compute Live Ranges */ + ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; + len = ir_bitset_len(ctx->vregs_count + 1); + bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t)); + + /* vregs + tmp + fixed + SCRATCH + ALL */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + +#ifdef IR_DEBUG + visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1); +#endif + + if (!ctx->arena) { + ctx->arena = ir_arena_create(16 * 1024); + } + + /* for each basic block in reverse order */ + for (b = ctx->cfg_blocks_count; b > 0; b--) { + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + /* for each successor of b */ + +#ifdef IR_DEBUG + ir_bitset_incl(visited, b); +#endif + live = bb_live + (len * b); + n = bb->successors_count; + if (n == 0) { + ir_bitset_clear(live, len); + } else { + p = &ctx->cfg_edges[bb->successors]; + succ = *p; + +#ifdef IR_DEBUG + /* blocks must be ordered so that all dominators of a block come before it */ + IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ); +#endif + + /* live = union of successors.liveIn */ + if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) { + ir_bitset_copy(live, bb_live + (len * succ), len); + } else { + IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)); + ir_bitset_clear(live, len); + } + if (n > 1) { + for (p++, n--; n > 0; p++, n--) { + succ = *p; + if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) { + ir_bitset_union(live, bb_live + (len * succ), len); + } else { + IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)); + } + } + } + + /* for each opd in live */ + IR_BITSET_FOREACH(live, len, i) { + /* intervals[opd].addRange(b.from, b.to) */ + ir_add_prev_live_range(ctx, i, + IR_START_LIVE_POS_FROM_REF(bb->start), + IR_END_LIVE_POS_FROM_REF(bb->end)); + } IR_BITSET_FOREACH_END(); + } + + if (bb->successors_count == 1) { + /* for each phi function phi of successor */ + succ = ctx->cfg_edges[bb->successors]; + succ_bb = 
&ctx->cfg_blocks[succ]; + if (succ_bb->flags & IR_BB_HAS_PHI) { + ir_use_list *use_list = &ctx->use_lists[succ_bb->start]; + + k = ir_phi_input_number(ctx, succ_bb, b); + IR_ASSERT(k != 0); + for (ref = 0; ref < use_list->count; ref++) { + ir_ref use = ctx->use_edges[use_list->refs + ref]; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref input = ir_insn_op(insn, k); + if (input > 0) { + uint32_t v = ctx->vregs[input]; + + /* live.add(phi.inputOf(b)) */ + IR_ASSERT(v); + ir_bitset_incl(live, v); + /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */ + ival = ir_add_prev_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), + IR_END_LIVE_POS_FROM_REF(bb->end)); + ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use); + } + } + } + } + } + + /* for each operation op of b in reverse order */ + ref = bb->end; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + ref = ctx->prev_ref[ref]; + } + for (; ref > bb->start; ref = ctx->prev_ref[ref]) { + uint32_t def_flags; + uint32_t flags; + ir_ref *p; + ir_target_constraints constraints; + uint32_t v; + + if (ctx->rules) { + int n; + + if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = &ctx->ir_base[ref]; + insn->op3 = ctx->vars; + ctx->vars = ref; + } + continue; + } + + def_flags = ir_get_target_constraints(ctx, ref, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = 0; + constraints.def_reg = IR_REG_NONE; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[ref]; + v = ctx->vregs[ref]; + if (v) { + IR_ASSERT(ir_bitset_in(live, v)); + + if (insn->op != IR_PHI) { + ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; + + if (reg != IR_REG_NONE) { + def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM || insn->op == IR_RLOAD) { + /* parameter register must be kept before it's copied */ + ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + } + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + } + /* live.remove(opd) */ + ir_bitset_excl(live, v); + /* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + } else { + /* live.remove(opd) */ + ir_bitset_excl(live, v); + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + if (UNEXPECTED(!ival)) { + /* Dead 
PHI */ + ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } + } + + IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); + flags = ir_op_flags[insn->op]; + j = 1; + p = insn->ops + 1; + if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) { + j++; + p++; + } + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; + ir_live_pos use_pos; + ir_ref hint_ref = 0; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + if (v) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (j == 1) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + } else if (input == insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } + if (!ir_bitset_in(live, v)) { + /* live.add(opd) */ + ir_bitset_incl(live, v); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); + } else if (ctx->rules) { + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + } + } else if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + + /* if b is loop header */ + if ((bb->flags & IR_BB_LOOP_HEADER) + && !ir_bitset_empty(live, len)) { + /* variables live at loop header are alive at the whole loop body */ + uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1); + uint32_t child; + ir_block *child_bb; + ir_bitset child_live_in; + + if (!loops) { + loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1); + } else { + ir_bitset_clear(loops, bb_set_len); + ir_bitqueue_clear(&queue); + } + ir_bitset_incl(loops, b); + child = b; + do { + child_bb = &ctx->cfg_blocks[child]; + child_live_in = bb_live + (len * child); + + IR_BITSET_FOREACH(live, len, i) { + ir_bitset_incl(child_live_in, i); + ir_add_live_range(ctx, i, + IR_START_LIVE_POS_FROM_REF(child_bb->start), + IR_END_LIVE_POS_FROM_REF(child_bb->end)); + } IR_BITSET_FOREACH_END(); + + child = child_bb->dom_child; + while (child) { + child_bb = &ctx->cfg_blocks[child]; + if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) { + ir_bitqueue_add(&queue, child); + if (child_bb->flags & IR_BB_LOOP_HEADER) { + ir_bitset_incl(loops, child); + } + } + child = child_bb->dom_next_child; + } + } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1); + } + } + + if (ctx->entries) { + for (i = 0; i < ctx->entries_count; i++) { + b = ctx->entries[i]; + bb = &ctx->cfg_blocks[b]; + live = bb_live + (len * b); + ir_add_osr_entry_loads(ctx, bb, live, len, b); + } + if (ctx->osr_entry_loads) { + ir_list_push((ir_list*)ctx->osr_entry_loads, 0); + } + } + + if 
(loops) { + ir_mem_free(loops); + ir_bitqueue_free(&queue); + } + + ir_mem_free(bb_live); +#ifdef IR_DEBUG + ir_mem_free(visited); +#endif + + return 1; +} + +#else +/* Path exploration by definition liveness for SSA using sets represented by linked lists */ + +#define IS_LIVE_IN_BLOCK(v, b) \ + (live_in_block[v] == b) +#define SET_LIVE_IN_BLOCK(v, b) do { \ + live_in_block[v] = b; \ + } while (0) + +/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */ +IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b) +{ +#if 0 + return live_outs[b]; +#else + if (!live_outs[b]) { + return -1; + } + return ir_list_at(live_lists, live_outs[b]); +#endif +} + +/* Remember a virtual register alive at the end of the block */ +IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v) +{ +#if 0 + ir_block *bb = &ctx->cfg_blocks[b]; + live_outs[b] = v; + ir_add_prev_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), + IR_END_LIVE_POS_FROM_REF(bb->end)); +#else + if (live_lists->len >= live_lists->a.size) { + ir_array_grow(&live_lists->a, live_lists->a.size + 1024); + } + /* Form a linked list of virtual registers live at the end of the block */ + ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */ + live_outs[b] = ir_list_len(live_lists); /* remember the new root */ + ir_list_push_unchecked(live_lists, v); /* push a virtual register */ +#endif +} + +/* + * Computes live-out sets for each basic-block per variable using def-use chains. + * + * The implementation is based on algorithms 6 and 7 described in + * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot, + * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. 
TR Inria RR-7503, 2011 + */ +static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists) +{ + ir_list block_queue, fuse_queue; + ir_ref i; + + ir_list_init(&fuse_queue, 16); + ir_list_init(&block_queue, 256); + + /* For each virtual register explore paths from all uses to definition */ + for (i = ctx->insns_count - 1; i > 0; i--) { + uint32_t v = ctx->vregs[i]; + + if (v) { + uint32_t def_block = ctx->cfg_map[i]; + ir_use_list *use_list = &ctx->use_lists[i]; + ir_ref *p, n = use_list->count; + + /* Collect all blocks where 'v' is used into a 'block_queue' */ + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_ref use = *p; + ir_insn *insn = &ctx->ir_base[use]; + + if (UNEXPECTED(insn->op == IR_PHI)) { + ir_ref n = insn->inputs_count - 1; + ir_ref *p = insn->ops + 2; /* PHI data inputs */ + ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ + + for (;n > 0; p++, q++, n--) { + if (*p == i) { + uint32_t pred_block = ctx->cfg_map[*q]; + + if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) { + ir_live_out_push(ctx, live_outs, live_lists, pred_block, v); + if (pred_block != def_block) { + ir_list_push(&block_queue, pred_block); + } + } + } + } + } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) { + while (1) { + ir_use_list *use_list = &ctx->use_lists[use]; + ir_ref *p, n = use_list->count; + + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_ref use = *p; + + if (ctx->rules[use] & IR_FUSED) { + ir_list_push(&fuse_queue, use); + } else { + uint32_t use_block = ctx->cfg_map[use]; + + if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) { + ir_list_push(&block_queue, use_block); + } + } + } + if (!ir_list_len(&fuse_queue)) { + break; + } + use = ir_list_pop(&fuse_queue); + } + } else { + uint32_t use_block = ctx->cfg_map[use]; + + /* Check if the virtual register is alive at the start of 'use_block' */ + if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) { + ir_list_push(&block_queue, use_block); + } + } + } + + /* UP_AND_MARK: Traverse through predecessor blocks until we reach the block where 'v' is defined*/ + while (ir_list_len(&block_queue)) { + uint32_t b = ir_list_pop(&block_queue); + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t *p, n = bb->predecessors_count; + + if (bb->flags & IR_BB_ENTRY) { + /* live_in_push(ENTRY, v) */ + ir_insn *insn = &ctx->ir_base[bb->start]; + + IR_ASSERT(insn->op == IR_ENTRY); + IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count); + if (live_lists->len >= live_lists->a.size) { + ir_array_grow(&live_lists->a, live_lists->a.size + 1024); + } + ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]); + ir_list_push_unchecked(live_lists, v); + live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1; + continue; + } + for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) { + uint32_t pred_block = *p; + + /* Check if 'pred_block' wasn't traversed before */ + if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) { + /* Mark a virtual register 'v' alive at the end of 'pred_block' */ + ir_live_out_push(ctx, live_outs, live_lists, pred_block, v); + if (pred_block != def_block) { + ir_list_push(&block_queue, pred_block); + } + } + } + } + } + } + + ir_list_free(&block_queue); + ir_list_free(&fuse_queue); +} + +static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, 
uint32_t b) +{ + bool ok = 1; + int count = 0; + ir_list *list = (ir_list*)ctx->osr_entry_loads; + ir_ref i; + + while (pos) { + i = ir_list_at(live_lists, pos); + pos = ir_list_at(live_lists, pos - 1); + + /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */ + ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos; + ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + + if (use_pos->op_num) { + ir_ref *ops = ctx->ir_base[ref].ops; + ref = ops[use_pos->op_num]; + } + + if (ctx->ir_base[ref].op == IR_PARAM) { + continue; + } + if (ctx->binding) { + ir_ref var = ir_binding_find(ctx, ref); + if (var < 0) { + /* We may load the value at OSR entry-point */ + if (!count) { + bb->flags &= ~IR_BB_EMPTY; + bb->flags |= IR_BB_OSR_ENTRY_LOADS; + if (!ctx->osr_entry_loads) { + list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list)); + ir_list_init(list, 16); + } + ir_list_push(list, b); + ir_list_push(list, 0); + } + ir_list_push(list, ref); + count++; + continue; + } + } + fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref); + ok = 0; + } + + if (!ok) { + IR_ASSERT(0); + } + if (count) { + ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count); + +#if 0 + /* ENTRY "clobbers" all registers */ + ir_ref ref = ctx->ir_base[bb->start].op1; + ir_add_fixed_live_range(ctx, IR_REG_ALL, + IR_DEF_LIVE_POS_FROM_REF(ref), + IR_SAVE_LIVE_POS_FROM_REF(ref)); +#endif + } +} + +static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b) +{ + ir_ref stack[4]; + int stack_pos = 0; + ir_target_constraints constraints; + ir_insn *insn; + uint32_t j, n, flags, def_flags; + ir_ref *p, child; + uint8_t use_flags; + ir_reg reg; + ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref); + ir_live_pos use_pos; + ir_live_interval *ival; + + while (1) { + IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED); + + if (!(ctx->rules[input] & IR_SIMPLE)) { + def_flags = ir_get_target_constraints(ctx, input, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + pos + constraints.tmp_regs[n].start, + pos + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[input]; + flags = ir_op_flags[insn->op]; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); + n = IR_INPUT_EDGES_COUNT(flags); + j = 1; + p = insn->ops + j; + if (flags & IR_OP_FLAG_CONTROL) { + j++; + p++; + } + for (; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + child = *p; + if (child > 0) { + uint32_t v = ctx->vregs[child]; + + if (v) { + reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + use_pos = pos; + if (EXPECTED(reg == IR_REG_NONE)) { + use_pos += IR_USE_SUB_REF; + } + + if (!IS_LIVE_IN_BLOCK(v, b)) { + /* live.add(opd) */ + SET_LIVE_IN_BLOCK(v, b); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j); + ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input); + } else if (ctx->rules[child] & IR_FUSED) { + IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos))); + stack[stack_pos++] = child; + } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2); + } + } + } + if (!stack_pos) { + break; + } + input = stack[--stack_pos]; + } +} + +int ir_compute_live_ranges(ir_ctx *ctx) +{ + uint32_t b, i, j, k, n, succ; + ir_ref ref; + ir_insn *insn; + ir_block *bb, *succ_bb; + uint32_t *live_outs; + uint32_t *live_in_block; + ir_list live_lists; + ir_live_interval *ival; + + if (!(ctx->flags & IR_LINEAR) || !ctx->vregs) { + return 0; + } + + if (ctx->rules) { + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + } + + /* Root of the list of IR_VARs */ + ctx->vars = IR_UNUSED; + + /* Compute Live Ranges */ + ctx->flags &= ~IR_LR_HAVE_DESSA_MOVES; + + /* vregs + tmp + fixed + SCRATCH + ALL */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + + if (!ctx->arena) { + ctx->arena = ir_arena_create(16 * 1024); + } + + live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t)); + ir_list_init(&live_lists, 1024); + ir_compute_live_sets(ctx, live_outs, &live_lists); + live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t)); + + /* for each basic block in reverse order */ + for (b = ctx->cfg_blocks_count; b > 0; b--) { + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + + /* For all virtual registers alive at the end of the block */ + n = live_outs[b]; + while (n != 0) { + i = ir_list_at(&live_lists, n); + SET_LIVE_IN_BLOCK(i, b); + ir_add_prev_live_range(ctx, i, + IR_START_LIVE_POS_FROM_REF(bb->start), + IR_END_LIVE_POS_FROM_REF(bb->end)); + n = ir_list_at(&live_lists, n - 1); + } + + if (bb->successors_count == 1) { + /* for each phi function of the successor */ + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + if (succ_bb->flags & IR_BB_HAS_PHI) { + ir_use_list *use_list = &ctx->use_lists[succ_bb->start]; + ir_ref n, *p; + + k = ir_phi_input_number(ctx, succ_bb, b); + IR_ASSERT(k != 0); + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + ir_ref use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + ir_ref input = ir_insn_op(insn, k); + if (input > 0) { + uint32_t v = ctx->vregs[input]; + + IR_ASSERT(v); + ival = ctx->live_intervals[v]; + ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use); + } + } + } + } + } + + /* for each operation of the block in reverse order */ + ref = bb->end; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_END || insn->op == IR_LOOP_END) { + ref = ctx->prev_ref[ref]; + } + for (; ref > bb->start; ref = ctx->prev_ref[ref]) { + uint32_t def_flags; + uint32_t flags; + ir_ref *p; + ir_target_constraints constraints; + uint32_t v; + + if (ctx->rules) { + int n; + + if 
(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) { + if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) { + insn = &ctx->ir_base[ref]; + insn->op3 = ctx->vars; + ctx->vars = ref; + } + continue; + } + + def_flags = ir_get_target_constraints(ctx, ref, &constraints); + n = constraints.tmps_count; + while (n > 0) { + n--; + if (constraints.tmp_regs[n].type) { + ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]); + } else { + /* CPU specific constraints */ + ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start, + IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end); + } + } + } else { + def_flags = 0; + constraints.def_reg = IR_REG_NONE; + constraints.hints_count = 0; + } + + insn = &ctx->ir_base[ref]; + v = ctx->vregs[ref]; + if (v) { + if (insn->op != IR_PHI) { + ir_live_pos def_pos; + ir_ref hint_ref = 0; + ir_reg reg = constraints.def_reg; + + if (reg != IR_REG_NONE) { + def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref); + if (insn->op == IR_PARAM || insn->op == IR_RLOAD) { + /* parameter register must be kept before it's copied */ + ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + } + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) { + hint_ref = insn->op1; + } + if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_USE_LIVE_POS_FROM_REF(ref); + } else { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } else { + if (insn->op == IR_PARAM) { + /* We may reuse parameter stack slot for spilling */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } else if (insn->op == IR_VLOAD) { + /* Load may be fused into the usage instruction */ + ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; + } + def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); + } + /* intervals[opd].setFrom(op.id) */ + ival = ir_fix_live_range(ctx, v, + IR_START_LIVE_POS_FROM_REF(bb->start), def_pos); + ival->type = insn->type; + ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref); + } else { + /* PHIs inputs must not be processed */ + ival = ctx->live_intervals[v]; + if (UNEXPECTED(!ival)) { + /* Dead PHI */ + ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref)); + } + ival->type = insn->type; + ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0); + continue; + } + } + + IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)))); + flags = ir_op_flags[insn->op]; + j = 1; + p = insn->ops + 1; + if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) { + j++; + p++; + } + for (; j <= insn->inputs_count; j++, p++) { + ir_ref input = *p; + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + ir_live_pos use_pos; + ir_ref hint_ref = 0; + uint32_t v; + + if (input > 0) { + v = ctx->vregs[input]; + if (v) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } else if (def_flags & IR_DEF_REUSES_OP1_REG) { + if (j == 1) { + if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) { + use_pos = IR_USE_LIVE_POS_FROM_REF(ref); + } else { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + IR_ASSERT(ctx->vregs[ref]); + hint_ref = ref; + } else if (input == insn->op1) { + /* Input is the same as "op1" */ + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + } + } + if (!IS_LIVE_IN_BLOCK(v, b)) { + /* live.add(opd) */ + SET_LIVE_IN_BLOCK(v, b); + /* intervals[opd].addRange(b.from, op.id) */ + ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos); + } else { + ival = ctx->live_intervals[v]; + } + ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); + } else if (ctx->rules) { + if (ctx->rules[input] & IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); + } else { + if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + } else if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); + } + } + } + } + + if (ctx->entries) { + for (i = 0; i < ctx->entries_count; i++) { + b = ctx->entries[i]; + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(bb->predecessors_count == 1); + ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b); + } + if (ctx->osr_entry_loads) { + ir_list_push((ir_list*)ctx->osr_entry_loads, 0); + } + } + + ir_list_free(&live_lists); + ir_mem_free(live_outs); + ir_mem_free(live_in_block); + + return 1; +} + +#endif + +/* Live Ranges coalescing */ + +static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2) +{ + while (1) { + if (lrg2->start < lrg1->end) { + if (lrg1->start < lrg2->end) { + return IR_MAX(lrg1->start, lrg2->start); + } else { + lrg2 = lrg2->next; + if (!lrg2) { + return 0; + } + } + } else { + lrg1 = lrg1->next; + if (!lrg1) { + return 0; + } + } + } +} + +static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2) +{ + ir_live_interval *ival1 = ctx->live_intervals[r1]; + ir_live_interval *ival2 = ctx->live_intervals[r2]; + +#if 0 + if (ival2->range.start >= ival1->end + || ival1->range.start >= ival2->end) { + return 0; + } +#endif + return ir_ivals_overlap(&ival1->range, &ival2->range); +} + +static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) +{ + ir_live_interval *ival = ctx->live_intervals[r2]; + ir_live_range *live_range = &ival->range; + ir_live_range *next; + ir_use_pos *use_pos, *next_pos, **prev; + +#if 0 + fprintf(stderr, "COALESCE %d -> %d\n", r2, r1); +#endif + + ir_add_live_range(ctx, r1, live_range->start, live_range->end); + live_range = live_range->next; + while (live_range) { + next = live_range->next; + live_range->next = ctx->unused_ranges; + ctx->unused_ranges = live_range; + ir_add_live_range(ctx, r1, live_range->start, live_range->end); + live_range = next; + } + + /* merge sorted use_pos lists */ + prev = &ctx->live_intervals[r1]->use_pos; + 
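+ /* Splice the use positions of r2 into the sorted use list of r1, ordered by position (and by op_num for equal positions); hint references that would now point into the joined vreg itself are cleared to avoid self-hints. */ + 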
use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) { + use_pos->hint_ref = 0; + } + while (*prev && ((*prev)->pos < use_pos->pos || + ((*prev)->pos == use_pos->pos && + (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) { + if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) { + (*prev)->hint_ref = 0; + } + prev = &(*prev)->next; + } + next_pos = use_pos->next; + use_pos->next = *prev; + *prev = use_pos; + prev = &use_pos->next; + use_pos = next_pos; + } + use_pos = *prev; + while (use_pos) { + if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) { + use_pos->hint_ref = 0; + } + use_pos = use_pos->next; + } + + ctx->live_intervals[r1]->flags |= + IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)); + if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) { + ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD; + } + ctx->live_intervals[r2] = NULL; + + // TODO: remember to reuse ??? + //ir_mem_free(ival); +} + +static bool ir_try_coalesce(ir_ctx *ctx, ir_ref from, ir_ref to) +{ + ir_ref i; + uint32_t v1 = ctx->vregs[from]; + uint32_t v2 = ctx->vregs[to]; + + if (v1 != v2 && !ir_vregs_overlap(ctx, v1, v2)) { + uint16_t f1 = ctx->live_intervals[v1]->flags; + uint16_t f2 = ctx->live_intervals[v2]->flags; + + if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) { + ir_vregs_join(ctx, v1, v2); + ctx->vregs[to] = v1; + } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) { + ir_vregs_join(ctx, v2, v1); + ctx->vregs[from] = v2; + } else if (from < to) { + ir_vregs_join(ctx, v1, v2); + if (f2 & IR_LIVE_INTERVAL_COALESCED) { + for (i = 0; i < ctx->insns_count; i++) { + if (ctx->vregs[i] == v2) { + ctx->vregs[i] = v1; + } + } + } else { + ctx->vregs[to] = v1; + } + } else { + ir_vregs_join(ctx, v2, v1); + if (f1 & IR_LIVE_INTERVAL_COALESCED) { + for (i = 0; i < ctx->insns_count; i++) { + if (ctx->vregs[i] == v1) { + ctx->vregs[i] = v2; + } + } + } else { + ctx->vregs[from] = v2; + } + } + return 1; + } + return 0; +} + +static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to) +{ + if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) { + ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY; + ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES; + ctx->flags |= IR_LR_HAVE_DESSA_MOVES; +#if 0 + fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to); +#endif + } +} + +#if defined(_WIN32) || defined(__APPLE__) || defined(__FreeBSD__) +static int ir_block_cmp(void *data, const void *b1, const void *b2) +#else +static int ir_block_cmp(const void *b1, const void *b2, void *data) +#endif +{ + ir_ctx *ctx = data; + int d1 = ctx->cfg_blocks[*(ir_ref*)b1].loop_depth; + int d2 = ctx->cfg_blocks[*(ir_ref*)b2].loop_depth; + + if (d1 > d2) { + return -1; + } else if (d1 == d2) { + if (ctx->cfg_blocks[*(ir_ref*)b1].start < ctx->cfg_blocks[*(ir_ref*)b2].start) { + return -1; + } else { + return 1; + } + } else { + return 1; + } +} + +static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) +{ + ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); + ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); + ir_live_interval *ival; + ir_live_range *r; + ir_use_pos *p, *p1 = NULL, *p2 = NULL; + ir_ref tmp; + + tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; + + ival = ctx->live_intervals[ctx->vregs[insn->op1]]; + p = ival->use_pos; + while 
(p) { + if (p->pos == pos) { + p->pos = load_pos; + p->op_num = 1; + p1 = p; + break; + } + p = p->next; + } + + ival = ctx->live_intervals[ctx->vregs[i]]; + p = ival->use_pos; + while (p) { + if (p->pos == load_pos) { + p->hint_ref = insn->op1; + break; + } + p = p->next; + } + + if (insn->op2 > 0 && ctx->vregs[insn->op2]) { + ival = ctx->live_intervals[ctx->vregs[insn->op2]]; + r = &ival->range; + while (r) { + if (r->end == load_pos) { + r->end = pos; + if (!r->next) { + ival->end = pos; + } + break; + } + r = r->next; + } + p = ival->use_pos; + while (p) { + if (p->pos == load_pos) { + p->pos = pos; + p->op_num = 2; + p2 = p; + break; + } + p = p->next; + } + } + if (p1 && p2) { + uint8_t tmp = p1->flags; + p1->flags = p2->flags; + p2->flags = tmp; + } +} + +static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def) +{ + ir_use_pos *p; + ir_reg r1 = IR_REG_NONE; + ir_reg r2 = IR_REG_NONE; + + p = ctx->live_intervals[use]->use_pos; + while (p) { + if (IR_LIVE_POS_TO_REF(p->pos) == ref) { + break; + } + if (p->hint != IR_REG_NONE) { + r1 = p->hint; + } + p = p->next; + } + + p = ctx->live_intervals[def]->use_pos; + while (p) { + if (IR_LIVE_POS_TO_REF(p->pos) > ref) { + if (p->hint != IR_REG_NONE) { + r2 = p->hint; + break; + } + } + p = p->next; + } + return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE; +} + +static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn) +{ + if (ctx->vregs[insn->op1] + && ctx->vregs[insn->op1] != ctx->vregs[i] + && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i]) + && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) { + /* pass */ + } else { + if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) { + ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i); + ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i); + ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]]; + ir_live_range *r = &ival->range; + + if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) { + return 0; + } + while (r) { + if (r->end == pos) { + r->end = load_pos; + if (!r->next) { + ival->end = load_pos; + } + if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i]) + && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) { + ir_swap_operands(ctx, i, insn); + return 1; + } else { + r->end = pos; + if (!r->next) { + ival->end = pos; + } + } + break; + } + r = r->next; + } + } + } + return 0; +} + +int ir_coalesce(ir_ctx *ctx) +{ + uint32_t b, n, succ; + ir_ref *p, use, input, k, j; + ir_block *bb, *succ_bb; + ir_use_list *use_list; + ir_insn *insn; + ir_worklist blocks; + bool compact = 0; + + /* Collect a list of blocks which are predecessors of blocks with phi functions */ + ir_worklist_init(&blocks, ctx->cfg_blocks_count + 1); + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->flags & IR_BB_HAS_PHI) { + k = bb->predecessors_count; + use_list = &ctx->use_lists[bb->start]; + n = use_list->count; + IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count); + k++; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + for (j = 2; j <= k; j++) { + ir_worklist_push(&blocks, ctx->cfg_edges[bb->predecessors + (j-2)]); + } + } + } + } + } + +#ifdef _WIN32 +# define qsort_fn(base, num, width, func, data) qsort_s(base, num, width, func, data) +#elif defined(__APPLE__) || defined(__FreeBSD__) +# define 
qsort_fn(base, num, width, func, data) qsort_r(base, num, width, data, func) +#else +# define qsort_fn(base, num, width, func, data) qsort_r(base, num, width, func, data) +#endif + qsort_fn(blocks.l.a.refs, ir_worklist_len(&blocks), sizeof(ir_ref), ir_block_cmp, ctx); + + while (ir_worklist_len(&blocks)) { + uint32_t i; + + b = ir_worklist_pop(&blocks); + bb = &ctx->cfg_blocks[b]; + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + k = ir_phi_input_number(ctx, succ_bb, b); + use_list = &ctx->use_lists[succ_bb->start]; + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (input > 0) { + if (!ir_try_coalesce(ctx, input, use)) { + ir_add_phi_move(ctx, b, input, use); + } else { + compact = 1; + } + } else { + /* Move for constant input */ + ir_add_phi_move(ctx, b, input, use); + } + } + } + } + ir_worklist_free(&blocks); + + ir_hint_propagation(ctx); + + if (ctx->rules) { + /* try to swap operands of commutative instructions for better register allocation */ + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + ir_ref i; + + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + i = bb->end; + + /* skip last instruction */ + i = ctx->prev_ref[i]; + + while (i != bb->start) { + insn = &ctx->ir_base[i]; + if ((ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->vregs[i] + && ctx->live_intervals[ctx->vregs[i]]->use_pos + && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG) + && insn->op2 > 0 + && insn->op1 > 0 + && insn->op1 != insn->op2) { + ir_try_swap_operands(ctx, i, insn); + } + i = ctx->prev_ref[i]; + } + } + } + + if (compact) { + ir_ref i, n; + uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t)); + + for (i = 1, n = 1; i <= ctx->vregs_count; i++) { + if (ctx->live_intervals[i]) { + xlat[i] = n; + if (i != n) { + ctx->live_intervals[n] = ctx->live_intervals[i]; + ctx->live_intervals[n]->vreg = n; + } + n++; + } + } + n--; + if (n != ctx->vregs_count) { + j = ctx->vregs_count - n; + /* vregs + tmp + fixed + SCRATCH + ALL */ + for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) { + ctx->live_intervals[i] = ctx->live_intervals[i + j]; + if (ctx->live_intervals[i]) { + ctx->live_intervals[i]->vreg = i; + } + } + for (j = 1; j < ctx->insns_count; j++) { + if (ctx->vregs[j]) { + ctx->vregs[j] = xlat[ctx->vregs[j]]; + } + } + ctx->vregs_count = n; + } + ir_mem_free(xlat); + } + + return 1; +} + +/* SSA Deconstruction */ + +int ir_compute_dessa_moves(ir_ctx *ctx) +{ + uint32_t b, i, n; + ir_ref j, k, *p, use; + ir_block *bb; + ir_use_list *use_list; + ir_insn *insn; + + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + k = bb->predecessors_count; + if (k > 1) { + use_list = &ctx->use_lists[bb->start]; + n = use_list->count; + if (n > 1) { + IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count); + k++; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + insn = &ctx->ir_base[use]; + if (insn->op == IR_PHI) { + for (j = 2; j <= k; j++) { + if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) { + int pred = ctx->cfg_edges[bb->predecessors + (j-2)]; + ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY; + ctx->cfg_blocks[pred].flags |= 
IR_BB_DESSA_MOVES; + ctx->flags |= IR_LR_HAVE_DESSA_MOVES; + } + } + } + } + } + } + } + return 1; +} + +int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) +{ + uint32_t succ, k, n = 0; + ir_block *bb, *succ_bb; + ir_use_list *use_list; + ir_ref *loc, *pred, i, *p, ref, input; + ir_insn *insn; + uint32_t len; + ir_bitset todo, ready; + bool have_constants = 0; + + bb = &ctx->cfg_blocks[b]; + if (!(bb->flags & IR_BB_DESSA_MOVES)) { + return 0; + } + IR_ASSERT(bb->successors_count == 1); + succ = ctx->cfg_edges[bb->successors]; + succ_bb = &ctx->cfg_blocks[succ]; + IR_ASSERT(succ_bb->predecessors_count > 1); + use_list = &ctx->use_lists[succ_bb->start]; + + k = ir_phi_input_number(ctx, succ_bb, b); + + loc = ir_mem_malloc(ctx->insns_count * 2 * sizeof(ir_ref)); + pred = loc + ctx->insns_count; + len = ir_bitset_len(ctx->insns_count); + todo = ir_bitset_malloc(ctx->insns_count); + + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ref = *p; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (IR_IS_CONST_REF(input)) { + have_constants = 1; + } else if (ctx->vregs[input] != ctx->vregs[ref]) { + loc[ref] = pred[input] = 0; + ir_bitset_incl(todo, ref); + n++; + } + } + } + + if (n > 0) { + ready = ir_bitset_malloc(ctx->insns_count); + IR_BITSET_FOREACH(todo, len, ref) { + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op == IR_PHI); + input = ir_insn_op(insn, k); + loc[input] = input; + pred[ref] = input; + } IR_BITSET_FOREACH_END(); + + IR_BITSET_FOREACH(todo, len, i) { + if (!loc[i]) { + ir_bitset_incl(ready, i); + } + } IR_BITSET_FOREACH_END(); + + while (1) { + ir_ref a, b, c; + + while ((b = ir_bitset_pop_first(ready, len)) >= 0) { + a = pred[b]; + c = loc[a]; + emit_copy(ctx, ctx->ir_base[b].type, c, b); + ir_bitset_excl(todo, b); + loc[a] = b; + if (a == c && pred[a]) { + ir_bitset_incl(ready, a); + } + } + b = ir_bitset_pop_first(todo, len); + if (b < 0) { + break; + } + IR_ASSERT(b != loc[pred[b]]); + emit_copy(ctx, ctx->ir_base[b].type, b, 0); + loc[b] = 0; + ir_bitset_incl(ready, b); + } + + ir_mem_free(ready); + } + + ir_mem_free(todo); + ir_mem_free(loc); + + if (have_constants) { + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) { + ref = *p; + insn = &ctx->ir_base[ref]; + if (insn->op == IR_PHI) { + input = ir_insn_op(insn, k); + if (IR_IS_CONST_REF(input)) { + emit_copy(ctx, insn->type, input, ref); + } + } + } + } + + return 1; +} + +/* Linear Scan Register Allocation */ + +#ifdef IR_DEBUG +# define IR_LOG_LSRA(action, ival, comment) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \ + } \ + } while (0) +# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 
0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + ir_reg_name(_ival->reg, _ival->type)); \ + } \ + } while (0) +# define IR_LOG_LSRA_SPLIT(ival, pos) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + ir_live_pos _pos = (pos); \ + fprintf(stderr, " ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \ + } \ + } while (0) +# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \ + if (ctx->flags & IR_DEBUG_RA) { \ + ir_live_interval *_ival = (ival); \ + ir_live_pos _start = _ival->range.start; \ + ir_live_pos _end = _ival->end; \ + ir_live_pos _pos = (pos); \ + fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \ + (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \ + IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \ + IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \ + ir_reg_name(_ival->reg, _ival->type), \ + IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \ + } \ + } while (0) +#else +# define IR_LOG_LSRA(action, ival, comment) +# define IR_LOG_LSRA_ASSIGN(action, ival, comment) +# define IR_LOG_LSRA_SPLIT(ival, pos) +# define IR_LOG_LSRA_CONFLICT(action, ival, pos); +#endif + +static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position) +{ + ir_live_range *live_range = &ival->range; + + do { + if (position < live_range->end) { + return position >= live_range->start; + } + live_range = live_range->next; + } while (live_range); + + return 0; +} + +static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to) +{ + ir_live_range *r = &ival->range; + + while (r) { + if (from < r->start) { + return 1; + } else if (to <= r->end) { + return 0; + } + r = r->next; + } + return 0; +} + + +static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) +{ + ir_live_pos ret = 0; + ir_use_pos *p = ival->use_pos; + + while (p && p->pos <= pos) { + if (p->flags & flags) { + ret = p->pos; + } + p = p->next; + } + return ret; +} + +static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags) +{ + ir_use_pos *p = ival->use_pos; + + while (p && p->pos <= pos) { + p = p->next; + } + while (p && !(p->flags & flags)) { + p = p->next; + } + return p ? p->pos : 0x7fffffff; +} + +static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags) +{ + ir_use_pos *p = ival->use_pos; + + while (p && !(p->flags & flags)) { + p = p->next; + } + return p ? 
p->pos : 0x7fffffff; +} + +static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos) +{ + ir_ref ref = IR_LIVE_POS_TO_REF(pos); + uint32_t b = ctx->cfg_map[ref]; + + while (!b) { + ref--; + IR_ASSERT(ref > 0); + b = ctx->cfg_map[ref]; + } + IR_ASSERT(b <= ctx->cfg_blocks_count); + return &ctx->cfg_blocks[b]; +} + +static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max) +{ + ir_block *min_bb, *max_bb; + + if (min_pos == max_pos) { + return max_pos; + } + + IR_ASSERT(min_pos < max_pos); + IR_ASSERT(min_pos >= ival->range.start); + IR_ASSERT(max_pos < ival->end); + + min_bb = ir_block_from_live_pos(ctx, min_pos); + max_bb = ir_block_from_live_pos(ctx, max_pos); + + if (min_bb == max_bb + || ir_ival_has_hole_between(ival, min_pos, max_pos)) { // TODO: ??? + return (prefer_max) ? max_pos : min_pos; + } + + if (max_bb->loop_depth > 0) { + /* Split at the end of the loop entry */ + do { + ir_block *bb; + + if (max_bb->flags & IR_BB_LOOP_HEADER) { + bb = max_bb; + } else { + IR_ASSERT(max_bb->loop_header); + bb = &ctx->cfg_blocks[max_bb->loop_header]; + } + bb = &ctx->cfg_blocks[bb->idom]; + if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) { + break; + } + max_bb = bb; + } while (max_bb->loop_depth > 0); + + if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) { + return IR_DEF_LIVE_POS_FROM_REF(max_bb->end); + } + } + + if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) { + return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start); + } else { + // TODO: "min_bb" is in a deeper loop than "max_bb" ??? + return max_pos; + } +} + +static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos) +{ + ir_live_interval *child; + ir_live_range *p, *prev; + ir_use_pos *use_pos, *prev_use_pos; + + IR_LOG_LSRA_SPLIT(ival, pos); + IR_ASSERT(pos > ival->range.start); + ctx->flags |= IR_RA_HAVE_SPLITS; + + p = &ival->range; + prev = NULL; + while (p && pos >= p->end) { + prev = p; + p = prev->next; + } + IR_ASSERT(p); + + if (pos < p->start) { + /* split between ranges */ + pos = p->start; + } + + use_pos = ival->use_pos; + prev_use_pos = NULL; + + ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS); + if (p->start == pos) { + while (use_pos && pos > use_pos->pos) { + if (use_pos->hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + prev_use_pos = use_pos; + use_pos = use_pos->next; + } + } else { + while (use_pos && pos >= use_pos->pos) { + if (use_pos->hint != IR_REG_NONE) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + prev_use_pos = use_pos; + use_pos = use_pos->next; + } + } + + child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + child->type = ival->type; + child->reg = IR_REG_NONE; + child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD; + child->vreg = ival->vreg; + child->stack_spill_pos = -1; // not allocated + child->range.start = pos; + child->range.end = p->end; + child->range.next = p->next; + child->end = ival->end; + child->use_pos = prev_use_pos ? 
prev_use_pos->next : use_pos; + + child->next = ival->next; + ival->next = child; + + if (pos == p->start) { + prev->next = NULL; + ival->end = prev->end; + /* Cache to reuse */ + p->next = ctx->unused_ranges; + ctx->unused_ranges = p; + } else { + p->end = ival->end = pos; + p->next = NULL; + } + if (prev_use_pos) { + prev_use_pos->next = NULL; + } else { + ival->use_pos = NULL; + } + + use_pos = child->use_pos; + while (use_pos) { + if (use_pos->hint != IR_REG_NONE) { + child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS; + } + if (use_pos->hint_ref > 0) { + child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS; + } + use_pos = use_pos->next; + } + + return child; +} + +int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data) +{ + int32_t ret; + uint8_t size = ir_type_size[type]; + + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + if (data->handled && data->handled[size]) { + ret = data->handled[size]->stack_spill_pos; + data->handled[size] = data->handled[size]->list_next; + } else if (size == 8) { + ret = ctx->stack_frame_size; + ctx->stack_frame_size += 8; + } else if (size == 4) { + if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_4 = ret + 4; + } else { + ret = ctx->stack_frame_size; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } else if (size == 2) { + if (data->unused_slot_2) { + ret = data->unused_slot_2; + data->unused_slot_2 = 0; + } else if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_2 = data->unused_slot_4 + 2; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[4]) { + ret = data->handled[4]->stack_spill_pos; + data->handled[4] = data->handled[4]->list_next; + data->unused_slot_2 = ret + 2; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_2 = ret + 2; + data->unused_slot_4 = ret + 4; + } else { + ret = ctx->stack_frame_size; + data->unused_slot_2 = ctx->stack_frame_size + 2; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } else { + IR_ASSERT(size == 1); + if (data->unused_slot_1) { + ret = data->unused_slot_1; + data->unused_slot_1 = 0; + } else if (data->unused_slot_2) { + ret = data->unused_slot_2; + data->unused_slot_1 = data->unused_slot_2 + 1; + data->unused_slot_2 = 0; + } else if (data->unused_slot_4) { + ret = data->unused_slot_4; + data->unused_slot_1 = data->unused_slot_4 + 1; + data->unused_slot_2 = data->unused_slot_4 + 2; + data->unused_slot_4 = 0; + } else if (data->handled && data->handled[2]) { + ret = data->handled[2]->stack_spill_pos; + data->handled[2] = data->handled[2]->list_next; + data->unused_slot_1 = ret + 1; + } else if (data->handled && data->handled[4]) { + ret = data->handled[4]->stack_spill_pos; + data->handled[4] = data->handled[4]->list_next; + data->unused_slot_1 = ret + 1; + data->unused_slot_2 = ret + 2; + } else if (data->handled && data->handled[8]) { + ret = data->handled[8]->stack_spill_pos; + data->handled[8] = data->handled[8]->list_next; + data->unused_slot_1 = ret + 1; + data->unused_slot_2 = ret + 2; + data->unused_slot_4 = ret 
+ 4; + } else { + ret = ctx->stack_frame_size; + data->unused_slot_1 = ctx->stack_frame_size + 1; + data->unused_slot_2 = ctx->stack_frame_size + 2; + if (sizeof(void*) == 8) { + data->unused_slot_4 = ctx->stack_frame_size + 4; + ctx->stack_frame_size += 8; + } else { + ctx->stack_frame_size += 4; + } + } + } + return ret; +} + +static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available) +{ + ir_use_pos *use_pos; + ir_reg reg; + + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } + use_pos = use_pos->next; + } + + return IR_REG_NONE; +} + +static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos) +{ + ir_use_pos *use_pos; + ir_reg reg; + + if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) { + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + if (ival->end <= freeUntilPos[reg]) { + /* register available for the whole interval */ + return reg; + } + } + use_pos = use_pos->next; + } + } + + if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) { + use_pos = ival->use_pos; + while (use_pos) { + if (use_pos->hint_ref > 0) { + reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + if (ival->end <= freeUntilPos[reg]) { + /* register available for the whole interval */ + return reg; + } + } + } + use_pos = use_pos->next; + } + } + + return IR_REG_NONE; +} + +static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available) +{ + ir_use_pos *use_pos; + ir_reg reg; + + use_pos = ival->use_pos; + while (use_pos) { + reg = use_pos->hint; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } else if (use_pos->hint_ref > 0) { + reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + return reg; + } + } + use_pos = use_pos->next; + } + + return IR_REG_NONE; +} + +static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_pos pos = ival->range.start; + + if (*unhandled == NULL + || pos < (*unhandled)->range.start + || (pos == (*unhandled)->range.start + && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) + && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))) + || (pos == (*unhandled)->range.start + && ival->vreg > (*unhandled)->vreg)) { + ival->list_next = *unhandled; + *unhandled = ival; + } else { + ir_live_interval *prev = *unhandled; + + while (prev->list_next) { + if (pos < prev->list_next->range.start + || (pos == prev->list_next->range.start + && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) + && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))) + || (pos == prev->list_next->range.start + && ival->vreg > prev->list_next->vreg)) { + break; + } + prev = prev->list_next; + } + ival->list_next = prev->list_next; + prev->list_next = ival; + } +} + +/* merge sorted lists */ +static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_interval **prev; + ir_live_pos pos; + + if (*unhandled == NULL) { + *unhandled = ival; + while (ival) { + ival = ival->list_next = ival->next; + } + } else { + prev = unhandled; + while (ival) { + pos = ival->range.start; + while (*prev && 
pos >= (*prev)->range.start) { + prev = &(*prev)->list_next; + } + ival->list_next = *prev; + *prev = ival; + prev = &ival->list_next; + ival = ival->next; + } + } +#if IR_DEBUG + ival = *unhandled; + pos = 0; + + while (ival) { + IR_ASSERT(ival->range.start >= pos); + pos = ival->range.start; + ival = ival->list_next; + } +#endif +} + +static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival) +{ + ir_live_pos pos = ival->range.start; + + if (*unhandled == NULL + || pos <= (*unhandled)->range.start) { + ival->list_next = *unhandled; + *unhandled = ival; + } else { + ir_live_interval *prev = *unhandled; + + while (prev->list_next) { + if (pos <= prev->list_next->range.start) { + break; + } + prev = prev->list_next; + } + ival->list_next = prev->list_next; + prev->list_next = ival; + } +} + +static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled) +{ + ir_live_pos freeUntilPos[IR_REG_NUM]; + int i, reg; + ir_live_pos pos, next; + ir_live_interval *other; + ir_regset available, overlapped, scratch; + + if (IR_IS_TYPE_FP(ival->type)) { + available = IR_REGSET_FP; + /* set freeUntilPos of all physical registers to maxInt */ + for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { + freeUntilPos[i] = 0x7fffffff; + } + } else { + available = IR_REGSET_GP; + if (ctx->flags & IR_USE_FRAME_POINTER) { + IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); + } +#if defined(IR_TARGET_X86) + if (ir_type_size[ival->type] == 1) { + /* TODO: if no registers are available, we might use one of these registers for an already allocated interval ??? */ + IR_REGSET_EXCL(available, IR_REG_RBP); + IR_REGSET_EXCL(available, IR_REG_RSI); + IR_REGSET_EXCL(available, IR_REG_RDI); + } +#endif + /* set freeUntilPos of all physical registers to maxInt */ + for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { + freeUntilPos[i] = 0x7fffffff; + } + } + + available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset); + + /* for each interval it in active */ + other = *active; + while (other) { + /* freeUntilPos[it.reg] = 0 */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + if (reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + available = IR_REGSET_EMPTY; + } + } else { + IR_REGSET_EXCL(available, reg); + } + other = other->list_next; + } +
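+	/* Worked example (registers and positions are hypothetical, for
+	 * illustration only): suppose current covers [10...30), an active
+	 * interval holds %r1, and an inactive interval assigned to %r2 next
+	 * intersects current at position 20. The loop above has already
+	 * excluded %r1, and the loop below records freeUntilPos[%r2] = 20,
+	 * so current cannot use %r1 at all and may use %r2 only for its
+	 * head [10...20) after a split.
+	 */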
+	/* for each interval it in inactive intersecting with current + * + * This loop is not necessary for programs in SSA form (see LSRA on SSA fig. 6), + * but it is still necessary after coalescing and splitting + */ + overlapped = IR_REGSET_EMPTY; + other = inactive; + pos = ival->end; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + if (other->current_range->start < pos) { + next = ir_ivals_overlap(&ival->range, other->current_range); + if (next) { + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + overlapped = IR_REGSET_UNION(overlapped, regset); + IR_REGSET_FOREACH(regset, reg) { + if (next < freeUntilPos[reg]) { + freeUntilPos[reg] = next; + } + } IR_REGSET_FOREACH_END(); + } else if (IR_REGSET_IN(available, reg)) { + IR_REGSET_INCL(overlapped, reg); + if (next < freeUntilPos[reg]) { + freeUntilPos[reg] = next; + } + } + } + } + other = other->list_next; + } + + available = IR_REGSET_DIFFERENCE(available, overlapped); + if (available != IR_REGSET_EMPTY) { + + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + /* Try to use hint */ + reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos); + if (reg != IR_REG_NONE) { + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (hint available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + } + } + + if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) { + /* Try to reuse the register previously allocated for the split interval */ + reg = ctx->live_intervals[ival->vreg]->reg; + if (reg >= 0 && IR_REGSET_IN(available, reg)) { + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + } + } + + /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ + scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + if (scratch != IR_REGSET_EMPTY) { + /* prefer registers that don't conflict with the hints for the following unhandled intervals */ + if (1) { + ir_regset non_conflicting = scratch; + + other = *unhandled; + while (other && other->range.start < ival->range.end) { + if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) { + reg = ir_get_first_reg_hint(ctx, other, non_conflicting); + + if (reg >= 0) { + IR_REGSET_EXCL(non_conflicting, reg); + if (non_conflicting == IR_REGSET_EMPTY) { + break; + } + } + } + other = other->list_next; + } + if (non_conflicting != IR_REGSET_EMPTY) { + reg = IR_REGSET_FIRST(non_conflicting); + } else { + reg = IR_REGSET_FIRST(scratch); + } + } else { + reg = IR_REGSET_FIRST(scratch); + } + } else { + reg = IR_REGSET_FIRST(available); + } + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; + }
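+	/* Worked example (registers and positions are hypothetical, for
+	 * illustration only): no register is free for the whole of
+	 * current = [10...50), but among the overlapped ones freeUntilPos is
+	 * {%r2: 24, %r3: 18}. The code below picks %r2 (the highest
+	 * freeUntilPos) and, since 24 lies past the start of current, splits
+	 * current so that its head can still live in %r2 while the tail is
+	 * queued for another allocation attempt.
+	 */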
+	/* reg = register with highest freeUntilPos */ + reg = IR_REG_NONE; + pos = 0; + IR_REGSET_FOREACH(overlapped, i) { + if (freeUntilPos[i] > pos) { + pos = freeUntilPos[i]; + reg = i; + } else if (freeUntilPos[i] == pos + && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg) + && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) { + /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ + pos = freeUntilPos[i]; + reg = i; + } + } IR_REGSET_FOREACH_END(); + + if (pos > ival->range.start) { + /* register available for the first part of the interval */ + /* split current before freeUntilPos[reg] */ + ir_live_pos split_pos = ir_last_use_pos_before(ival, pos, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos > ival->range.start) { + split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0); + other = ir_split_interval_at(ctx, ival, split_pos); + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos); + + if (pref_reg != IR_REG_NONE) { + ival->reg = pref_reg; + } else { + ival->reg = reg; + } + } else { + ival->reg = reg; + } + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling for the first part)"); + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return reg; + } + } + return IR_REG_NONE; +} + +static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled) +{ + ir_live_pos nextUsePos[IR_REG_NUM]; + ir_live_pos blockPos[IR_REG_NUM]; + int i, reg; + ir_live_pos pos, next_use_pos; + ir_live_interval *other, *prev; + ir_use_pos *use_pos; + ir_regset available, tmp_regset; + + if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + use_pos = ival->use_pos; + while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) { + use_pos = use_pos->next; + } + if (!use_pos) { + /* spill */ + IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)"); + ctx->flags |= IR_RA_HAVE_SPILLS; + return IR_REG_NONE; + } + next_use_pos = use_pos->pos; + } else { + next_use_pos = ival->range.end; + } + + if (IR_IS_TYPE_FP(ival->type)) { + available = IR_REGSET_FP; + /* set nextUsePos of all physical registers to maxInt */ + for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { + nextUsePos[i] = 0x7fffffff; + blockPos[i] = 0x7fffffff; + } + } else { + available = IR_REGSET_GP; + if (ctx->flags & IR_USE_FRAME_POINTER) { + IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER); + } +#if defined(IR_TARGET_X86) + if (ir_type_size[ival->type] == 1) { + /* TODO: if no registers are available, we might use one of these registers for an already allocated interval ??? */ + IR_REGSET_EXCL(available, IR_REG_RBP); + IR_REGSET_EXCL(available, IR_REG_RSI); + IR_REGSET_EXCL(available, IR_REG_RDI); + } +#endif + /* set nextUsePos of all physical registers to maxInt */ + for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { + nextUsePos[i] = 0x7fffffff; + blockPos[i] = 0x7fffffff; + } + } + + available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset); + + if (IR_REGSET_IS_EMPTY(available)) { + fprintf(stderr, "LSRA Internal Error: No registers available. 
Allocation is not possible\n"); + IR_ASSERT(0); + exit(-1); + } + + /* for each interval it in active */ + other = *active; + while (other) { + /* nextUsePos[it.reg] = next use of it after start of current */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + IR_REGSET_FOREACH(regset, reg) { + blockPos[reg] = nextUsePos[reg] = 0; + } IR_REGSET_FOREACH_END(); + } else if (IR_REGSET_IN(available, reg)) { + if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) { + blockPos[reg] = nextUsePos[reg] = 0; + } else { + pos = ir_first_use_pos_after(other, ival->range.start, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (pos < nextUsePos[reg]) { + nextUsePos[reg] = pos; + } + } + } + other = other->list_next; + } + + /* for each interval it in inactive intersecting with current */ + other = *inactive; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + reg = other->reg; + IR_ASSERT(reg >= 0); + if (reg >= IR_REG_SCRATCH) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_regset regset; + + if (reg == IR_REG_SCRATCH) { + regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + } else { + IR_ASSERT(reg == IR_REG_ALL); + regset = available; + } + IR_REGSET_FOREACH(regset, reg) { + if (overlap < nextUsePos[reg]) { + nextUsePos[reg] = overlap; + } + if (overlap < blockPos[reg]) { + blockPos[reg] = overlap; + } + } IR_REGSET_FOREACH_END(); + } + } else if (IR_REGSET_IN(available, reg)) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) { + if (overlap < nextUsePos[reg]) { + nextUsePos[reg] = overlap; + } + if (overlap < blockPos[reg]) { + blockPos[reg] = overlap; + } + } else { + pos = ir_first_use_pos_after(other, ival->range.start, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (pos < nextUsePos[reg]) { + nextUsePos[reg] = pos; + } + } + } + } + other = other->list_next; + } + + /* register hinting */ + reg = IR_REG_NONE; + if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) { + reg = ir_get_preferred_reg(ctx, ival, available); + } + if (reg == IR_REG_NONE) { +select_register: + reg = IR_REGSET_FIRST(available); + } + + /* reg = register with highest nextUsePos */ + pos = nextUsePos[reg]; + tmp_regset = available; + IR_REGSET_EXCL(tmp_regset, reg); + IR_REGSET_FOREACH(tmp_regset, i) { + if (nextUsePos[i] > pos) { + pos = nextUsePos[i]; + reg = i; + } + } IR_REGSET_FOREACH_END(); + + /* if first usage of current is after nextUsePos[reg] then */ + if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + /* all other intervals are used before current, so it is best to spill current itself */ + /* assign spill slot to current */ + /* split current before its first use position that requires a register */ + ir_live_pos split_pos; + +spill_current: + if (next_use_pos == ival->range.start) { + IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); + /* split right after definition */ + split_pos = next_use_pos + 1; + } else { + split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1); + } + + if (split_pos > ival->range.start) { + IR_LOG_LSRA(" ---- Conflict with others", ival, " 
(all others are used before)"); + other = ir_split_interval_at(ctx, ival, split_pos); + IR_LOG_LSRA(" ---- Spill", ival, ""); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return IR_REG_NONE; + } + } + + if (ival->end > blockPos[reg]) { + /* spilling make a register free only for the first part of current */ + IR_LOG_LSRA(" ---- Conflict with others", ival, " (spilling make a register free only for the first part)"); + /* split current at optimal position before block_pos[reg] */ + ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1, + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos == 0) { + split_pos = ir_first_use_pos_after(ival, blockPos[reg], + IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; + other = ir_split_interval_at(ctx, ival, split_pos); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + return IR_REG_NONE; + } + if (split_pos >= blockPos[reg]) { +try_next_available_register: + IR_REGSET_EXCL(available, reg); + if (IR_REGSET_IS_EMPTY(available)) { + fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n"); + IR_ASSERT(0); + exit(-1); + } + IR_LOG_LSRA(" ---- Restart", ival, ""); + goto select_register; + } + split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1); + other = ir_split_interval_at(ctx, ival, split_pos); + ir_add_to_unhandled(unhandled, other); + IR_LOG_LSRA(" ---- Queue", other, ""); + } + + /* spill intervals that currently block reg */ + prev = NULL; + other = *active; + while (other) { + ir_live_pos split_pos; + + if (reg == other->reg) { + /* split active interval for reg at position */ + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_live_interval *child, *child2; + + IR_ASSERT(other->type != IR_VOID); + IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", other, overlap); + + split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG); + if (split_pos == 0) { + split_pos = ival->range.start; + } + split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1); + if (split_pos > other->range.start) { + child = ir_split_interval_at(ctx, other, split_pos); + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + IR_LOG_LSRA(" ---- Finish", other, ""); + } else { + if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) { + if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { + next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG); + if (next_use_pos == ival->range.start) { + IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); + /* split right after definition */ + split_pos = next_use_pos + 1; + } else { + split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1); + } + + if (split_pos > ival->range.start) { + goto spill_current; + } + } + goto try_next_available_register; + } + child = other; + other->reg = IR_REG_NONE; + if (prev) { + prev->list_next = other->list_next; + } else { + *active = other->list_next; + } + IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)"); + } + + split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ??? 
+ if (split_pos > child->range.start && split_pos < child->end) { + ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1); + if (opt_split_pos > child->range.start) { + split_pos = opt_split_pos; + } + child2 = ir_split_interval_at(ctx, child, split_pos); + IR_LOG_LSRA(" ---- Spill", child, ""); + ir_add_to_unhandled(unhandled, child2); + IR_LOG_LSRA(" ---- Queue", child2, ""); + } else if (child != other) { + // TODO: this may cause endless loop + ir_add_to_unhandled(unhandled, child); + IR_LOG_LSRA(" ---- Queue", child, ""); + } + } + break; + } + prev = other; + other = other->list_next; + } + + /* split any inactive interval for reg at the end of its lifetime hole */ + other = *inactive; + prev = NULL; + while (other) { + /* freeUntilPos[it.reg] = next intersection of it with current */ + if (reg == other->reg) { + ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); + + if (overlap) { + ir_live_interval *child; + + IR_ASSERT(other->type != IR_VOID); + IR_LOG_LSRA_CONFLICT(" ---- Conflict with inactive", other, overlap); + // TODO: optimal split position (this case is not tested) + child = ir_split_interval_at(ctx, other, overlap); + if (prev) { + prev->list_next = other = other->list_next; + } else { + *inactive = other = other->list_next; + } + ir_add_to_unhandled(unhandled, child); + IR_LOG_LSRA(" ---- Queue", child, ""); + continue; + } + } + prev = other; + other = other->list_next; + } + + /* current.reg = reg */ + ival->reg = reg; + IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (after splitting others)"); + + if (*unhandled && ival->end > (*unhandled)->range.start) { + ival->list_next = *active; + *active = ival; + } + return reg; +} + +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +{ + ir_block *bb = ctx->data; + ir_tmp_reg tmp_reg; + + if (to == 0) { + if (IR_IS_TYPE_INT(type)) { + tmp_reg.num = 0; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + tmp_reg.num = 1; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } + } else if (from != 0) { + if (IR_IS_TYPE_INT(type)) { + tmp_reg.num = 0; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + tmp_reg.num = 1; + tmp_reg.type = type; + tmp_reg.start = IR_DEF_SUB_REF; + tmp_reg.end = IR_SAVE_SUB_REF; + } + } else { + return 1; + } + if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) { + ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg); + } + return 1; +} + +static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data) +{ + ir_use_pos *use_pos = ival->use_pos; + ir_insn *insn; + + if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) { + IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0); + insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; + IR_ASSERT(insn->op == IR_PARAM); + use_pos = use_pos->next; + if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) { + return 0; + } + + if (use_pos) { + ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos); + if (bb->loop_depth) { + return 0; + } + } + + return 1; + } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) { + insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; + IR_ASSERT(insn->op == IR_VLOAD); + use_pos = use_pos->next; + if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) 
{ + return 0; + } + + if (use_pos) { + ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos); + if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) { + return 0; + } + } + + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR); + ival->stack_spill_pos = ctx->ir_base[insn->op2].op3; + + return 1; + } + return 0; +} + +static void ir_assign_bound_spill_slots(ir_ctx *ctx) +{ + ir_hashtab_bucket *b = ctx->binding->data; + uint32_t n = ctx->binding->count; + uint32_t v; + ir_live_interval *ival; + + while (n > 0) { + v = ctx->vregs[b->key]; + if (v) { + ival = ctx->live_intervals[v]; + if (ival + && ival->stack_spill_pos == -1 + && (ival->next || ival->reg == IR_REG_NONE)) { + IR_ASSERT(b->val < 0); + /* special spill slot */ + ival->stack_spill_pos = -b->val; + ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL; + } + } + b++; + n--; + } +} + +static int ir_linear_scan(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + ir_live_interval *unhandled = NULL; + ir_live_interval *active = NULL; + ir_live_interval *inactive = NULL; + ir_live_interval *ival, *other, *prev; + int j; + ir_live_pos position; + ir_reg reg; + ir_reg_alloc_data data; + ir_ref vars = ctx->vars; + + if (!ctx->live_intervals) { + return 0; + } + + if (ctx->flags & IR_LR_HAVE_DESSA_MOVES) { + /* Add fixed intervals for temporary registers used for DESSA moves */ + for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if (bb->flags & IR_BB_DESSA_MOVES) { + ctx->data = bb; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + } + + ctx->data = &data; + ctx->stack_frame_size = 0; + data.unused_slot_4 = 0; + data.unused_slot_2 = 0; + data.unused_slot_1 = 0; + data.handled = NULL; + + while (vars) { + ir_insn *insn = &ctx->ir_base[vars]; + + IR_ASSERT(insn->op == IR_VAR); + vars = insn->op3; /* list next */ + + insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data); + } + + for (j = ctx->vregs_count; j != 0; j--) { + ival = ctx->live_intervals[j]; + if (ival) { + if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD)) + || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) { + ir_add_to_unhandled(&unhandled, ival); + } + } + } + + ival = ctx->live_intervals[0]; + if (ival) { + ir_merge_to_unhandled(&unhandled, ival); + } + + /* vregs + tmp + fixed + SCRATCH + ALL */ + for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) { + ival = ctx->live_intervals[j]; + if (ival) { + ival->current_range = &ival->range; + ival->list_next = inactive; + inactive = ival; + } + } + + ctx->flags &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS); + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_RA) { + fprintf(stderr, "----\n"); + ir_dump_live_ranges(ctx, stderr); + fprintf(stderr, "---- Start LSRA\n"); + } +#endif + + while (unhandled) { + ival = unhandled; + ival->current_range = &ival->range; + unhandled = ival->list_next; + position = ival->range.start; + + IR_LOG_LSRA(" ---- Processing", ival, "..."); + + /* for each interval i in active */ + other = active; + prev = NULL; + while (other) { + ir_live_range *r = other->current_range; + + IR_ASSERT(r); + if (r->end <= position) { + do { + r = r->next; + } while (r && r->end <= position); + if (!r) { + /* move i from active to handled */ + other = other->list_next; + if (prev) { + prev->list_next = other; + } else { + active = other; + } + continue; + } + other->current_range = r; + } + if (position < r->start) { + /* move i from active to inactive 
*/ + if (prev) { + prev->list_next = other->list_next; + } else { + active = other->list_next; + } + other->list_next = inactive; + inactive = other; + } else { + prev = other; + } + other = prev ? prev->list_next : active; + } + + /* for each interval i in inactive */ + other = inactive; + prev = NULL; + while (other) { + ir_live_range *r = other->current_range; + + IR_ASSERT(r); + if (r->end <= position) { + do { + r = r->next; + } while (r && r->end <= position); + if (!r) { + /* move i from inactive to handled */ + other = other->list_next; + if (prev) { + prev->list_next = other; + } else { + inactive = other; + } + continue; + } + other->current_range = r; + } + if (position >= r->start) { + /* move i from inactive to active */ + if (prev) { + prev->list_next = other->list_next; + } else { + inactive = other->list_next; + } + other->list_next = active; + active = other; + } else { + prev = other; + } + other = prev ? prev->list_next : inactive; + } + + reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled); + if (reg == IR_REG_NONE) { + reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled); + } + } + +#if 0 //def IR_DEBUG + /* all intervals must be processed */ + ival = active; + while (ival) { + IR_ASSERT(!ival->next); + ival = ival->list_next; + } + ival = inactive; + while (ival) { + IR_ASSERT(!ival->next); + ival = ival->list_next; + } +#endif + + if (ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) { + + if (ctx->binding) { + ir_assign_bound_spill_slots(ctx); + } + + /* Use simple linear-scan (without holes) to allocate and reuse spill slots */ + unhandled = NULL; + for (j = ctx->vregs_count; j != 0; j--) { + ival = ctx->live_intervals[j]; + if (ival + && (ival->next || ival->reg == IR_REG_NONE) + && ival->stack_spill_pos == -1) { + ival->flags |= IR_LIVE_INTERVAL_SPILLED; + if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) { + ir_live_range *r; + + other = ival; + while (other->next) { + other = other->next; + } + r = &other->range; + while (r->next) { + r = r->next; + } + ival->end = r->end; + ir_add_to_unhandled_spill(&unhandled, ival); + } + } + } + + if (unhandled) { + uint8_t size; + ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + ir_live_interval *old; + + data.handled = handled; + active = NULL; + while (unhandled) { + ival = unhandled; + ival->current_range = &ival->range; + unhandled = ival->list_next; + position = ival->range.start; + + /* for each interval i in active */ + other = active; + prev = NULL; + while (other) { + if (other->end <= position) { + /* move i from active to handled */ + if (prev) { + prev->list_next = other->list_next; + } else { + active = other->list_next; + } + size = ir_type_size[other->type]; + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + old = handled[size]; + while (old) { + if (old->stack_spill_pos == other->stack_spill_pos) { + break; + } + old = old->list_next; + } + if (!old) { + other->list_next = handled[size]; + handled[size] = other; + } + } else { + prev = other; + } + other = prev ? 
prev->list_next : active; + } + + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data); + if (unhandled && ival->end > unhandled->range.start) { + ival->list_next = active; + active = ival; + } else { + size = ir_type_size[ival->type]; + IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8); + old = handled[size]; + while (old) { + if (old->stack_spill_pos == ival->stack_spill_pos) { + break; + } + old = old->list_next; + } + if (!old) { + ival->list_next = handled[size]; + handled[size] = ival; + } + } + } + data.handled = NULL; + } + } + +#ifdef IR_TARGET_X86 + if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data); + } else { + ctx->ret_slot = -1; + } +#endif + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_RA) { + fprintf(stderr, "---- Finish LSRA\n"); + ir_dump_live_ranges(ctx, stderr); + fprintf(stderr, "----\n"); + } +#endif + + return 1; +} + +static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available) +{ + ir_worklist worklist; + ir_block *bb; + uint32_t b, *p, n; + + ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1); + ir_worklist_push(&worklist, b0); + while (ir_worklist_len(&worklist) != 0) { + b = ir_worklist_pop(&worklist); + bb = &ctx->cfg_blocks[b]; + if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) { + ir_worklist_free(&worklist); + return 1; + } + n = bb->predecessors_count; + for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) { + b = *p; + bb = &ctx->cfg_blocks[b]; + + if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) { + ir_worklist_free(&worklist); + return 1; + } else if (!ir_bitset_in(available, b)) { + ir_worklist_push(&worklist, b); + } + } + } + ir_worklist_free(&worklist); + return 0; +} + +static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos) +{ + if (use_pos->next + && use_pos->op_num == 1 + && use_pos->next->pos == use_pos->pos + && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) { + /* Support for R2 = ADD(R1, R1) */ + use_pos = use_pos->next; + } + return use_pos->next && use_pos->next->op_num != 0; +} + +static void assign_regs(ir_ctx *ctx) +{ + ir_ref i; + ir_live_interval *ival, *top_ival; + ir_use_pos *use_pos; + int8_t reg, old_reg; + ir_ref ref; + ir_regset used_regs = 0; + + if (!ctx->regs) { + ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); + memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); + } + + if (!(ctx->flags & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) { + for (i = 1; i <= ctx->vregs_count; i++) { + ival = ctx->live_intervals[i]; + if (ival) { + do { + if (ival->reg != IR_REG_NONE) { + reg = ival->reg; + IR_REGSET_INCL(used_regs, reg); + use_pos = ival->use_pos; + while (use_pos) { + ref = (use_pos->hint_ref < 0) ? 
-use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos); + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } + } + } else { + ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1); + + for (i = 1; i <= ctx->vregs_count; i++) { + top_ival = ival = ctx->live_intervals[i]; + if (ival) { + if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) { + do { + if (ival->reg != IR_REG_NONE) { + IR_REGSET_INCL(used_regs, ival->reg); + use_pos = ival->use_pos; + while (use_pos) { + reg = ival->reg; + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } else { + do { + if (ival->reg != IR_REG_NONE) { + ir_ref prev_use_ref = IR_UNUSED; + + ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1)); + IR_REGSET_INCL(used_regs, ival->reg); + use_pos = ival->use_pos; + while (use_pos) { + reg = ival->reg; + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + // TODO: Insert spill loads and stores in optimal positions (resolution) + if (use_pos->op_num == 0) { + if (ctx->ir_base[ref].op == IR_PHI) { + /* Spilled PHI var is passed through memory */ + reg = IR_REG_NONE; + } else { + uint32_t use_b = ctx->cfg_map[ref]; + + if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) { + ir_bitset_incl(available, use_b); + } + if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_STORE; + } + prev_use_ref = ref; + } + } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref]) + && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) { + if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG) + && use_pos->hint != reg +// && ctx->ir_base[ref].op != IR_CALL +// && ctx->ir_base[ref].op != IR_TAILCALL) { + && ctx->ir_base[ref].op != IR_SNAPSHOT + && !needs_spill_load(ctx, ival, use_pos)) { + /* fuse spill load (valid only when register is not reused) */ + reg = IR_REG_NONE; + if (use_pos->next + && use_pos->op_num == 1 + && use_pos->next->pos == use_pos->pos + && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) { + /* Support for R2 = BINOP(R1, R1) */ + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + use_pos = use_pos->next; + } + } else { + if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_LOAD; + } + if (ctx->ir_base[ref].op != IR_SNAPSHOT) { + uint32_t use_b = ctx->cfg_map[ref]; + + if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) { + ir_bitset_incl(available, use_b); + } + prev_use_ref = ref; + } + } + } else if (use_pos->flags & IR_PHI_USE) { + IR_ASSERT(use_pos->hint_ref < 0); + IR_ASSERT(ctx->vregs[-use_pos->hint_ref]); + IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]); + if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) { + /* Spilled PHI var is passed through memory */ + reg = IR_REG_NONE; + } + } else if (use_pos->hint_ref < 0 + && ctx->use_lists[-use_pos->hint_ref].count > 1 + && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE + && (old_reg & (IR_REG_SPILL_SPECIAL|IR_REG_SPILL_LOAD))) { + /* Force spill load */ + // TODO: Find a better solution ??? 
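+						/* Sketch of the register encoding applied here and in the
+						 * surrounding branches (hypothetical values, for illustration
+						 * only): spill flags are OR'ed into the assigned register,
+						 * e.g. reg | IR_REG_SPILL_LOAD asks the code generator to
+						 * reload the value from its spill slot before this use, while
+						 * reg | IR_REG_SPILL_STORE stores it right after definition.
+						 */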
+ if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + reg |= IR_REG_SPILL_SPECIAL; + } else { + reg |= IR_REG_SPILL_LOAD; + } + IR_ASSERT(reg == old_reg); + } else { + /* reuse register without spill load */ + } + if (use_pos->hint_ref < 0) { + ref = -use_pos->hint_ref; + } + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + + use_pos = use_pos->next; + } + } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) { + use_pos = ival->use_pos; + while (use_pos) { + ref = IR_LIVE_POS_TO_REF(use_pos->pos); + if (ctx->ir_base[ref].op == IR_SNAPSHOT) { + IR_ASSERT(use_pos->hint_ref >= 0); + /* A reference to a CPU spill slot */ + reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER; + ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg); + } + use_pos = use_pos->next; + } + } + ival = ival->next; + } while (ival); + } + } + } + ir_mem_free(available); + } + + /* Temporary registers */ + ival = ctx->live_intervals[0]; + if (ival) { + do { + IR_ASSERT(ival->reg != IR_REG_NONE); + IR_REGSET_INCL(used_regs, ival->reg); + ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, ival->reg); + ival = ival->next; + } while (ival); + } + + if (ctx->fixed_stack_frame_size != -1) { + ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset; + if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), + ctx->used_preserved_regs)) { + // TODO: Preserved reg and fixed frame conflict ??? + // IR_ASSERT(0 && "Preserved reg and fixed frame conflict"); + } + } else { + ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset, + IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), + (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED)); + } + + ir_fix_stack_frame(ctx); +} + +int ir_reg_alloc(ir_ctx *ctx) +{ + if (ir_linear_scan(ctx)) { + assign_regs(ctx); + return 1; + } + return 0; +} diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c new file mode 100644 index 0000000000000..d89ec6eebce67 --- /dev/null +++ b/ext/opcache/jit/ir/ir_save.c @@ -0,0 +1,128 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (IR saver) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +void ir_save(const ir_ctx *ctx, FILE *f) +{ + ir_ref i, j, n, ref, *p; + ir_insn *insn; + uint32_t flags; + bool first; + + fprintf(f, "{\n"); + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + fprintf(f, "\t%s c_%d = ", ir_type_cname[insn->type], i); + if (insn->op == IR_FUNC) { + if (!insn->const_flags) { + fprintf(f, "func(%s)", ir_get_str(ctx, insn->val.i32)); + } else { + fprintf(f, "func(%s, %d)", ir_get_str(ctx, insn->val.i32), insn->const_flags); + } + } else if (insn->op == IR_SYM) { + fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.i32)); + } else if (insn->op == IR_FUNC_ADDR) { + fprintf(f, "func_addr("); + ir_print_const(ctx, insn, f, true); + if (insn->const_flags) { + fprintf(f, ", %d", insn->const_flags); + } + fprintf(f, ")"); + } else { + ir_print_const(ctx, insn, f, true); + } + fprintf(f, ";\n"); + } + + for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) { + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_CONTROL) { + if (!(flags & IR_OP_FLAG_MEM) || insn->type == IR_VOID) { + fprintf(f, "\tl_%d = ", i); + } else { + fprintf(f, "\t%s d_%d, l_%d = ", ir_type_cname[insn->type], i, i); + } + } else { + fprintf(f, "\t"); + if (flags & IR_OP_FLAG_DATA) { + fprintf(f, "%s d_%d = ", ir_type_cname[insn->type], i); + } + } + fprintf(f, "%s", ir_op_name[insn->op]); + n = ir_operands_count(ctx, insn); + if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) && n != 2) { + fprintf(f, "/%d", n); + } else if ((insn->op == IR_CALL || insn->op == IR_TAILCALL) && n != 2) { + fprintf(f, "/%d", n - 2); + } else if (insn->op == IR_PHI && n != 3) { + fprintf(f, "/%d", n - 1); + } else if (insn->op == IR_SNAPSHOT) { + fprintf(f, "/%d", n - 1); + } + first = 1; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + uint32_t opnd_kind = IR_OPND_KIND(flags, j); + + ref = *p; + if (ref) { + switch (opnd_kind) { + case IR_OPND_DATA: + if (IR_IS_CONST_REF(ref)) { + fprintf(f, "%sc_%d", first ? "(" : ", ", -ref); + } else { + fprintf(f, "%sd_%d", first ? "(" : ", ", ref); + } + first = 0; + break; + case IR_OPND_CONTROL: + case IR_OPND_CONTROL_DEP: + case IR_OPND_CONTROL_REF: + fprintf(f, "%sl_%d", first ? "(" : ", ", ref); + first = 0; + break; + case IR_OPND_STR: + fprintf(f, "%s\"%s\"", first ? "(" : ", ", ir_get_str(ctx, ref)); + first = 0; + break; + case IR_OPND_PROB: + if (ref == 0) { + break; + } + IR_FALLTHROUGH; + case IR_OPND_NUM: + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + break; + } + } else if (opnd_kind == IR_OPND_NUM) { + fprintf(f, "%s%d", first ? "(" : ", ", ref); + first = 0; + } else if (IR_IS_REF_OPND_KIND(opnd_kind) && j != n) { + fprintf(f, "%snull", first ? 
"(" : ", "); + first = 0; + } + } + if (first) { + fprintf(f, ";"); + } else { + fprintf(f, ");"); + } + if (((flags & IR_OP_FLAG_DATA) || ((flags & IR_OP_FLAG_MEM) && insn->type != IR_VOID)) && ctx->binding) { + ir_ref var = ir_binding_find(ctx, i); + if (var) { + IR_ASSERT(var < 0); + fprintf(f, " # BIND(0x%x);", -var); + } + } + fprintf(f, "\n"); + n = ir_insn_inputs_to_len(n); + i += n; + insn += n; + } + fprintf(f, "}\n"); +} diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c new file mode 100644 index 0000000000000..6c0297f2b1459 --- /dev/null +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -0,0 +1,885 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (SCCP - Sparse Conditional Constant Propagation) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + * + * The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication + * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches" + * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991 + */ + +#include "ir.h" +#include "ir_private.h" + +#define IR_TOP IR_UNUSED +#define IR_BOTTOM IR_LAST_OP + +#define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0) +#define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0) + +#define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP) +#define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM) +#define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP) + +#define IR_COMBO_COPY_PROPAGATION 1 + +#if IR_COMBO_COPY_PROPAGATION +IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a) +{ + if (a > 0 && _values[a].op == IR_COPY) { + a = _values[a].op1; + IR_ASSERT(a <= 0 || _values[a].op != IR_COPY); + } + return a; +} +#endif + +static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3) +{ + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + +#if IR_COMBO_COPY_PROPAGATION + op1 = ir_sccp_identity(_values, op1); + op2 = ir_sccp_identity(_values, op2); + op3 = ir_sccp_identity(_values, op3); +#endif + +restart: + op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1; + op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2; + op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + IR_MAKE_BOTTOM(res); + return 1; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; +#if IR_COMBO_COPY_PROPAGATION + op1 = ir_sccp_identity(_values, op1); +#endif + insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? 
_values + op1 : ctx->ir_base + op1; + if (IR_IS_CONST_OP(insn->op)) { + /* pass */ +#if IR_COMBO_COPY_PROPAGATION + } else if (IR_IS_TOP(res)) { + _values[res].optx = IR_OPT(IR_COPY, insn->type); + _values[res].op1 = op1; + return 1; + } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) { + return 0; /* not changed */ +#endif + } else { + IR_MAKE_BOTTOM(res); + return 1; + } + break; + case IR_FOLD_DO_CONST: + insn = &ctx->fold_insn; + break; + default: + IR_ASSERT(0); + return 0; + } + + if (IR_IS_TOP(res)) { + _values[res].optx = IR_OPT(insn->type, insn->type); + _values[res].val.u64 = insn->val.u64; + return 1; + } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) { + IR_MAKE_BOTTOM(res); + return 1; + } + return 0; /* not changed */ +} + +static bool ir_sccp_join_values(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) +{ + ir_insn *v; + + if (!IR_IS_BOTTOM(a) && !IR_IS_TOP(b)) { + b = ir_sccp_identity(_values, b); + v = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b]; + if (IR_IS_TOP(a)) { +#if IR_COMBO_COPY_PROPAGATION + if (v->op == IR_BOTTOM) { + _values[a].optx = IR_OPT(IR_COPY, ctx->ir_base[b].type); + _values[a].op1 = b; + return 1; + } +#endif + _values[a].optx = v->opt; + _values[a].val.u64 = v->val.u64; + return 1; + } else if (_values[a].opt == v->opt && _values[a].val.u64 == v->val.u64) { + /* pass */ +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[a].op == IR_COPY && _values[a].op1 == b) { + /* pass */ +#endif + } else { + IR_MAKE_BOTTOM(a); + return 1; + } + } + return 0; +} + +static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) +{ + ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; + + return ir_const_is_true(v); +} + +static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) +{ + ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; + ir_insn *v2 = IR_IS_CONST_REF(b) ? 
&ctx->ir_base[b] : &_values[b]; + + return v1->val.u64 == v2->val.u64; +} + +static void ir_sccp_remove_from_use_list(ir_ctx *ctx, ir_ref from, ir_ref ref) +{ + ir_ref j, n, *p, *q, use; + ir_use_list *use_list = &ctx->use_lists[from]; + ir_ref skip = 0; + + n = use_list->count; + for (j = 0, p = q = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == ref) { + skip++; + } else { + if (p != q) { + *q = use; + } + q++; + } + } + use_list->count -= skip; +#if IR_COMBO_COPY_PROPAGATION + if (skip) { + do { + *q = IR_UNUSED; + q++; + } while (--skip); + } +#endif +} + +#if IR_COMBO_COPY_PROPAGATION +static int ir_sccp_add_to_use_list(ir_ctx *ctx, ir_ref to, ir_ref ref) +{ + ir_use_list *use_list = &ctx->use_lists[to]; + ir_ref n = use_list->refs + use_list->count; + + if (n < ctx->use_edges_count && ctx->use_edges[n] == IR_UNUSED) { + ctx->use_edges[n] = ref; + use_list->count++; + return 0; + } else { + /* Reallocate the whole edges buffer (this is inefficient) */ + ctx->use_edges = ir_mem_realloc(ctx->use_edges, (ctx->use_edges_count + use_list->count + 1) * sizeof(ir_ref)); + memcpy(ctx->use_edges + ctx->use_edges_count, ctx->use_edges + use_list->refs, use_list->count * sizeof(ir_ref)); + use_list->refs = ctx->use_edges_count; + ctx->use_edges[use_list->refs + use_list->count] = ref; + use_list->count++; + ctx->use_edges_count += use_list->count; + return 1; + } +} +#endif + +static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_insn *insn; + + use_list->refs = 0; + use_list->count = 0; + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + *p = IR_UNUSED; + } +} + +static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p; + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_insn *insn; + + use_list->refs = 0; + use_list->count = 0; + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + j; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0 && _values[input].op == IR_BOTTOM) { + ir_sccp_remove_from_use_list(ctx, input, ref); + /* schedule DCE */ + if ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1)) { + ir_bitqueue_add(worklist, input); + } + } + } +} + +static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) +{ + ir_ref j, n, *p, use, k, l; + ir_insn *insn; + ir_use_list *use_list; + + IR_ASSERT(ref != new_ref); + + insn = &ctx->ir_base[ref]; + n = insn->inputs_count; + insn->opt = IR_NOP; /* keep "inputs_count" */ + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + *p = IR_UNUSED; + if (input > 0) { + ir_sccp_remove_from_use_list(ctx, input, ref); + /* schedule DCE */ + if (worklist + && ((IR_IS_FOLDABLE_OP(ctx->ir_base[input].op) && ctx->use_lists[input].count == 0) + || ((ir_op_flags[ctx->ir_base[input].op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[input].count == 1))) { + ir_bitqueue_add(worklist, input); + } + } + } + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = 
&ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (IR_IS_FEASIBLE(use)) { + insn = &ctx->ir_base[use]; + l = insn->inputs_count; + for (k = 1; k <= l; k++) { + if (ir_insn_op(insn, k) == ref) { + ir_insn_set_op(insn, k, new_ref); + } + } +#if IR_COMBO_COPY_PROPAGATION + if (new_ref > 0 && IR_IS_BOTTOM(use)) { + if (ir_sccp_add_to_use_list(ctx, new_ref, use)) { + /* restore after reallocation */ + use_list = &ctx->use_lists[ref]; + n = use_list->count; + p = &ctx->use_edges[use_list->refs + j]; + } + } +#endif + /* schedule folding */ + if (worklist && _values[use].op == IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + + use_list->refs = 0; + use_list->count = 0; +} + +static void ir_sccp_fold2(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) +{ + uint32_t opt; + ir_ref op1, op2, op3; + ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; + + insn = &ctx->ir_base[ref]; + opt = insn->opt; + op1 = insn->op1; + op2 = insn->op2; + op3 = insn->op3; + +restart: + op1_insn = ctx->ir_base + op1; + op2_insn = ctx->ir_base + op2; + op3_insn = ctx->ir_base + op3; + + switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { + case IR_FOLD_DO_RESTART: + opt = ctx->fold_insn.optx; + op1 = ctx->fold_insn.op1; + op2 = ctx->fold_insn.op2; + op3 = ctx->fold_insn.op3; + goto restart; + case IR_FOLD_DO_EMIT: + insn = &ctx->ir_base[ref]; + if (insn->opt != ctx->fold_insn.opt + || insn->op1 != ctx->fold_insn.op1 + || insn->op2 != ctx->fold_insn.op2 + || insn->op3 != ctx->fold_insn.op3) { + + ir_use_list *use_list; + ir_ref n, j, *p, use; + + insn->optx = ctx->fold_insn.opt; + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); + insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); + if (insn->op1 != ctx->fold_insn.op1) { + if (!IR_IS_CONST_REF(insn->op1) && insn->op1 != ctx->fold_insn.op2 && insn->op1 != ctx->fold_insn.op3) { + ir_sccp_remove_from_use_list(ctx, insn->op1, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op1) && ctx->fold_insn.op1 != insn->op2 && ctx->fold_insn.op1 != insn->op3) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op1, ref); + } + } + if (insn->op2 != ctx->fold_insn.op2) { + if (!IR_IS_CONST_REF(insn->op2) && insn->op2 != ctx->fold_insn.op1 && insn->op2 != ctx->fold_insn.op3) { + ir_sccp_remove_from_use_list(ctx, insn->op2, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op2) && ctx->fold_insn.op2 != insn->op1 && ctx->fold_insn.op2 != insn->op3) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op2, ref); + } + } + if (insn->op3 != ctx->fold_insn.op3) { + if (!IR_IS_CONST_REF(insn->op3) && insn->op3 != ctx->fold_insn.op1 && insn->op3 != ctx->fold_insn.op2) { + ir_sccp_remove_from_use_list(ctx, insn->op3, ref); + } + if (!IR_IS_CONST_REF(ctx->fold_insn.op3) && ctx->fold_insn.op3 != insn->op1 && ctx->fold_insn.op3 != insn->op2) { + ir_sccp_add_to_use_list(ctx, ctx->fold_insn.op3, ref); + } + } + insn->op1 = ctx->fold_insn.op1; + insn->op2 = ctx->fold_insn.op2; + insn->op3 = ctx->fold_insn.op3; + + use_list = &ctx->use_lists[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (_values[use].op == IR_BOTTOM) { + ir_bitqueue_add(worklist, use); + } + } + } + break; + case IR_FOLD_DO_COPY: + op1 = ctx->fold_insn.op1; + ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + break; + case IR_FOLD_DO_CONST: + op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); + ir_sccp_replace_insn(ctx, _values, ref, op1, worklist); + 
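/* IR_FOLD_DO_CONST: the folding engine produced a constant value; intern it in the constants table and redirect every use of `ref` to the new constant reference */ +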
break; + default: + IR_ASSERT(0); + break; + } +} + +static void ir_sccp_replace_use(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use) +{ + ir_use_list *use_list = &ctx->use_lists[ref]; + ir_ref i, n, *p; + + n = use_list->count; + for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { + if (*p == use) { + *p = new_use; + break; + } + } +} + +static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) +{ + ir_ref j, n, *p, use, next; + ir_insn *insn, *next_insn; + ir_use_list *use_list = &ctx->use_lists[ref]; + + insn = &ctx->ir_base[ref]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + if (use == dst) { + next = ctx->use_edges[ctx->use_lists[use].refs]; + next_insn = &ctx->ir_base[next]; + /* remove IF and IF_TRUE/FALSE from double linked control list */ + next_insn->op1 = insn->op1; + ir_sccp_replace_use(ctx, insn->op1, ref, next); + /* remove IF and IF_TRUE/FALSE instructions */ + ir_sccp_make_nop(ctx, ref); + ir_sccp_make_nop(ctx, use); + break; + } + } +} + +static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs) +{ + ir_ref i, j, n, k, *p, use; + ir_insn *insn, *use_insn; + ir_use_list *use_list; + ir_bitset life_inputs; + + insn = &ctx->ir_base[ref]; + IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); + n = insn->inputs_count; + if (n - unfeasible_inputs == 1) { + /* remove MERGE completely */ + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + if (input && IR_IS_FEASIBLE(input)) { + ir_insn *input_insn = &ctx->ir_base[input]; + + IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE); + if (input_insn->op == IR_END) { + ir_ref prev, next = IR_UNUSED; + ir_insn *next_insn = NULL; + + prev = input_insn->op1; + use_list = &ctx->use_lists[ref]; + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed"); + if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { + next = use; + next_insn = use_insn; + break; + } + } + IR_ASSERT(prev && next); + /* remove MERGE and input END from double linked control list */ + next_insn->op1 = prev; + ir_sccp_replace_use(ctx, prev, input, next); + /* remove MERGE and input END instructions */ + ir_sccp_make_nop(ctx, ref); + ir_sccp_make_nop(ctx, input); + break; + } else { + for (i = 2; i <= n; i++) { + ir_insn_set_op(insn, i, IR_UNUSED); + } + insn->op = IR_BEGIN; + insn->op1 = input; + } + } + } + } else { + n = insn->inputs_count; + i = 1; + life_inputs = ir_bitset_malloc(n + 1); + for (j = 1; j <= n; j++) { + ir_ref input = ir_insn_op(insn, j); + + if (input) { + if (i != j) { + ir_insn_set_op(insn, i, input); + } + ir_bitset_incl(life_inputs, j); + i++; + } + } + j = i; + while (j < n) { + ir_insn_set_op(insn, j, IR_UNUSED); + j++; + } + i--; + insn->inputs_count = i; + + n++; + use_list = &ctx->use_lists[ref]; + if (use_list->count > 1) { + for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_PHI) { + i = 2; + for (j = 2; j <= n; j++) { + ir_ref input = ir_insn_op(use_insn, j); + + if (ir_bitset_in(life_inputs, j - 1)) { + IR_ASSERT(input); + if (i != j) { + ir_insn_set_op(use_insn, i, input); + } + i++; + } else if (!IR_IS_CONST_REF(input)) { + ir_sccp_remove_from_use_list(ctx, 
input, use); + } + } + while (i <= n) { + ir_insn_set_op(use_insn, i, IR_UNUSED); + i++; + } + use_insn->inputs_count = insn->inputs_count + 1; + } + } + } + ir_mem_free(life_inputs); + } +} + +int ir_sccp(ir_ctx *ctx) +{ + ir_ref i, j, n, *p, use; + ir_use_list *use_list; + ir_insn *insn, *use_insn, *value; + uint32_t flags; + ir_bitqueue worklist; + ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); + + ctx->flags |= IR_OPT_IN_SCCP; + + /* A slightly modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */ + ir_bitqueue_init(&worklist, ctx->insns_count); + worklist.pos = 0; + ir_bitset_incl(worklist.set, 1); + while ((i = ir_bitqueue_pop(&worklist)) >= 0) { + insn = &ctx->ir_base[i]; + flags = ir_op_flags[insn->op]; + if (flags & IR_OP_FLAG_DATA) { + if (insn->op == IR_PHI) { + ir_insn *merge_insn = &ctx->ir_base[insn->op1]; + bool changed = 0; + + if (!IR_IS_FEASIBLE(insn->op1)) { + continue; + } + n = merge_insn->inputs_count + 1; + if (n > 3 && _values[i].optx == IR_TOP) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + } + /* for all live merge inputs */ + for (j = 1; j < n; j++) { + ir_ref merge_input = ir_insn_op(merge_insn, j); + + IR_ASSERT(merge_input > 0); + if (_values[merge_input].optx != IR_TOP) { + ir_ref input = ir_insn_op(insn, j + 1); + + if (input > 0 && IR_IS_TOP(input)) { + ir_bitqueue_add(&worklist, input); + } else if (ir_sccp_join_values(ctx, _values, i, input)) { + changed = 1; + } + } + } + if (!changed) { + continue; + } + } else if (ctx->use_lists[i].count == 0) { + /* dead code */ + continue; + } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { + bool may_benefit = 0; + bool has_top = 0; + + IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); + n = IR_INPUT_EDGES_COUNT(flags); + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + if (input > 0) { + if (_values[input].optx == IR_TOP) { + has_top = 1; + ir_bitqueue_add(&worklist, input); + } else if (_values[input].optx != IR_BOTTOM) { + /* Perform folding only if some of the direct inputs + * are going to be replaced by a constant or copy. + * This approach may miss some folding optimizations + * dependent on indirect inputs, e.g. reassociation.
+ */ + may_benefit = 1; + } + } + } + if (has_top) { + continue; + } + if (!may_benefit) { + IR_MAKE_BOTTOM(i); + } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) { + /* not changed */ + continue; + } + } else { + IR_MAKE_BOTTOM(i); + } + } else if (flags & IR_OP_FLAG_BB_START) { + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + ir_ref unfeasible_inputs = 0; + + n = insn->inputs_count; + if (n > 3 && _values[i].optx == IR_TOP) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + } + for (p = insn->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(input > 0); + if (_values[input].optx == IR_TOP) { + unfeasible_inputs++; + } + } + if (unfeasible_inputs == 0) { + IR_MAKE_BOTTOM(i); + } else if (_values[i].op1 != unfeasible_inputs) { + _values[i].optx = IR_MERGE; + _values[i].op1 = unfeasible_inputs; + } else { + continue; + } + } else { + IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1)); + IR_MAKE_BOTTOM(i); + } + } else { + IR_ASSERT(insn->op1 > 0); + if (_values[insn->op1].optx == IR_TOP) { + /* control input is not feasible */ + continue; + } + if (insn->op == IR_IF) { + if (IR_IS_TOP(insn->op2)) { + ir_bitqueue_add(&worklist, insn->op2); + continue; + } + if (!IR_IS_BOTTOM(insn->op2) +#if IR_COMBO_COPY_PROPAGATION + && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY) +#endif + ) { + bool b = ir_sccp_is_true(ctx, _values, insn->op2); + use_list = &ctx->use_lists[i]; + IR_ASSERT(use_list->count == 2); + p = &ctx->use_edges[use_list->refs]; + use = *p; + use_insn = &ctx->ir_base[use]; + IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); + if ((use_insn->op == IR_IF_TRUE) != b) { + use = *(p+1); + IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); + } + if (_values[i].optx == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use; + } else if (_values[i].optx != IR_IF || _values[i].op1 != use) { + IR_MAKE_BOTTOM(i); + } + if (!IR_IS_BOTTOM(use)) { + ir_bitqueue_add(&worklist, use); + } + continue; + } + IR_MAKE_BOTTOM(i); + } else if (insn->op == IR_SWITCH) { + if (IR_IS_TOP(insn->op2)) { + ir_bitqueue_add(&worklist, insn->op2); + continue; + } + if (!IR_IS_BOTTOM(insn->op2)) { + ir_ref use_case = IR_UNUSED; + + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { + use = *p; + IR_ASSERT(use > 0); + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_CASE_VAL) { + if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { + use_case = use; + break; + } + } else if (use_insn->op == IR_CASE_DEFAULT) { + use_case = use; + } + } + if (use_case) { + use_insn = &ctx->ir_base[use_case]; + if (_values[i].optx == IR_TOP) { + _values[i].optx = IR_IF; + _values[i].op1 = use_case; + } else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) { + IR_MAKE_BOTTOM(i); + } + if (!IR_IS_BOTTOM(use_case)) { + ir_bitqueue_add(&worklist, use_case); + } + } + if (!IR_IS_BOTTOM(i)) { + continue; + } + } + IR_MAKE_BOTTOM(i); + } else if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[i].count == 1) { + /* dead load */ + _values[i].optx = IR_LOAD; + } else { + IR_MAKE_BOTTOM(i); + + /* control, call, load and store instructions may have unprocessed inputs */ + n = IR_INPUT_EDGES_COUNT(flags); + if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 
3) { + for (j = 0; j < (n>>2); j++) { + _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ + } + for (j = 2, p = insn->ops + j; j <= n; j++, p++) { + IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); + use = *p; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + } + } else if (n >= 2) { + IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); + use = insn->op2; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + if (n > 2) { + IR_ASSERT(n == 3); + IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); + use = insn->op3; + if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) { + ir_bitqueue_add(&worklist, use); + } + } + } + } + } + use_list = &ctx->use_lists[i]; + n = use_list->count; + for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { + use = *p; + if (_values[use].optx != IR_BOTTOM) { + ir_bitqueue_add(&worklist, use); + } + } + } + +#ifdef IR_DEBUG + if (ctx->flags & IR_DEBUG_SCCP) { + for (i = 1; i < ctx->insns_count; i++) { + if (IR_IS_CONST_OP(_values[i].op)) { + fprintf(stderr, "%d. CONST(", i); + ir_print_const(ctx, &_values[i], stderr, true); + fprintf(stderr, ")\n"); +#if IR_COMBO_COPY_PROPAGATION + } else if (_values[i].op == IR_COPY) { + fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); +#endif + } else if (IR_IS_TOP(i)) { + fprintf(stderr, "%d. TOP\n", i); + } else if (_values[i].op == IR_IF) { + fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); + } else if (_values[i].op == IR_MERGE) { + fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); + } else if (!IR_IS_BOTTOM(i)) { + fprintf(stderr, "%d. %d\n", i, _values[i].op); + } + } + } +#endif + + for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { + if (value->op == IR_BOTTOM) { + continue; + } else if (IR_IS_CONST_OP(value->op)) { + /* replace instruction by constant */ + j = ir_const(ctx, value->val, value->type); + ir_sccp_replace_insn(ctx, _values, i, j, &worklist); +#if IR_COMBO_COPY_PROPAGATION + } else if (value->op == IR_COPY) { + ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist); +#endif + } else if (value->op == IR_TOP) { + /* remove unreachable instruction */ + insn = &ctx->ir_base[i]; + if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { + if (insn->op != IR_PARAM && insn->op != IR_VAR) { + ir_sccp_remove_insn(ctx, _values, i, &worklist); + } + } else { + if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { + /* remove from terminators list */ + ir_ref prev = ctx->ir_base[1].op1; + if (prev == i) { + ctx->ir_base[1].op1 = insn->op3; + } else { + while (prev) { + if (ctx->ir_base[prev].op3 == i) { + ctx->ir_base[prev].op3 = insn->op3; + break; + } + prev = ctx->ir_base[prev].op3; + } + } + } + ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, NULL); + } + } else if (value->op == IR_IF) { + /* remove one way IF/SWITCH */ + ir_sccp_remove_if(ctx, _values, i, value->op1); + } else if (value->op == IR_MERGE) { + /* schedule merge to remove unfeasible MERGE inputs */ + ir_bitqueue_add(&worklist, i); + } else if (value->op == IR_LOAD) { + /* schedule dead load elimination */ + ir_bitqueue_add(&worklist, i); + } + } + + while ((i = ir_bitqueue_pop(&worklist)) >= 0) { + if (_values[i].op == IR_MERGE) { + ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1); + } else { + insn = &ctx->ir_base[i]; + if (IR_IS_FOLDABLE_OP(insn->op)) { + if (ctx->use_lists[i].count == 0) { + ir_sccp_remove_insn(ctx, _values, i, &worklist); + } 
else { + ir_sccp_fold2(ctx, _values, i, &worklist); + } + } else if ((ir_op_flags[insn->op] & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD) + && ctx->use_lists[i].count == 1) { + /* dead load */ + ir_ref next = ctx->use_edges[ctx->use_lists[i].refs]; + + /* remove LOAD from double linked control list */ + ctx->ir_base[next].op1 = insn->op1; + ir_sccp_replace_use(ctx, insn->op1, i, next); + insn->op1 = IR_UNUSED; + ir_sccp_remove_insn(ctx, _values, i, &worklist); + } + } + } + + ir_mem_free(_values); + ir_bitqueue_free(&worklist); + + ctx->flags &= ~IR_OPT_IN_SCCP; + ctx->flags |= IR_SCCP_DONE; + + return 1; +} diff --git a/ext/opcache/jit/ir/ir_strtab.c b/ext/opcache/jit/ir/ir_strtab.c new file mode 100644 index 0000000000000..c5115e5aa0f23 --- /dev/null +++ b/ext/opcache/jit/ir/ir_strtab.c @@ -0,0 +1,227 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (String table) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#include "ir.h" +#include "ir_private.h" + +typedef struct _ir_strtab_bucket { + uint32_t h; + uint32_t len; + const char *str; + uint32_t next; + ir_ref val; +} ir_strtab_bucket; + +static uint32_t ir_str_hash(const char *str, size_t len) +{ + size_t i; + uint32_t h = 5381; + + for (i = 0; i < len; i++) { + h = ((h << 5) + h) + str[i]; + } + return h | 0x10000000; +} + +static uint32_t ir_strtab_hash_size(uint32_t size) +{ + /* Round up to the next power of 2 */ + size -= 1; + size |= (size >> 1); + size |= (size >> 2); + size |= (size >> 4); + size |= (size >> 8); + size |= (size >> 16); + return size + 1; +} + +static void ir_strtab_resize(ir_strtab *strtab) +{ + uint32_t old_hash_size = (uint32_t)(-(int32_t)strtab->mask); + char *old_data = strtab->data; + uint32_t size = strtab->size * 2; + uint32_t hash_size = ir_strtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket)); + ir_strtab_bucket *p; + uint32_t pos, i; + + memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t)); + strtab->data = data + (hash_size * sizeof(uint32_t)); + strtab->mask = (uint32_t)(-(int32_t)hash_size); + strtab->size = size; + + memcpy(strtab->data, old_data, strtab->count * sizeof(ir_strtab_bucket)); + ir_mem_free(old_data - (old_hash_size * sizeof(uint32_t))); + + i = strtab->count; + pos = 0; + p = (ir_strtab_bucket*)strtab->data; + do { + uint32_t h = p->h | strtab->mask; + p->next = ((uint32_t*)strtab->data)[(int32_t)h]; + ((uint32_t*)strtab->data)[(int32_t)h] = pos; + pos += sizeof(ir_strtab_bucket); + p++; + } while (--i); +} + +static void ir_strtab_grow_buf(ir_strtab *strtab, uint32_t len) +{ + size_t old = (size_t)strtab->buf; + + do { + strtab->buf_size *= 2; + } while (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)); + + strtab->buf = ir_mem_realloc(strtab->buf, strtab->buf_size); + if ((size_t)strtab->buf != old) { + size_t offset = (size_t)strtab->buf - old; + ir_strtab_bucket *p = (ir_strtab_bucket*)strtab->data; + uint32_t i; + for (i = strtab->count; i > 0; i--) { + p->str += offset; + p++; + } + } +} + +void ir_strtab_init(ir_strtab *strtab, uint32_t size, uint32_t buf_size) +{ + IR_ASSERT(size > 0); + uint32_t hash_size = ir_strtab_hash_size(size); + char *data = ir_mem_malloc(hash_size * sizeof(uint32_t) + size * sizeof(ir_strtab_bucket)); + memset(data, IR_INVALID_IDX, hash_size * sizeof(uint32_t)); + strtab->data = (data + (hash_size * sizeof(uint32_t))); + strtab->mask = (uint32_t)(-(int32_t)hash_size); + strtab->size = size; + strtab->count = 0; 
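+ /* `pos` holds the byte offset of the next free bucket slot within the data area */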
+ strtab->pos = 0; + if (buf_size) { + strtab->buf = ir_mem_malloc(buf_size); + strtab->buf_size = buf_size; + strtab->buf_top = 0; + } else { + strtab->buf = NULL; + strtab->buf_size = 0; + strtab->buf_top = 0; + } +} + +ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len) +{ + uint32_t h = ir_str_hash(str, len); + const char *data = (const char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val; + } + pos = p->next; + } + return 0; +} + +ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) +{ + uint32_t h = ir_str_hash(str, len); + char *data = (char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val; + } + pos = p->next; + } + + IR_ASSERT(val != 0); + + if (UNEXPECTED(strtab->count >= strtab->size)) { + ir_strtab_resize(strtab); + data = strtab->data; + } + + if (strtab->buf) { + if (UNEXPECTED(strtab->buf_size - strtab->buf_top < len + 1)) { + ir_strtab_grow_buf(strtab, len + 1); + } + + memcpy(strtab->buf + strtab->buf_top, str, len); + strtab->buf[strtab->buf_top + len] = 0; + str = (const char*)strtab->buf + strtab->buf_top; + strtab->buf_top += len + 1; + } + + pos = strtab->pos; + strtab->pos += sizeof(ir_strtab_bucket); + strtab->count++; + p = (ir_strtab_bucket*)(data + pos); + p->h = h; + p->len = len; + p->str = str; + h |= strtab->mask; + p->next = ((uint32_t*)data)[(int32_t)h]; + ((uint32_t*)data)[(int32_t)h] = pos; + p->val = val; + return val; +} + +ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val) +{ + uint32_t h = ir_str_hash(str, len); + char *data = (char*)strtab->data; + uint32_t pos = ((uint32_t*)data)[(int32_t)(h | strtab->mask)]; + ir_strtab_bucket *p; + + while (pos != IR_INVALID_IDX) { + p = (ir_strtab_bucket*)(data + pos); + if (p->h == h + && p->len == len + && memcmp(p->str, str, len) == 0) { + return p->val = val; + } + pos = p->next; + } + return 0; +} + +const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx) +{ + IR_ASSERT(idx >= 0 && (uint32_t)idx < strtab->count); + return ((const ir_strtab_bucket*)strtab->data)[idx].str; +} + +void ir_strtab_free(ir_strtab *strtab) +{ + uint32_t hash_size = (uint32_t)(-(int32_t)strtab->mask); + char *data = (char*)strtab->data - (hash_size * sizeof(uint32_t)); + ir_mem_free(data); + strtab->data = NULL; + if (strtab->buf) { + ir_mem_free(strtab->buf); + strtab->buf = NULL; + } +} + +void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func) +{ + uint32_t i; + + for (i = 0; i < strtab->count; i++) { + const ir_strtab_bucket *b = &((ir_strtab_bucket*)strtab->data)[i]; + func(b->str, b->len, b->val); + } +} diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc new file mode 100644 index 0000000000000..2690e173d6734 --- /dev/null +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -0,0 +1,9056 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (x86/x86_64 native code generator based on DynAsm) + * Copyright (C) 2022 Zend by Perforce. 
+ * Authors: Dmitry Stogov + */ + +|.if X64 +|.arch x64 +|.else +|.arch x86 +|.endif + +|.actionlist dasm_actions +|.globals ir_lb +|.section code, cold_code, rodata, jmp_table + +#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1))) +#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff) +#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64)) +#define IR_IS_FP_ZERO(insn) ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0)) +#define IR_MAY_USE_32BIT_ADDR(addr) \ + (ctx->code_buffer && \ + IR_IS_SIGNED_32BIT((char*)(addr) - (char*)ctx->code_buffer) && \ + IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)ctx->code_buffer + ctx->code_buffer_size))) + +#define IR_SPILL_POS_TO_OFFSET(offset) \ + ((ctx->flags & IR_USE_FRAME_POINTER) ? \ + ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \ + ((offset) + ctx->call_stack_size)) + +|.macro ASM_REG_OP, op, type, reg +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(reg) +|| break; +|| case 2: +| op Rw(reg) +|| break; +|| case 4: +| op Rd(reg) +|| break; +|.if X64 +|| case 8: +| op Rq(reg) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_OP, op, type, mem +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte mem +|| break; +|| case 2: +| op word mem +|| break; +|| case 4: +| op dword mem +|| break; +|.if X64 +|| case 8: +| op qword mem +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), Rb(src) +|| break; +|| case 2: +| op Rw(dst), Rw(src) +|| break; +|| case 4: +| op Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| op Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_OP2, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +|| case 2: +| op Rw(dst), Rw(src) +|| break; +|| case 4: +| op Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| op Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_TXT_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), src +|| break; +|| case 2: +| op Rw(dst), src +|| break; +|| case 4: +| op Rd(dst), src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_IMM_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), (src & 0xff) +|| break; +|| case 2: +| op Rw(dst), (src & 0xffff) +|| break; +|| case 4: +| op Rd(dst), src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_REG_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, Rb(src) +|| break; +|| case 2: +| op word dst, Rw(src) +|| break; +|| case 4: +| op dword dst, Rd(src) +|| break; +|.if X64 +|| case 8: +| op qword dst, Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_TXT_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, src +|| break; +|| case 2: +| op word dst, src +|| break; +|| case 4: +| op dword dst, src +|| break; +|.if X64 +|| case 8: +| op qword dst, src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_MEM_IMM_OP, 
op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op byte dst, (src & 0xff) +|| break; +|| case 2: +| op word dst, (src & 0xffff) +|| break; +|| case 4: +| op dword dst, src +|| break; +|.if X64 +|| case 8: +| op qword dst, src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_MEM_OP, op, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 1: +| op Rb(dst), byte src +|| break; +|| case 2: +| op Rw(dst), word src +|| break; +|| case 4: +| op Rd(dst), dword src +|| break; +|.if X64 +|| case 8: +| op Rq(dst), qword src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_REG_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), Rw(src) +|| break; +|| case 4: +| imul Rd(dst), Rd(src) +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), Rq(src) +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_IMM_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), src +|| break; +|| case 4: +| imul Rd(dst), src +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_REG_MEM_IMUL, type, dst, src +|| switch (ir_type_size[type]) { +|| default: +|| IR_ASSERT(0); +|| case 2: +| imul Rw(dst), word src +|| break; +|| case 4: +| imul Rd(dst), dword src +|| break; +|.if X64 +|| case 8: +| imul Rq(dst), qword src +|| break; +|.endif +|| } +|.endmacro + +|.macro ASM_SSE2_REG_REG_OP, fop, dop, type, dst, src +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(src-IR_REG_FP_FIRST) +|| } +|.endmacro + +|.macro ASM_SSE2_REG_MEM_OP, fop, dop, type, dst, src +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), qword src +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), dword src +|| } +|.endmacro + +|.macro ASM_AVX_REG_REG_REG_OP, fop, dop, type, dst, op1, op2 +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) +|| } +|.endmacro + +|.macro ASM_AVX_REG_REG_MEM_OP, fop, dop, type, dst, op1, op2 +|| if (type == IR_DOUBLE) { +| dop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), qword op2 +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +| fop xmm(dst-IR_REG_FP_FIRST), xmm(op1-IR_REG_FP_FIRST), dword op2 +|| } +|.endmacro + +|.macro ASM_FP_REG_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (ctx->mflags & IR_X86_AVX) { +| ASM_SSE2_REG_REG_OP avx_fop, avx_dop, type, dst, src +|| } else { +| ASM_SSE2_REG_REG_OP fop, dop, type, dst, src +|| } +|.endmacro + +|.macro ASM_FP_MEM_REG_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (type == IR_DOUBLE) { +|| if (ctx->mflags & IR_X86_AVX) { +| avx_dop qword dst, xmm(src-IR_REG_FP_FIRST) +|| } else { +| dop qword dst, xmm(src-IR_REG_FP_FIRST) +|| } +|| } else { +|| IR_ASSERT(type == IR_FLOAT); +|| if (ctx->mflags & IR_X86_AVX) { +| avx_fop dword dst, xmm(src-IR_REG_FP_FIRST) +|| } else { +| fop dword dst, xmm(src-IR_REG_FP_FIRST) +|| } +|| } +|.endmacro + +|.macro ASM_FP_REG_MEM_OP, fop, dop, avx_fop, avx_dop, type, dst, src +|| if (ctx->mflags & IR_X86_AVX) { +| ASM_SSE2_REG_MEM_OP avx_fop, avx_dop, type, dst, src +|| } else { +| ASM_SSE2_REG_MEM_OP fop, dop, 
type, dst, src +|| } +|.endmacro + +typedef struct _ir_backend_data { + ir_reg_alloc_data ra_data; + uint32_t dessa_from_block; + dasm_State *dasm_state; + int rodata_label, jmp_table_label; + bool double_neg_const; + bool float_neg_const; + bool double_abs_const; + bool float_abs_const; + bool double_zero_const; +} ir_backend_data; + +#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ + #name64, +#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ + #name32, +#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \ + #name16, +#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \ + #name8, +#define IR_FP_REG_NAME(code, name) \ + #name, + +static const char *_ir_reg_name[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME) + IR_FP_REGS(IR_FP_REG_NAME) +}; + +static const char *_ir_reg_name32[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME32) +}; + +static const char *_ir_reg_name16[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME16) +}; + +static const char *_ir_reg_name8[IR_REG_NUM] = { + IR_GP_REGS(IR_GP_REG_NAME8) +}; + +/* Calling Convention */ +#ifdef _WIN64 + +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, +}; + +#elif defined(IR_TARGET_X64) + +static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { + IR_REG_INT_ARG1, + IR_REG_INT_ARG2, + IR_REG_INT_ARG3, + IR_REG_INT_ARG4, + IR_REG_INT_ARG5, + IR_REG_INT_ARG6, +}; + +static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { + IR_REG_FP_ARG1, + IR_REG_FP_ARG2, + IR_REG_FP_ARG3, + IR_REG_FP_ARG4, + IR_REG_FP_ARG5, + IR_REG_FP_ARG6, + IR_REG_FP_ARG7, + IR_REG_FP_ARG8, +}; + +#else + +static const int8_t *_ir_int_reg_params = NULL; +static const int8_t *_ir_fp_reg_params = NULL; +static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = { + IR_REG_INT_FCARG1, + IR_REG_INT_FCARG2, +}; +static const int8_t *_ir_fp_fc_reg_params = NULL; + +#endif + +const char *ir_reg_name(int8_t reg, ir_type type) +{ + if (reg >= IR_REG_NUM) { + if (reg == IR_REG_SCRATCH) { + return "SCRATCH"; + } else { + IR_ASSERT(reg == IR_REG_ALL); + return "ALL"; + } + } + IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); + if (type == IR_VOID) { + type = (reg < IR_REG_FP_FIRST) ? 
IR_ADDR : IR_DOUBLE; + } + if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) { + return _ir_reg_name[reg]; + } else if (ir_type_size[type] == 4) { + return _ir_reg_name32[reg]; + } else if (ir_type_size[type] == 2) { + return _ir_reg_name16[reg]; + } else { + IR_ASSERT(ir_type_size[type] == 1); + return _ir_reg_name8[reg]; + } +} + +#define IR_RULES(_) \ + _(CMP_INT) \ + _(CMP_FP) \ + _(MUL_INT) \ + _(DIV_INT) \ + _(MOD_INT) \ + _(TEST_INT) \ + _(SETCC_INT) \ + _(TESTCC_INT) \ + _(LEA_OB) \ + _(LEA_SI) \ + _(LEA_SIB) \ + _(LEA_IB) \ + _(LEA_SI_O) \ + _(LEA_SIB_O) \ + _(LEA_IB_O) \ + _(LEA_I_OB) \ + _(LEA_OB_I) \ + _(LEA_OB_SI) \ + _(LEA_SI_OB) \ + _(LEA_B_SI) \ + _(LEA_SI_B) \ + _(INC) \ + _(DEC) \ + _(MUL_PWR2) \ + _(DIV_PWR2) \ + _(MOD_PWR2) \ + _(BOOL_NOT_INT) \ + _(ABS_INT) \ + _(OP_INT) \ + _(OP_FP) \ + _(IMUL3) \ + _(BINOP_INT) \ + _(BINOP_SSE2) \ + _(BINOP_AVX) \ + _(SHIFT) \ + _(SHIFT_CONST) \ + _(COPY_INT) \ + _(COPY_FP) \ + _(CMP_AND_BRANCH_INT) \ + _(CMP_AND_BRANCH_FP) \ + _(TEST_AND_BRANCH_INT) \ + _(JCC_INT) \ + _(GUARD_CMP_INT) \ + _(GUARD_CMP_FP) \ + _(GUARD_TEST_INT) \ + _(GUARD_JCC_INT) \ + _(GUARD_OVERFLOW) \ + _(OVERFLOW_AND_BRANCH) \ + _(MIN_MAX_INT) \ + _(MEM_OP_INT) \ + _(MEM_INC) \ + _(MEM_DEC) \ + _(MEM_MUL_PWR2) \ + _(MEM_DIV_PWR2) \ + _(MEM_MOD_PWR2) \ + _(MEM_BINOP_INT) \ + _(MEM_SHIFT) \ + _(MEM_SHIFT_CONST) \ + _(REG_BINOP_INT) \ + _(VSTORE_INT) \ + _(VSTORE_FP) \ + _(LOAD_INT) \ + _(LOAD_FP) \ + _(STORE_INT) \ + _(STORE_FP) \ + _(IF_INT) \ + _(RETURN_VOID) \ + _(RETURN_INT) \ + _(RETURN_FP) \ + +#define IR_RULE_ENUM(name) IR_ ## name, + +enum _ir_rule { + IR_FIRST_RULE = IR_LAST_OP, + IR_RULES(IR_RULE_ENUM) + IR_LAST_RULE +}; + +#define IR_RULE_NAME(name) #name, +const char *ir_rule_name[IR_LAST_OP] = { + NULL, + IR_RULES(IR_RULE_NAME) + NULL +}; + +/* register allocation */ +int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) +{ + uint32_t rule = ir_rule(ctx, ref); + const ir_insn *insn; + int n = 0; + int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + + constraints->def_reg = IR_REG_NONE; + constraints->hints_count = 0; + switch (rule & IR_RULE_MASK) { + case IR_BINOP_INT: + insn = &ctx->ir_base[ref]; + if (rule & IR_FUSED) { + if (ctx->ir_base[insn->op1].op == IR_RLOAD) { + flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } else { + flags = IR_OP2_MUST_BE_IN_REG; + } + } else { + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + insn = &ctx->ir_base[insn->op2]; + if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_IMUL3: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_SHIFT: + if (rule & IR_FUSED) { + flags = IR_OP2_MUST_BE_IN_REG; + } else { + flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + } + constraints->hints[1] = IR_REG_NONE; + constraints->hints[2] = IR_REG_RCX; + constraints->hints_count = 3; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + break; + case IR_MUL_INT: + /* %rax - used as input and result */ + constraints->def_reg = IR_REG_RAX; + constraints->hints[1] = IR_REG_RAX; + 
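/* the widening one-operand MUL also clobbers %rdx, so it is reserved as a scratch register below */ +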
constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + break; + case IR_DIV_INT: + /* %rax - used as input and result */ + constraints->def_reg = IR_REG_RAX; + constraints->hints[1] = IR_REG_RAX; + constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + goto op2_const; + case IR_MOD_INT: + constraints->def_reg = IR_REG_RDX; + constraints->hints[1] = IR_REG_RAX; + constraints->hints_count = 2; + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); + n = 2; + goto op2_const; + case IR_MIN_MAX_INT: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; +op2_const: + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CMP_INT: + case IR_TEST_INT: + insn = &ctx->ir_base[ref]; + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } else if (ir_rule(ctx, insn->op1) & IR_FUSED) { + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; + } + if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_CMP_FP: + insn = &ctx->ir_base[ref]; + if (!(rule & IR_FUSED)) { + constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF); + n = 1; + } + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + if (IR_IS_CONST_REF(insn->op1)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op1]; + constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_BINOP_AVX: + flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op1)) { + constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_VSTORE_INT: + flags = IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + if (ir_type_size[insn->type] == 8 && !IR_IS_32BIT(insn->type, insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_STORE_INT: + flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + insn = 
&ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + IR_ASSERT(val_insn->type == IR_ADDR); + if (ir_type_size[val_insn->type] == 8 && !IR_IS_SIGNED_32BIT(val_insn->val.i64)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + if (IR_IS_CONST_REF(insn->op3)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op3]; + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[n] = IR_TMP_REG(3, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + } + break; + case IR_VSTORE_FP: + flags = IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + break; + case IR_LOAD_FP: + case IR_MEM_BINOP_INT: + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + const ir_insn *val_insn = &ctx->ir_base[insn->op2]; + IR_ASSERT(val_insn->type == IR_ADDR); + if (ir_type_size[val_insn->type] == 8 && !IR_IS_32BIT(val_insn->type, val_insn->val)) { + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + } + break; + case IR_STORE_FP: + flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + constraints->tmp_regs[0] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + if (IR_IS_CONST_REF(insn->op3)) { + insn = &ctx->ir_base[insn->op3]; + constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_SWITCH: + flags = IR_OP2_MUST_BE_IN_REG; + insn = &ctx->ir_base[ref]; + if (IR_IS_CONST_REF(insn->op2)) { + insn = &ctx->ir_base[insn->op2]; + constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n = 1; + } + if (sizeof(void*) == 8) { + constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; + } + break; + case IR_CALL: + insn = &ctx->ir_base[ref]; + if (IR_IS_TYPE_INT(insn->type)) { + constraints->def_reg = IR_REG_INT_RET1; +#ifdef IR_REG_FP_RET1 + } else { + constraints->def_reg = IR_REG_FP_RET1; +#endif + } + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n = 1; + IR_FALLTHROUGH; + case IR_TAILCALL: + insn = &ctx->ir_base[ref]; + if (insn->inputs_count > 2) { + constraints->hints[2] = IR_REG_NONE; + constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + if (!IR_IS_CONST_REF(insn->op2)) { + constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_BINOP_SSE2: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_SHIFT_CONST: + case IR_INC: + case IR_DEC: + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + case IR_OP_INT: + case IR_OP_FP: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_COPY_INT: + case IR_COPY_FP: + case IR_SEXT: + case IR_ZEXT: + flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; + break; + case IR_ABS_INT: + 
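/* the neg+cmov sequence reads the original op1 after the result register has already been written, so the result must not share a register with the input */ +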
flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; + break; + case IR_PARAM: + constraints->def_reg = ir_get_param_reg(ctx, ref); + flags = 0; + break; + case IR_PI: + case IR_PHI: + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_RLOAD: + constraints->def_reg = ctx->ir_base[ref].op2; + flags = IR_USE_SHOULD_BE_IN_REG; + break; + case IR_EXITCALL: + flags = IR_USE_MUST_BE_IN_REG; + constraints->def_reg = IR_REG_INT_RET1; + break; + case IR_IF_INT: + case IR_GUARD: + case IR_GUARD_NOT: + flags = IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_IJMP: + flags = IR_OP2_SHOULD_BE_IN_REG; + break; + case IR_RSTORE: + flags = IR_OP3_SHOULD_BE_IN_REG; + break; + case IR_RETURN_INT: + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints_count = 3; + break; + case IR_RETURN_FP: +#ifdef IR_REG_FP_RET1 + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints_count = 3; +#endif + break; + case IR_SNAPSHOT: + flags = 0; + break; + } + constraints->tmps_count = n; + + return flags; +} + +/* instruction selection */ +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref); + +static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) +{ + if (!IR_IS_CONST_REF(addr_ref)) { + uint32_t rule = ctx->rules[addr_ref]; + + if (!rule) { + ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref); + } + if (rule == IR_LEA_OB) { + ir_use_list *use_list = &ctx->use_lists[addr_ref]; + ir_ref j = use_list->count; + + if (j > 1) { + /* check if address is used only in LOAD and STORE */ + ir_ref *p = &ctx->use_edges[use_list->refs]; + + do { + ir_insn *insn = &ctx->ir_base[*p]; + if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + return; + } + p++; + } while (--j); + } + ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + } + } +} + +/* A naive check if there is a STORE or CALL between this LOAD and the fusion root */ +static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) +{ + if (ref + 1 != root) { + ir_ref pos = ctx->prev_ref[root]; + + do { + ir_insn *insn = &ctx->ir_base[pos]; + + if (insn->op == IR_STORE) { + // TODO: check if LOAD and STORE addresses may alias + return 1; + } else if (insn->op == IR_CALL) { + return 1; + } + pos = ctx->prev_ref[pos]; + } while (ref != pos); + } + return 0; +} + +static bool ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) +{ + if (ir_in_same_block(ctx, ref) + && ctx->ir_base[ref].op == IR_LOAD + && ctx->use_lists[ref].count == 2 + && !ir_match_has_mem_deps(ctx, ref, root)) { + ir_ref addr_ref = ctx->ir_base[ref].op2; + ir_insn *addr_insn = &ctx->ir_base[addr_ref]; + + if (IR_IS_CONST_REF(addr_ref)) { + if (addr_insn->op == IR_C_ADDR && + (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64))) { + ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; + return 1; + } + } else { + ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; + ir_match_fuse_addr(ctx, addr_ref); + return 1; + } + } else if (ir_in_same_block(ctx, ref) + && ctx->ir_base[ref].op == IR_VLOAD) { + return 1; + } + return 0; +} + +static void ir_swap_ops(ir_insn *insn) +{ + ir_ref tmp = insn->op1; + insn->op1 = insn->op2; + insn->op2 = tmp; +} + +static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (IR_IS_CONST_REF(insn->op2)) { + if (ir_type_size[insn->type] > 4 && !IR_IS_32BIT(ctx->ir_base[insn->op2].type, ctx->ir_base[insn->op2].val) + && !IR_IS_CONST_REF(insn->op1) + && 
ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + } + } else if (!ir_match_fuse_load(ctx, insn->op2, root)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + } + } +} + +static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (!IR_IS_CONST_REF(insn->op2) + && !ir_match_fuse_load(ctx, insn->op2, root) + && (IR_IS_CONST_REF(insn->op1) || ir_match_fuse_load(ctx, insn->op1, root))) { + ir_swap_ops(insn); + } +} + +static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) +{ + if (IR_IS_CONST_REF(insn->op2)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root) + && ir_type_size[ctx->ir_base[insn->op2].type] > 4 + && !IR_IS_32BIT(ctx->ir_base[insn->op2].type, ctx->ir_base[insn->op2].val)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } + } else if (!ir_match_fuse_load(ctx, insn->op2, root)) { + if (!IR_IS_CONST_REF(insn->op1) + && ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } + } +} + +static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct) +{ + if (direct) { + if (insn->op == IR_LT || insn->op == IR_LE) { + /* swap operands to avoid P flag check */ + ir_swap_ops(insn); + insn->op ^= 3; + } + } else { + if (insn->op == IR_GT || insn->op == IR_GE) { + /* swap operands to avoid P flag check */ + ir_swap_ops(insn); + insn->op ^= 3; + } + } + if (IR_IS_CONST_REF(insn->op2)) { + } else if (ir_match_fuse_load(ctx, insn->op2, root)) { + } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_fuse_load(ctx, insn->op1, root)) { + ir_swap_ops(insn); + if (insn->op != IR_EQ && insn->op != IR_NE) { + insn->op ^= 3; + } + } +} + +static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) +{ + ir_insn *op2_insn; + ir_insn *insn = &ctx->ir_base[ref]; + uint32_t store_rule; + ir_op load_op; + + switch (insn->op) { + case IR_EQ: + case IR_NE: + case IR_LT: + case IR_GE: + case IR_LE: + case IR_GT: + case IR_ULT: + case IR_UGE: + case IR_ULE: + case IR_UGT: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { + if (IR_IS_CONST_REF(insn->op2) + && ctx->ir_base[insn->op2].val.i64 == 0 + && insn->op1 == ref - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) { + /* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */ + if (IR_IS_CONST_REF(op1_insn->op2)) { + ir_match_fuse_load(ctx, op1_insn->op1, ref); + } + ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT; + return IR_TESTCC_INT; + } else { + /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ + ctx->rules[insn->op1] = IR_BINOP_INT; + return IR_SETCC_INT; + } + } + } + ir_match_fuse_load_cmp_int(ctx, insn, ref); + return IR_CMP_INT; + } else { + ir_match_fuse_load_cmp_fp(ctx, insn, ref, 1); + return IR_CMP_FP; + } + break; + case IR_ADD: + case IR_SUB: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + 
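/* with a constant right-hand operand, try to match a cheaper form: COPY for +/-0, LEA for 32-bit immediates, INC/DEC for +/-1 */ +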
op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || + (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_32BIT(-op2_insn->val.i64))) { + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + uint32_t rule = ctx->rules[insn->op1]; + + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_SI) { + /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_SI_O; + } else if (rule == IR_LEA_SIB) { + /* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB; + return IR_LEA_SIB_O; + } else if (rule == IR_LEA_IB) { + /* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... LEA [X+Y+im32] */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; + return IR_LEA_IB_O; + } + } + /* ADD(X, imm32) => LEA [X+imm32] */ + return IR_LEA_OB; + } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { + if (insn->op == IR_ADD) { + if (op2_insn->val.i64 == 1) { + /* ADD(_, 1) => INC */ + return IR_INC; + } else { + /* ADD(_, -1) => DEC */ + return IR_DEC; + } + } else { + if (op2_insn->val.i64 == 1) { + /* SUB(_, 1) => DEC */ + return IR_DEC; + } else { + /* SUB(_, -1) => INC */ + return IR_INC; + } + } + } + } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + uint32_t rule = ctx->rules[insn->op1]; + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_OB) { + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + rule = ctx->rules[insn->op2]; + if (!rule) { + ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); + } + if (rule == IR_LEA_SI) { + /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_OB_SI; + } + } + /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ + return IR_LEA_OB_I; + } + } + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + uint32_t rule = ctx->rules[insn->op2]; + if (!rule) { + ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); + } + if (rule == IR_LEA_OB) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; + if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { + rule = ctx->rules[insn->op1]; + if (!rule) { + ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); + } + if (rule == IR_LEA_SI) { + /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */ + ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; + return IR_LEA_SI_OB; + } + } + /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... 
LEA */ + return IR_LEA_I_OB; + } + } + /* ADD(X, Y) => LEA [X + Y] */ + return IR_LEA_IB; + } +binop_int: + if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, insn, ref); + } else { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return IR_BINOP_INT; + } else { +binop_fp: + if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_fp(ctx, insn, ref); + } else { + ir_match_fuse_load(ctx, insn->op2, ref); + } + if (ctx->mflags & IR_X86_AVX) { + return IR_BINOP_AVX; + } else { + return IR_BINOP_SSE2; + } + } + break; + case IR_MUL: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + // 0 + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4 && + (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) { + /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ + return IR_LEA_SI; + } else if (ir_type_size[insn->type] >= 4 && + (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) { + /* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */ + return IR_LEA_SIB; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + /* MUL(X, PWR2) => SHL */ + return IR_MUL_PWR2; + } else if (IR_IS_TYPE_SIGNED(insn->type) + && ir_type_size[insn->type] != 1 + && IR_IS_SIGNED_32BIT(op2_insn->val.i64) + && !IR_IS_CONST_REF(insn->op1)) { + /* MUL(_, imm32) => IMUL */ + ir_match_fuse_load(ctx, insn->op1, ref); + return IR_IMUL3; + } + } + /* Prefer IMUL over MUL because it's more flexible and uses fewer registers ??? */ +// if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { + if (ir_type_size[insn->type] != 1) { + goto binop_int; + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MUL_INT; + } else { + goto binop_fp; + } + break; + case IR_ADD_OV: + case IR_SUB_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + goto binop_int; + case IR_MUL_OV: + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_SIGNED_32BIT(op2_insn->val.i64) + && !IR_IS_CONST_REF(insn->op1)) { + /* MUL(_, imm32) => IMUL */ + ir_match_fuse_load(ctx, insn->op1, ref); + return IR_IMUL3; + } + } + goto binop_int; + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MUL_INT; + case IR_DIV: + if (IR_IS_TYPE_INT(insn->type)) { + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 1) { + return IR_COPY_INT; + } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { + /* DIV(X, PWR2) => SHR */ + return IR_DIV_PWR2; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_DIV_INT; + } else { + goto binop_fp; + } + break; + case IR_MOD: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (IR_IS_TYPE_UNSIGNED(insn->type) + && IR_IS_POWER_OF_TWO(op2_insn->val.u64) + && IR_IS_UNSIGNED_32BIT(op2_insn->val.u64 - 1)) { + /* MOD(X, PWR2) => AND */ + return IR_MOD_PWR2; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_MOD_INT; + case IR_BSWAP: + case IR_NOT: + if (insn->type == IR_BOOL) { + 
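/* boolean NOT is matched as a separate rule: the result must be exactly 0 or 1, which a plain bitwise NOT would not produce */ +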
IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP + return IR_BOOL_NOT_INT; + } else { + IR_ASSERT(IR_IS_TYPE_INT(insn->type)); + return IR_OP_INT; + } + break; + case IR_NEG: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_OP_INT; + } else { + return IR_OP_FP; + } + case IR_ABS: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax + } else { + return IR_OP_FP; + } + case IR_OR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + return IR_COPY_INT; + } else if (op2_insn->val.i64 == -1) { + // -1 + } + } + goto binop_int; + case IR_AND: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.i64 == 0) { + // 0 + } else if (op2_insn->val.i64 == -1) { + return IR_COPY_INT; + } + } + goto binop_int; + case IR_XOR: + if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } + } + goto binop_int; + case IR_SHL: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } else if (ir_type_size[insn->type] >= 4) { + if (op2_insn->val.u64 == 1) { + // lea [op1*2] + } else if (op2_insn->val.u64 == 2) { + // lea [op1*4] + } else if (op2_insn->val.u64 == 3) { + // lea [op1*8] + } + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_SHR: + case IR_SAR: + case IR_ROL: + case IR_ROR: + if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->flags & IR_OPT_CODEGEN) { + op2_insn = &ctx->ir_base[insn->op2]; + if (IR_IS_CONST_REF(insn->op1)) { + // const + } else if (op2_insn->val.u64 == 0) { + return IR_COPY_INT; + } + } + return IR_SHIFT_CONST; + } + return IR_SHIFT; + case IR_MIN: + case IR_MAX: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_MIN_MAX_INT; + } else { + goto binop_fp; + } + break; +// case IR_COND: + case IR_COPY: + if (IR_IS_TYPE_INT(insn->type)) { + return IR_COPY_INT; + } else { + return IR_COPY_FP; + } + break; + case IR_CALL: + ctx->flags |= IR_HAS_CALLS; + IR_FALLTHROUGH; + case IR_TAILCALL: + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return insn->op; + case IR_VAR: + return IR_SKIPPED | IR_VAR; + case IR_PARAM: + return ctx->use_lists[ref].count > 0 ? 
IR_PARAM : IR_SKIPPED | IR_PARAM; + case IR_ALLOCA: + /* alloca() may be used only in functions */ + if (ctx->flags & IR_FUNCTION) { + ctx->flags |= IR_USE_FRAME_POINTER | IR_HAS_ALLOCA; + } + return IR_ALLOCA; + case IR_VSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + store_rule = IR_VSTORE_INT; + load_op = IR_VLOAD; +store_int: + if ((ctx->flags & IR_OPT_CODEGEN) + && ir_in_same_block(ctx, insn->op3) + && (ctx->use_lists[insn->op3].count == 1 || + (ctx->use_lists[insn->op3].count == 2 + && (ctx->ir_base[insn->op3].op == IR_ADD_OV || + ctx->ir_base[insn->op3].op == IR_SUB_OV)))) { + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + uint32_t rule = ctx->rules[insn->op3]; + + if (!rule) { + ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); + } + if ((rule == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ + ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + ir_match_fuse_addr(ctx, insn->op2); + return IR_MEM_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && insn->op1 == op_insn->op2 + && ctx->ir_base[op_insn->op2].op == load_op + && ctx->ir_base[op_insn->op2].op2 == insn->op2 + && ctx->use_lists[op_insn->op2].count == 2) { + /* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ + ir_swap_ops(op_insn); + ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + ir_match_fuse_addr(ctx, insn->op2); + return IR_MEM_BINOP_INT; + } + } else if (rule == IR_INC) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_INC; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_INC; + } + } else if (rule == IR_DEC) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_DEC; + } + } else if (rule == IR_MUL_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ...
MEM_MUL_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_MUL_PWR2; + } + } else if (rule == IR_DIV_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_DIV_PWR2; + } + } else if (rule == IR_MOD_PWR2) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_MOD_PWR2; + } + } else if (rule == IR_SHIFT) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */ + ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + if (ctx->ir_base[op_insn->op2].op == IR_LOAD) { + ir_match_fuse_addr(ctx, ctx->ir_base[op_insn->op2].op2); + ctx->rules[op_insn->op2] = IR_LOAD_INT; + } + return IR_MEM_SHIFT; + } + } else if (rule == IR_SHIFT_CONST) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_SHIFT_CONST */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_SHIFT_CONST; + } + } else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == load_op + && ctx->ir_base[op_insn->op1].op2 == insn->op2 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_OP */ + ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; + return IR_MEM_OP_INT; + } + } + } + return store_rule; + } else { + return IR_VSTORE_FP; + } + break; + case IR_LOAD: + ir_match_fuse_addr(ctx, insn->op2); + if (IR_IS_TYPE_INT(insn->type)) { + return IR_LOAD_INT; + } else { + return IR_LOAD_FP; + } + break; + case IR_STORE: + ir_match_fuse_addr(ctx, insn->op2); + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + store_rule = IR_STORE_INT; + load_op = IR_LOAD; + goto store_int; + } else { + return IR_STORE_FP; + } + break; + case IR_RLOAD: + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { + return IR_SKIPPED | IR_RLOAD; + } + return IR_RLOAD; + case IR_RSTORE: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if ((ctx->flags & IR_OPT_CODEGEN) + && ir_in_same_block(ctx, insn->op2) + && ctx->use_lists[insn->op2].count == 1 + && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (insn->op1 == op_insn->op1 + && ctx->ir_base[op_insn->op1].op == IR_RLOAD + && ctx->ir_base[op_insn->op1].op2 == insn->op3 + && ctx->use_lists[op_insn->op1].count == 2) { + /* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && insn->op1 == op_insn->op2 + && ctx->ir_base[op_insn->op2].op == IR_RLOAD + && ctx->ir_base[op_insn->op2].op2 == insn->op3 + && ctx->use_lists[op_insn->op2].count == 2) { + /* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... 
REG_BINOP */ + ir_swap_ops(op_insn); + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; + return IR_REG_BINOP_INT; + } + } + } + } + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return IR_RSTORE; + case IR_START: + case IR_BEGIN: + case IR_IF_TRUE: + case IR_IF_FALSE: + case IR_CASE_VAL: + case IR_CASE_DEFAULT: + case IR_MERGE: + case IR_LOOP_BEGIN: + case IR_UNREACHABLE: + return IR_SKIPPED | insn->op; + case IR_RETURN: + if (!insn->op2) { + return IR_RETURN_VOID; + } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + return IR_RETURN_INT; + } else { + return IR_RETURN_FP; + } + case IR_IF: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + if (IR_IS_CONST_REF(op2_insn->op2) + && ctx->ir_base[op2_insn->op2].val.i64 == 0 + && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) { + /* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */ + if (IR_IS_CONST_REF(op1_insn->op2)) { + ir_match_fuse_load(ctx, op1_insn->op1, ref); + } + ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP; + return IR_TEST_AND_BRANCH_INT; + } else { + /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ + ctx->rules[op2_insn->op1] = IR_BINOP_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_JCC_INT; + } + } + } + /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ + ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_CMP_AND_BRANCH_INT; + } else { + /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ + ir_match_fuse_load_cmp_fp(ctx, op2_insn, ref, 1); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_CMP_AND_BRANCH_FP; + } + } else if (op2_insn->op == IR_AND) { + /* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */ + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + if (IR_IS_CONST_REF(op2_insn->op2)) { + ir_match_fuse_load(ctx, op2_insn->op1, ref); + } + ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; + return IR_TEST_AND_BRANCH_INT; + } else if (op2_insn->op == IR_OVERFLOW) { + /* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... 
OVERFLOW_AND_BRANCH */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_OVERFLOW_AND_BRANCH; + } + } + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + if (insn->op2 == ref - 1 /* previous instruction */ + && ir_in_same_block(ctx, insn->op2)) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op == IR_ADD || + op2_insn->op == IR_SUB || +// op2_insn->op == IR_MUL || + op2_insn->op == IR_OR || + op2_insn->op == IR_AND || + op2_insn->op == IR_XOR) { + + /* v = BINOP(_, _); IF(v) => BINOP; JCC */ + if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + } else { + ir_match_fuse_load(ctx, op2_insn->op2, ref); + } + ctx->rules[insn->op2] = IR_BINOP_INT; + return IR_JCC_INT; + } + } else if ((ctx->flags & IR_OPT_CODEGEN) + && insn->op1 == ref - 1 /* previous instruction */ + && insn->op2 == ref - 2 /* previous instruction */ + && ir_in_same_block(ctx, insn->op2) + && ctx->use_lists[insn->op2].count == 2 + && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { + ir_insn *store_insn = &ctx->ir_base[insn->op1]; + + if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) { + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (ctx->ir_base[op_insn->op1].op == IR_LOAD + && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op1) + && ctx->use_lists[op_insn->op1].count == 2 + && store_insn->op1 == op_insn->op1) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op1] = IR_MEM_BINOP_INT; + return IR_JCC_INT; + } + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_LOAD + && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op2) + && ctx->use_lists[op_insn->op2].count == 2 + && store_insn->op1 == op_insn->op2) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ + ir_swap_ops(op_insn); + ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op1] = IR_MEM_BINOP_INT; + return IR_JCC_INT; + } + } + } + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return IR_IF_INT; + } else { + IR_ASSERT(0 && "NIY IR_IF_FP"); + break; + } + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { + op2_insn = &ctx->ir_base[insn->op2]; + if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT + // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP + && (insn->op2 == ref - 1 || + (insn->op2 == ctx->prev_ref[ref] - 1 + && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { + if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { + if (IR_IS_CONST_REF(op2_insn->op2) + && ctx->ir_base[op2_insn->op2].val.i64 == 0) { + if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ + ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; + + if (op1_insn->op == IR_ADD || + op1_insn->op == IR_SUB || +// op1_insn->op == IR_MUL || + op1_insn->op == IR_OR || + op1_insn->op == IR_AND || + op1_insn->op == IR_XOR) { + + if (ir_op_flags[op1_insn->op] & 
IR_OP_FLAG_COMMUTATIVE) { + ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); + } else { + ir_match_fuse_load(ctx, op1_insn->op2, ref); + } + /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */ + ctx->rules[op2_insn->op1] = IR_BINOP_INT; + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_JCC_INT; + } + } else if ((ctx->flags & IR_OPT_CODEGEN) + && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */ + && ir_in_same_block(ctx, op2_insn->op1) + && ctx->use_lists[op2_insn->op1].count == 2) { + ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1]; + + if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) { + ir_insn *op_insn = &ctx->ir_base[op2_insn->op1]; + + if (op_insn->op == IR_ADD || + op_insn->op == IR_SUB || +// op_insn->op == IR_MUL || + op_insn->op == IR_OR || + op_insn->op == IR_AND || + op_insn->op == IR_XOR) { + if (ctx->ir_base[op_insn->op1].op == IR_LOAD + && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op1) + && ctx->use_lists[op_insn->op1].count == 2 + && store_insn->op1 == op_insn->op1) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ + ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; + ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; + return IR_GUARD_JCC_INT; + } + } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) + && ctx->ir_base[op_insn->op2].op == IR_LOAD + && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { + if (ir_in_same_block(ctx, op_insn->op2) + && ctx->use_lists[op_insn->op2].count == 2 + && store_insn->op1 == op_insn->op2) { + /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ + ir_swap_ops(op_insn); + ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; + ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; + ir_match_fuse_addr(ctx, store_insn->op2); + ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; + ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; + return IR_GUARD_JCC_INT; + } + } + } + } + } + } + /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ + ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; + return IR_GUARD_CMP_INT; + } else { + /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ + ir_match_fuse_load_cmp_fp(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT); + ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; + return IR_GUARD_CMP_FP; + } + } else if (op2_insn->op == IR_AND) { // TODO: OR, XOR, etc. + /* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */ + ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); + if (IR_IS_CONST_REF(op2_insn->op2)) { + ir_match_fuse_load(ctx, op2_insn->op1, ref); + } + ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; + return IR_GUARD_TEST_INT; + } else if (op2_insn->op == IR_OVERFLOW) { + /* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ...
GUARD_OVERFLOW */ + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; + return IR_GUARD_OVERFLOW; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return insn->op; + case IR_IJMP: + if (ir_in_same_block(ctx, insn->op2)) { + ir_match_fuse_load(ctx, insn->op2, ref); + } + return insn->op; + case IR_SEXT: + case IR_ZEXT: + case IR_BITCAST: + case IR_INT2FP: + case IR_FP2INT: + case IR_FP2FP: + ir_match_fuse_load(ctx, insn->op1, ref); + return insn->op; + default: + break; + } + + return insn->op; +} + +static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) +{ + if (rule == IR_LEA_IB) { + ir_insn *insn = &ctx->ir_base[ref]; + + if (insn->op1 == insn->op2) { + /* pass */ + } else if (ir_match_fuse_load(ctx, insn->op2, ref) || + (ctx->ir_base[insn->op2].op == IR_PARAM + && ctx->use_lists[insn->op2].count == 1 + && ir_get_param_reg(ctx, insn->op2) == IR_REG_NONE)) { + ctx->rules[ref] = IR_BINOP_INT; + } else if (ir_match_fuse_load(ctx, insn->op1, ref) || + (ctx->ir_base[insn->op1].op == IR_PARAM + && ctx->use_lists[insn->op1].count == 1 + && ir_get_param_reg(ctx, insn->op1) == IR_REG_NONE)) { + /* swap for better load fusion */ + ir_swap_ops(insn); + ctx->rules[ref] = IR_BINOP_INT; + } + } +} + +/* code generation */ +static int32_t ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + int32_t offset; + + IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; + IR_ASSERT(offset != -1); + if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { + IR_ASSERT(ctx->spill_base != IR_REG_NONE); + *reg = ctx->spill_base; + return offset; + } + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(offset); +} + +static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg reg, int32_t offset) +{ + ir_reg fp; + + return ir_ref_spill_slot(ctx, ref, &fp) == offset && reg == fp; +} + +static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) +{ + ir_insn *var_insn = &ctx->ir_base[ref]; + + IR_ASSERT(var_insn->op == IR_VAR); + *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + return IR_SPILL_POS_TO_OFFSET(var_insn->op3); +} + +static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use) +{ + ir_live_interval *ival; + + IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); + ival = ctx->live_intervals[ctx->vregs[ref]]; + while (ival) { + ir_use_pos *use_pos = ival->use_pos; + while (use_pos) { + if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) { + return !use_pos->next || use_pos->next->op_num == 0; + } + use_pos = use_pos->next; + } + ival = ival->next; + } + return 0; +} + +static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (val == 0) { + | ASM_REG_REG_OP xor, type, reg, reg + } else if (ir_type_size[type] == 8) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (IR_IS_UNSIGNED_32BIT(val)) { + | mov Rd(reg), (uint32_t)val // zero extended load + } else if (IR_IS_SIGNED_32BIT(val)) { + | mov Rq(reg), (int32_t)val // sign extended load + } else { + | mov64 Ra(reg), val + } +|.endif + } else { + | ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load + } +} + +static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (base_reg != IR_REG_NONE) { + | ASM_REG_MEM_OP mov, type, reg, [Ra(base_reg)+offset] + } else { + | ASM_REG_MEM_OP mov, type, reg, [offset] + } +} + +static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *insn = &ctx->ir_base[src]; + int label; + + if (type == IR_FLOAT && insn->val.u32 == 0) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } else { + | xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } + } else if (type == IR_DOUBLE && insn->val.u64 == 0) { + if (ctx->mflags & IR_X86_AVX) { + | vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } else { + | xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) + } + } else { + label = ctx->cfg_blocks_count - src; + insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [=>label] + } +} + +static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_reg base_reg, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (base_reg != IR_REG_NONE) { + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [Ra(base_reg)+offset] + } else { + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, reg, [offset] + } +} + +static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) +{ + int32_t offset; + ir_reg fp; + + if (IR_IS_CONST_REF(src)) { + if (IR_IS_TYPE_INT(type)) { + ir_insn *insn = &ctx->ir_base[src]; + + IR_ASSERT(insn->op != IR_STR && insn->op != IR_SYM && insn->op != IR_FUNC); + ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); + } else { + ir_emit_load_imm_fp(ctx, type, reg, src); + } + } else { + offset = ir_ref_spill_slot(ctx, src, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, reg, fp, offset); + } + } +} + +static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type
type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_MEM_REG_OP mov, type, [Ra(base_reg)+offset], reg +} + +static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_reg base_reg, int32_t offset, ir_reg reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [Ra(base_reg)+offset], reg +} + +static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) +{ + int32_t offset; + ir_reg fp; + + IR_ASSERT(dst >= 0); + offset = ir_ref_spill_slot(ctx, dst, &fp); + if (IR_IS_TYPE_INT(type)) { + ir_emit_store_mem_int(ctx, type, fp, offset, reg); + } else { + ir_emit_store_mem_fp(ctx, type, fp, offset, reg); + } +} + +static void ir_emit_store_imm(ir_ctx *ctx, ir_type type, ir_ref dst, int32_t imm) +{ + int32_t offset; + ir_reg fp; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + IR_ASSERT(dst >= 0); + IR_ASSERT(IR_IS_TYPE_INT(type)); + offset = ir_ref_spill_slot(ctx, dst, &fp); + + | ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], imm +} + +static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_REG_REG_OP mov, type, dst, src +} + +static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + | ASM_FP_REG_REG_OP movaps, movapd, vmovaps, vmovapd, type, dst, src +} + +static int32_t ir_fuse_addr(ir_ctx *ctx, ir_ref ref, ir_reg *preg) +{ + ir_insn *addr_insn = &ctx->ir_base[ref]; + ir_reg reg; + + IR_ASSERT(addr_insn->op == IR_ADD); + IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); + reg = ctx->regs[ref][1]; + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); + } + *preg = reg; + return ctx->ir_base[addr_insn->op2].val.i32; +} + +static int32_t ir_fuse_load(ir_ctx *ctx, ir_ref ref, ir_reg *preg) +{ + ir_insn *load_insn = &ctx->ir_base[ref]; + ir_reg reg = ctx->regs[ref][2]; + + IR_ASSERT(load_insn->op == IR_LOAD); + if (IR_IS_CONST_REF(load_insn->op2)) { + *preg = reg; + if (reg == IR_REG_NONE) { + ir_insn *addr_insn = &ctx->ir_base[load_insn->op2]; + + IR_ASSERT(addr_insn->op == IR_C_ADDR); + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)); + return addr_insn->val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, load_insn->op2); + return 0; + } + } else if (reg == IR_REG_NONE) { + return ir_fuse_addr(ctx, load_insn->op2, preg); + } else { + if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, load_insn->op2); + } + *preg = reg; + return 0; + } +} + +static void ir_emit_prologue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | push Ra(IR_REG_RBP) + | mov Ra(IR_REG_RBP), Ra(IR_REG_RSP) + } + if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) + } + } + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = 0; + } else 
{ + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (i < IR_REG_FP_FIRST) { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + | mov aword [Ra(fp)+offset], Ra(i) + } else { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + if (ctx->mflags & IR_X86_AVX) { + | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) + } else { + | movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) + } + } + } + } + } +} + +static void ir_emit_epilogue(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (ctx->used_preserved_regs) { + int offset; + uint32_t i; + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + offset = 0; + } else { + offset = ctx->stack_frame_size + ctx->call_stack_size; + } + for (i = 0; i < IR_REG_NUM; i++) { + if (IR_REGSET_IN(used_preserved_regs, i)) { + if (i < IR_REG_FP_FIRST) { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + | mov Ra(i), aword [Ra(fp)+offset] + } else { + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + + offset -= sizeof(void*); + if (ctx->mflags & IR_X86_AVX) { + | vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + } else { + | movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] + } + } + } + } + } + + if (ctx->flags & IR_USE_FRAME_POINTER) { + | mov Ra(IR_REG_RSP), Ra(IR_REG_RBP) + | pop Ra(IR_REG_RBP) + } else if (ctx->stack_frame_size + ctx->call_stack_size) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); + } else { + | add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) + } + } +} + +static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + if (op1 == op2) { + op2_reg = def_reg; + } + } + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_REG_OP add, type, def_reg, op2_reg + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_REG_OP sub, type, def_reg, op2_reg + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_REG_IMUL type, def_reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_OP or, type, def_reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_OP and, type, def_reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_OP xor, type, def_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn 
*val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_IMM_OP add, type, def_reg, val + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_IMM_OP sub, type, def_reg, val + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_IMM_IMUL type, def_reg, val + break; + case IR_OR: + | ASM_REG_IMM_OP or, type, def_reg, val + break; + case IR_AND: + | ASM_REG_IMM_OP and, type, def_reg, val + break; + case IR_XOR: + | ASM_REG_IMM_OP xor, type, def_reg, val + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + if (op2_reg != IR_REG_NONE) { + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_MEM_OP add, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_MEM_OP sub, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_MEM_IMUL type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_OR: + | ASM_REG_MEM_OP or, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_AND: + | ASM_REG_MEM_OP and, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_XOR: + | ASM_REG_MEM_OP xor, type, def_reg, [Ra(op2_reg)+offset] + break; + } + } else { + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + | ASM_REG_MEM_OP add, type, def_reg, [offset] + break; + case IR_SUB: + case IR_SUB_OV: + | ASM_REG_MEM_OP sub, type, def_reg, [offset] + break; + case IR_MUL: + case IR_MUL_OV: + | ASM_REG_MEM_IMUL type, def_reg, [offset] + break; + case IR_OR: + | ASM_REG_MEM_OP or, type, def_reg, [offset] + break; + case IR_AND: + | ASM_REG_MEM_OP and, type, def_reg, [offset] + break; + case IR_XOR: + | ASM_REG_MEM_OP xor, type, def_reg, [offset] + break; + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_insn *val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 2: + | imul Rw(def_reg), Rw(op1_reg), val + break; + case 4: + | imul Rd(def_reg), Rd(op1_reg), val + break; +|.if X64 +|| case 8: +| imul Rq(def_reg), Rq(op1_reg), val +|| break; +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, op1, &op1_reg); + } + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 2: + | imul Rw(def_reg), word [Ra(op1_reg)+offset], val + break; + case 4: + | imul Rd(def_reg), dword [Ra(op1_reg)+offset], val + break; +|.if X64 +|| case 8: +| imul 
Rq(def_reg), qword [Ra(op1_reg)+offset], val +|| break; +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + if (op1 == op2) { + return; + } + + | ASM_REG_REG_OP cmp, type, def_reg, op2_reg + if (insn->op == IR_MIN) { + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg + } else { + | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg + } + } else { + IR_ASSERT(insn->op == IR_MAX); + if (IR_IS_TYPE_SIGNED(type)) { + | ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg + } else { + | ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg + } + } + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_type type = ctx->ir_base[insn->op1].type; + + IR_ASSERT(def_reg != IR_REG_NONE); + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_TYPE_SIGNED(type)) { + | seto Rb(def_reg) + } else { + | setc Rb(def_reg) + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; + ir_type type = ctx->ir_base[overflow_insn->op1].type; + uint32_t true_block, false_block, next_block; + bool reverse = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + reverse = 1; + true_block = false_block; + false_block = 0; + } else if (false_block == next_block) { + false_block = 0; + } + + if (IR_IS_TYPE_SIGNED(type)) { + if (reverse) { + | jno =>true_block + } else { + | jo =>true_block + } + } else { + if (reverse) { + | jnc =>true_block + } else { + | jc =>true_block + } + } + if (false_block) { + | jmp =>false_block + } +} + +static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op3][2]; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg ==
IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val))); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP add, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP add, type, [offset], val->i32 + } + break; + case IR_SUB: + case IR_SUB_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP sub, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP sub, type, [offset], val->i32 + } + break; + case IR_OR: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP or, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP or, type, [offset], val->i32 + } + break; + case IR_AND: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP and, type, [offset], val->i32 + } + break; + case IR_XOR: + if (reg != IR_REG_NONE) { + | ASM_MEM_IMM_OP xor, type, [Ra(reg)+offset], val->i32 + } else { + | ASM_MEM_IMM_OP xor, type, [offset], val->i32 + } + break; + } + } else { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + case IR_ADD_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP add, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP add, type, [offset], op2_reg + } + break; + case IR_SUB: + case IR_SUB_OV: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP sub, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP sub, type, [offset], op2_reg + } + break; + case IR_OR: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP or, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP or, type, [offset], op2_reg + } + break; + case IR_AND: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP and, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP and, type, [offset], op2_reg + } + break; + case IR_XOR: + if (reg != IR_REG_NONE) { + | ASM_MEM_REG_OP xor, type, [Ra(reg)+offset], op2_reg + } else { + | ASM_MEM_REG_OP xor, type, [offset], op2_reg + } + break; + } + } +} + +static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op2]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + ir_reg reg; + + IR_ASSERT(insn->op == IR_RSTORE); + reg = insn->op3; + + if (op2_reg == IR_REG_NONE) { + ir_val *val = &ctx->ir_base[op2].val; + + IR_ASSERT(IR_IS_CONST_REF(op2) && (ir_type_size[type] != 8 || IR_IS_32BIT(type, ctx->ir_base[op2].val))); + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_IMM_OP add, type, reg, val->i32 + break; + case IR_SUB: + | ASM_REG_IMM_OP sub, type, reg, val->i32 + break; + case IR_OR: + | ASM_REG_IMM_OP or, type, reg, val->i32 + break; + case IR_AND: + | ASM_REG_IMM_OP and, type, reg, val->i32 + break; + case IR_XOR: + | ASM_REG_IMM_OP xor, type, reg, val->i32 + break; + } + } else { + if
(IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + switch (op_insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_REG_REG_OP add, type, reg, op2_reg + break; + case IR_SUB: + | ASM_REG_REG_OP sub, type, reg, op2_reg + break; + case IR_OR: + | ASM_REG_REG_OP or, type, reg, op2_reg + break; + case IR_AND: + | ASM_REG_REG_OP and, type, reg, op2_reg + break; + case IR_XOR: + | ASM_REG_REG_OP xor, type, reg, op2_reg + break; + } + } +} + +static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + if (shift == 1) { + | ASM_REG_REG_OP add, insn->type, def_reg, def_reg + } else { + | ASM_REG_IMM_OP shl, insn->type, def_reg, shift + } + } else if (insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); + | ASM_REG_IMM_OP shr, insn->type, def_reg, shift + } else { + IR_ASSERT(insn->op == IR_MOD); + uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; + IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); + | ASM_REG_IMM_OP and, insn->type, def_reg, mask + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op_insn->op == IR_MUL) { + uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); + | ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift + } else if (op_insn->op == IR_DIV) { + uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); + | ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift + } else { + IR_ASSERT(op_insn->op == IR_MOD); + uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1; + IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); + | ASM_MEM_IMM_OP and, type, [Ra(reg)+offset], mask + } +} + +static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX); + if (op1_reg !=
IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != IR_REG_RCX) { + if (op1_reg == IR_REG_RCX) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + op1_reg = def_reg; + } + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RCX, insn->op2); + } + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, insn->op1); + } + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_TXT_OP shl, insn->type, def_reg, cl + break; + case IR_SHR: + | ASM_REG_TXT_OP shr, insn->type, def_reg, cl + break; + case IR_SAR: + | ASM_REG_TXT_OP sar, insn->type, def_reg, cl + break; + case IR_ROL: + | ASM_REG_TXT_OP rol, insn->type, def_reg, cl + break; + case IR_ROR: + | ASM_REG_TXT_OP ror, insn->type, def_reg, cl + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_ref op2 = op_insn->op2; + ir_reg op2_reg = ctx->regs[insn->op3][2]; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + if (op2_reg != IR_REG_RCX) { + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RCX, op2); + } + } + switch (op_insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_MEM_TXT_OP shl, type, [Ra(reg)+offset], cl + break; + case IR_SHR: + | ASM_MEM_TXT_OP shr, type, [Ra(reg)+offset], cl + break; + case IR_SAR: + | ASM_MEM_TXT_OP sar, type, [Ra(reg)+offset], cl + break; + case IR_ROL: + | ASM_MEM_TXT_OP rol, type, [Ra(reg)+offset], cl + break; + case IR_ROR: + | ASM_MEM_TXT_OP ror, type, [Ra(reg)+offset], cl + break; + } +} + +static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t shift; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + shift = ctx->ir_base[insn->op2].val.i32; + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { +
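/* Editorial note (reviewer sketch, not part of the patch): x86 variable-count
 * shifts and rotates accept their count only in CL, which is why
 * ir_emit_shift()/ir_emit_mem_shift() above assert def_reg != RCX and funnel
 * op2 into RCX when the register allocator has not already placed it there.
 * For example, `x << n` comes out roughly as (registers illustrative):
 *     mov ecx, esi   ; shift count into CL
 *     mov eax, edi   ; value to shift
 *     shl eax, cl
 * Constant-count shifts avoid this entirely via IR_SHIFT_CONST below. */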
ir_emit_load(ctx, type, def_reg, op1); + } + } + switch (insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_REG_IMM_OP shl, insn->type, def_reg, shift + break; + case IR_SHR: + | ASM_REG_IMM_OP shr, insn->type, def_reg, shift + break; + case IR_SAR: + | ASM_REG_IMM_OP sar, insn->type, def_reg, shift + break; + case IR_ROL: + | ASM_REG_IMM_OP rol, insn->type, def_reg, shift + break; + case IR_ROR: + | ASM_REG_IMM_OP ror, insn->type, def_reg, shift + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + int32_t shift; + ir_reg reg; + int32_t offset = 0; + + IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64)); + shift = ctx->ir_base[op_insn->op2].val.i32; + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if (IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + switch (op_insn->op) { + default: + IR_ASSERT(0); + case IR_SHL: + | ASM_MEM_IMM_OP shl, type, [Ra(reg)+offset], shift + break; + case IR_SHR: + | ASM_MEM_IMM_OP shr, type, [Ra(reg)+offset], shift + break; + case IR_SAR: + | ASM_MEM_IMM_OP sar, type, [Ra(reg)+offset], shift + break; + case IR_ROL: + | ASM_MEM_IMM_OP rol, type, [Ra(reg)+offset], shift + break; + case IR_ROR: + | ASM_MEM_IMM_OP ror, type, [Ra(reg)+offset], shift + break; + } +} + +static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_ADD) { + | ASM_REG_OP inc, insn->type, def_reg + } else if (insn->op == IR_SUB) { + | ASM_REG_OP dec, insn->type, def_reg + } else if (insn->op == IR_NOT) { + | ASM_REG_OP not, insn->type, def_reg + } else if (insn->op == IR_NEG) { + | ASM_REG_OP neg, insn->type, def_reg + } else { + IR_ASSERT(insn->op == IR_BSWAP); + switch (ir_type_size[insn->type]) { + default: + IR_ASSERT(0); + case 4: + | bswap Rd(def_reg) + break; + case 8: + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | bswap Rq(def_reg) +|.endif + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *op_insn = &ctx->ir_base[insn->op3]; + ir_type type = op_insn->type; + ir_reg reg; + int32_t offset = 0; + + if (insn->op == IR_STORE) { + reg = ctx->regs[def][2]; + if
(IR_IS_CONST_REF(insn->op2)) { + if (reg == IR_REG_NONE) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else if (reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &reg); + } else if (IR_REG_SPILLED(reg)) { + reg = IR_REG_NUM(reg); + ir_emit_load(ctx, IR_ADDR, reg, insn->op2); + } + } else { + IR_ASSERT(insn->op == IR_VSTORE); + offset = ir_var_spill_slot(ctx, insn->op2, &reg); + } + + if (op_insn->op == IR_ADD) { + | ASM_MEM_OP inc, type, [Ra(reg)+offset] + } else if (op_insn->op == IR_SUB) { + | ASM_MEM_OP dec, type, [Ra(reg)+offset] + } else if (op_insn->op == IR_NOT) { + | ASM_MEM_OP not, type, [Ra(reg)+offset] + } else { + IR_ASSERT(op_insn->op == IR_NEG); + | ASM_MEM_OP neg, type, [Ra(reg)+offset] + } +} + +static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + + IR_ASSERT(def_reg != op1_reg); + + ir_emit_mov(ctx, insn->type, def_reg, op1_reg); + | ASM_REG_OP neg, insn->type, def_reg + | ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[insn->op1].type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + + if (op1_reg != IR_REG_NONE) { + | ASM_REG_REG_OP test, type, op1_reg, op1_reg + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, op1, &fp); + + | ASM_MEM_IMM_OP cmp, type, [Ra(fp)+offset], 0 + } + | sete Rb(def_reg) + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + int32_t offset = 0; + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op1_reg != IR_REG_RAX) { + if (op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg); + } else { + ir_emit_load(ctx, type, IR_REG_RAX, op1); + } + } + if (op2_reg == IR_REG_NONE && op1 == op2) { + op2_reg = IR_REG_RAX; + } else if (IR_IS_CONST_REF(op2)) { + if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { + op2_reg = IR_REG_RDX; + } else { + IR_ASSERT(op2_reg != IR_REG_NONE); + } + ir_emit_load(ctx, type, op2_reg, op2); + } + if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { + IR_ASSERT(!IR_IS_TYPE_SIGNED(insn->type)); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); +
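/* Editorial note (reviewer sketch, not part of the patch): the one-operand
 * x86 mul/div family operates on the implicit RDX:RAX register pair, which
 * is why ir_emit_mul_div_mod() moves op1 into RAX above, zeroes or
 * sign-extends into RDX (xor / cdq / cqo) before dividing, and reads the
 * product or quotient back from RAX and the remainder from RDX afterwards.
 * An unsigned 32-bit `q = a / b` comes out roughly as (registers illustrative):
 *     mov eax, edi
 *     xor edx, edx
 *     div esi        ; EAX = quotient, EDX = remainder
 */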
ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP mul, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP mul, type, [Ra(op2_reg)+offset] + } + } else { + if (IR_IS_TYPE_SIGNED(type)) { + if (ir_type_size[type] == 8) { + | cqo + } else if (ir_type_size[type] == 4) { + | cdq + } else if (ir_type_size[type] == 2) { + | cwd + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP idiv, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP idiv, type, [Ra(op2_reg)+offset] + } + } else { + | ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + } + | ASM_REG_OP div, type, op2_reg + } else { + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_MEM_OP div, type, [Ra(op2_reg)+offset] + } + } + } + + if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) { + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_RAX) { + ir_emit_mov(ctx, type, def_reg, IR_REG_RAX); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_emit_store(ctx, type, def, IR_REG_RAX); + } + } else { + IR_ASSERT(insn->op == IR_MOD); + if (ir_type_size[type] == 1) { + if (def_reg != IR_REG_NONE) { + | mov al, ah + if (def_reg != IR_REG_RAX) { + | mov Rb(def_reg), al + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, def, &fp); + + | mov byte [Ra(fp)+offset], ah + } + } else { + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_RDX) { + ir_emit_mov(ctx, type, def_reg, IR_REG_RDX); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + } else { + ir_emit_store(ctx, type, def, IR_REG_RDX); + } + } + } +} + +static void ir_rodata(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + |.rodata + if (!data->rodata_label) { + int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + |=>label: + } +} + +static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + } + if (insn->op == IR_NEG) { + if (insn->type == IR_DOUBLE) { + if (!data->double_neg_const) { + data->double_neg_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_neg_const: + |.dword 0, 0x80000000, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vxorpd xmm(def_reg-IR_REG_FP_FIRST), 
xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] + } else { + | xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] + } + } else { + IR_ASSERT(insn->type == IR_FLOAT); + if (!data->float_neg_const) { + data->float_neg_const = 1; + ir_rodata(ctx); + |.align 16 + |->float_neg_const: + |.dword 0x80000000, 0, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] + } else { + | xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] + } + } + } else { + IR_ASSERT(insn->op == IR_ABS); + if (insn->type == IR_DOUBLE) { + if (!data->double_abs_const) { + data->double_abs_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_abs_const: + |.dword 0xffffffff, 0x7fffffff, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] + } else { + | andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] + } + } else { + IR_ASSERT(insn->type == IR_FLOAT); + if (!data->float_abs_const) { + data->float_abs_const = 1; + ir_rodata(ctx); + |.align 16 + |->float_abs_const: + |.dword 0x7fffffff, 0, 0, 0 + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] + } else { + | andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (def_reg != op1_reg) { + if (op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else { + ir_emit_load(ctx, type, def_reg, op1); + } + if (op1 == op2) { + op2_reg = def_reg; + } + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_REG_OP addss, addsd, type, def_reg, op2_reg + break; + case IR_SUB: + | ASM_SSE2_REG_REG_OP subss, subsd, type, def_reg, op2_reg + break; + case IR_MUL: + | ASM_SSE2_REG_REG_OP mulss, mulsd, type, def_reg, op2_reg + break; + case IR_DIV: + | ASM_SSE2_REG_REG_OP divss, divsd, type, def_reg, op2_reg + break; + case IR_MIN: + | ASM_SSE2_REG_REG_OP minss, minsd, type, def_reg, op2_reg + break; + case IR_MAX: + | ASM_SSE2_REG_REG_OP maxss, maxsd, type, def_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [=>label] + break; + case IR_SUB: + | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [=>label] + break; + case IR_MUL: + | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [=>label] + break; + case IR_DIV: + | ASM_SSE2_REG_MEM_OP 
divss, divsd, type, def_reg, [=>label] + break; + case IR_MIN: + | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [=>label] + break; + case IR_MAX: + | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [=>label] + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_SSE2_REG_MEM_OP addss, addsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + | ASM_SSE2_REG_MEM_OP subss, subsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + | ASM_SSE2_REG_MEM_OP mulss, mulsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_DIV: + | ASM_SSE2_REG_MEM_OP divss, divsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MIN: + | ASM_SSE2_REG_MEM_OP minss, minsd, type, def_reg, [Ra(op2_reg)+offset] + break; + case IR_MAX: + | ASM_SSE2_REG_MEM_OP maxss, maxsd, type, def_reg, [Ra(op2_reg)+offset] + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); + + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_REG_OP vaddss, vaddsd, type, def_reg, op1_reg, op2_reg + break; + case IR_SUB: + | ASM_AVX_REG_REG_REG_OP vsubss, vsubsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MUL: + | ASM_AVX_REG_REG_REG_OP vmulss, vmulsd, type, def_reg, op1_reg, op2_reg + break; + case IR_DIV: + | ASM_AVX_REG_REG_REG_OP vdivss, vdivsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MIN: + | ASM_AVX_REG_REG_REG_OP vminss, vminsd, type, def_reg, op1_reg, op2_reg + break; + case IR_MAX: + | ASM_AVX_REG_REG_REG_OP vmaxss, vmaxsd, type, def_reg, op1_reg, op2_reg + break; + } + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [=>label] + break; + case IR_SUB: + | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MUL: + | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [=>label] + break; + case IR_DIV: + | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MIN: + | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [=>label] + break; + case IR_MAX: + | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [=>label] + break; + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + 
} else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + switch (insn->op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_ADD: + | ASM_AVX_REG_REG_MEM_OP vaddss, vaddsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_SUB: + | ASM_AVX_REG_REG_MEM_OP vsubss, vsubsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MUL: + | ASM_AVX_REG_REG_MEM_OP vmulss, vmulsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_DIV: + | ASM_AVX_REG_REG_MEM_OP vdivss, vdivsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MIN: + | ASM_AVX_REG_REG_MEM_OP vminss, vminsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + case IR_MAX: + | ASM_AVX_REG_REG_MEM_OP vmaxss, vmaxsd, type, def_reg, op1_reg, [Ra(op2_reg)+offset] + break; + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op1_reg != IR_REG_NONE) { + if (op2_reg != IR_REG_NONE) { + | ASM_REG_REG_OP cmp, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + | ASM_REG_REG_OP test, type, op1_reg, op1_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + | ASM_REG_IMM_OP cmp, type, op1_reg, val_insn->val.i32 + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + if (op2_reg != IR_REG_NONE) { + | ASM_REG_MEM_OP cmp, type, op1_reg, [Ra(op2_reg)+offset] + } else { + | ASM_REG_MEM_OP cmp, type, op1_reg, [offset] + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + if (op2_reg != IR_REG_NONE) { + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_REG_OP cmp, type, [offset], op2_reg + } else { + | ASM_MEM_REG_OP cmp, type, [Ra(op1_reg)+offset], op2_reg + } + } else { + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val)); + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], ctx->ir_base[op2].val.i32 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32 + } + } + } +} + +static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | sete Rb(def_reg) + break; + case IR_NE: + | setne Rb(def_reg) + break; + case IR_LT: + | setl Rb(def_reg) + break; + case IR_GE: + | setge Rb(def_reg) + break; + case IR_LE: + | setle Rb(def_reg) + break; + case IR_GT: + | setg Rb(def_reg) + break; + case IR_ULT: + | setb Rb(def_reg) + break; + case IR_UGE: + | setae Rb(def_reg) + break; + case IR_ULE: + | setbe Rb(def_reg) + break; + case IR_UGT: + | seta Rb(def_reg) + break; + } +} + +static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = 
ctx->ir_base[insn->op1].type; + ir_op op = insn->op; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + | xor Ra(def_reg), Ra(def_reg) + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_UGE) { + /* always true */ + | ASM_REG_IMM_OP mov, insn->type, def_reg, 1 + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, insn, op1_reg, op1, op2_reg, op2); + _ir_emit_setcc_int(ctx, op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref ref, ir_op op) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *binop_insn = &ctx->ir_base[ref]; + ir_type type = binop_insn->type; + ir_ref op1 = binop_insn->op1; + ir_ref op2 = binop_insn->op2; + ir_reg op1_reg = ctx->regs[ref][1]; + ir_reg op2_reg = ctx->regs[ref][2]; + + IR_ASSERT(binop_insn->op == IR_AND); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + | ASM_REG_REG_OP test, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int32_t val; + + IR_ASSERT(IR_IS_32BIT(val_insn->type, val_insn->val)); + val = val_insn->val.i32; + if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) { + | test Rb(op1_reg), Rb(op1_reg) + } else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) { + if (op1_reg == IR_REG_RAX) { + | test ah, ah + } else if (op1_reg == IR_REG_RBX) { + | test bh, bh + } else if (op1_reg == IR_REG_RCX) { + | test ch, ch + } else if (op1_reg == IR_REG_RDX) { + | test dh, dh + } else { + IR_ASSERT(0); + } + } else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) { + | test Rw(op1_reg), Rw(op1_reg) + } else if ((op == IR_EQ || op == IR_NE) && val == -1) { + | test Rd(op1_reg), Rd(op1_reg) + } else { + | ASM_REG_IMM_OP test, type, op1_reg, val + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_REG_MEM_OP test, type, op1_reg, [Ra(op2_reg)+offset] + } + } else if (IR_IS_CONST_REF(op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, op1, &op1_reg); + } + if (op2_reg != 
IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_REG_OP test, type, [offset], op2_reg + } else { + | ASM_MEM_REG_OP test, type, [Ra(op1_reg)+offset], op2_reg + } + } else { + IR_ASSERT(!IR_IS_CONST_REF(op1)); + IR_ASSERT(IR_IS_CONST_REF(op2)); + IR_ASSERT(IR_IS_32BIT(ctx->ir_base[op2].type, ctx->ir_base[op2].val)); + if (op1_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP test, type, [offset], ctx->ir_base[op2].val.i32 + } else { + | ASM_MEM_IMM_OP test, type, [Ra(op1_reg)+offset], ctx->ir_base[op2].val.i32 + } + } + } +} + +static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + ir_emit_test_int_common(ctx, insn->op1, insn->op); + _ir_emit_setcc_int(ctx, insn->op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + _ir_emit_setcc_int(ctx, insn->op, def_reg); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_op op = cmp_insn->op; + ir_ref op1, op2; + ir_reg op1_reg, op2_reg; + + op1 = cmp_insn->op1; + op2 = cmp_insn->op2; + op1_reg = ctx->regs[cmp_ref][1]; + op2_reg = ctx->regs[cmp_ref][2]; + + if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { + ir_ref tmp; + ir_reg tmp_reg; + + tmp = op1; + op1 = op2; + op2 = tmp; + tmp_reg = op1_reg; + op1_reg = op2_reg; + op2_reg = tmp_reg; + } + + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + | ASM_FP_REG_REG_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, op2_reg + } else if (IR_IS_CONST_REF(op2)) { + ir_insn *val_insn = &ctx->ir_base[op2]; + int label = ctx->cfg_blocks_count - op2; + + val_insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [=>label] + } else { + int32_t offset = 0; + + if (ir_rule(ctx, op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, op2, &op2_reg); + } + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, type, op1_reg, [Ra(op2_reg)+offset] + } + return op; +} + +static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg tmp_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE); + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovne Rd(def_reg), Rd(tmp_reg) + break; + case IR_NE: + | setp Rb(def_reg) + | mov Rd(tmp_reg), 1 
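+ /* after ucomiss/ucomisd an unordered result sets PF (and ZF): setp above already yields 1 for NaN, and the cmovne below also reports the ordered not-equal case (ZF=0) as 1 */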
+ | cmovne Rd(def_reg), Rd(tmp_reg) + break; + case IR_LT: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovae Rd(def_reg), Rd(tmp_reg) + break; + case IR_GE: + | setae Rb(def_reg) + break; + case IR_LE: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmova Rd(def_reg), Rd(tmp_reg) + break; + case IR_GT: + | seta Rb(def_reg) + break; + case IR_ULT: + | setb Rb(def_reg) + break; + case IR_UGE: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovb Rd(def_reg), Rd(tmp_reg) + break; + case IR_ULE: + | setbe Rb(def_reg) + break; + case IR_UGT: + | setnp Rb(def_reg) + | mov Rd(tmp_reg), 0 + | cmovbe Rd(def_reg), Rd(tmp_reg) + break; + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block != next_block) { + | jmp =>true_block + } +} + +static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (false_block != next_block) { + | jmp =>false_block + } +} + +static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp) +{ + uint32_t true_block, false_block, next_block; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + bool swap = 0; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (true_block == next_block) { + /* swap to avoid unconditional JMP */ + op ^= 1; // reverse + true_block = false_block; + false_block = 0; + swap = 1; + } else if (false_block == next_block) { + false_block = 0; + } + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | je =>true_block + break; + case IR_NE: + | jne =>true_block + break; + case IR_LT: + | jl =>true_block + break; + case IR_GE: + | jge =>true_block + break; + case IR_LE: + | jle =>true_block + break; + case IR_GT: + | jg =>true_block + break; + case IR_ULT: + | jb =>true_block + break; + case IR_UGE: + | jae =>true_block + break; + case IR_ULE: + | jbe =>true_block + break; + case IR_UGT: + | ja =>true_block + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + if (!false_block) { + | jp >1 + | je =>true_block + |1: + } else { + | jp =>false_block + | je =>true_block + } + break; + case IR_NE: + | jne =>true_block + | jp =>true_block + break; + case IR_LT: + if (swap) { + | jb =>true_block + } else if (!false_block) { + | jp >1 + | jb =>true_block + |1: + } else { + | jp =>false_block + | jb =>true_block + } + break; + case IR_GE: + if (swap) { + | jp =>true_block + } + | jae =>true_block + break; + case IR_LE: + if (swap) { + | jbe =>true_block + } else if (!false_block) { + | jp >1 + | jbe =>true_block + |1: + } else { + | jp =>false_block + | jbe =>true_block + } + break; + case IR_GT: + if (swap) { + | jp =>true_block + } + | ja =>true_block + break; +// + case IR_ULT: + if (swap) { + | jp =>true_block + } + | jb =>true_block + break; + case IR_UGE: + if (swap) { + | jae =>true_block + } else if (!false_block) { + | jp >1 + | jae =>true_block + |1: + } else { + | jp =>false_block + | jae =>true_block + } + 
break; + case IR_ULE: + if (swap) { + | jp =>true_block + } + | jbe =>true_block + break; + case IR_UGT: + if (swap) { + | ja =>true_block + } else if (!false_block) { + | jp >1 + | ja =>true_block + |1: + } else { + | jp =>false_block + | ja =>true_block + } + break; + } + } + if (false_block) { + | jmp =>false_block + } +} + +static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + ir_emit_jmp_false(ctx, b, def); + return; + } else if (op == IR_UGE) { + /* always true */ + ir_emit_jmp_true(ctx, b, def); + return; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + + bool same_comparison = 0; + ir_insn *prev_insn = &ctx->ir_base[insn->op1]; + if (prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) { + if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) { + prev_insn = &ctx->ir_base[prev_insn->op1]; + prev_insn = &ctx->ir_base[prev_insn->op2]; + if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) { + same_comparison = true; + } + } + } + if (!same_comparison) { + ir_emit_cmp_int_common(ctx, type, cmp_insn, op1_reg, op1, op2_reg, op2); + } + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_ref op2 = insn->op2; + ir_op op = ctx->ir_base[op2].op; + + if (op >= IR_EQ && op <= IR_UGT) { + op2 = ctx->ir_base[op2].op1; + } else { + IR_ASSERT(op == IR_AND); + op = IR_NE; + } + + ir_emit_test_int_common(ctx, op2, op); + ir_emit_jcc(ctx, op, b, def, insn, 1); +} + +static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + ir_emit_jcc(ctx, op, b, def, insn, 0); +} + +static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_type type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_REG_OP test, type, op2_reg, op2_reg + } else if (IR_IS_CONST_REF(insn->op2)) { + uint32_t true_block, false_block, next_block; + + ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); + if (ir_const_is_true(&ctx->ir_base[insn->op2])) { + if (true_block != next_block) { + | jmp =>true_block + } + } else { + if (false_block != next_block) { + | jmp =>false_block + } + } + return; + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } 
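+ /* op2 has no register of its own: compare the in-memory value with zero directly, using an absolute address when no base register was produced either */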
+ if (op2_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], 0 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0 + } + } + ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); +} + +static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_ref op1 = insn->op1; + ir_ref op2 = insn->op2; + ir_ref op3 = insn->op3; + ir_type op1_type = ctx->ir_base[op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, op2); + if (op1 == op2) { + op1_reg = op2_reg; + } + if (op3 == op2) { + op3_reg = op2_reg; + } + } + if (op3_reg != IR_REG_NONE && op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, op3); + if (op1 == op3) { + op1_reg = op3_reg; + } + } + if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, op1_type, op1_reg, op1); + } + + if (IR_IS_TYPE_INT(op1_type)) { + if (op1_reg != IR_REG_NONE) { + | ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, op1, &fp); + + | ASM_MEM_IMM_OP cmp, op1_type, [Ra(fp)+offset], 0 + } + | je >2 + } else { + if (!data->double_zero_const) { + data->double_zero_const = 1; + ir_rodata(ctx); + |.align 16 + |->double_zero_const: + |.dword 0, 0 + |.code + } + | ASM_FP_REG_MEM_OP ucomiss, ucomisd, vucomiss, vucomisd, op1_type, op1_reg, [->double_zero_const] + | jp >1 + | je >2 + |1: + } + + if (op2_reg != IR_REG_NONE) { + if (def_reg != op2_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, op2_reg); + } else { + ir_emit_fp_mov(ctx, type, def_reg, op2_reg); + } + } + } else if (IR_IS_CONST_REF(op2) || !(ir_rule(ctx, op2) & IR_FUSED)) { + ir_emit_load(ctx, type, def_reg, op2); + } else { + int32_t offset = ir_fuse_load(ctx, op2, &op2_reg); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + } + } + | jmp >3 + |2: + if (op3_reg != IR_REG_NONE) { + if (def_reg != op3_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, op3_reg); + } else { + ir_emit_fp_mov(ctx, type, def_reg, op3_reg); + } + } + } else if (IR_IS_CONST_REF(op3) || !(ir_rule(ctx, op3) & IR_FUSED)) { + ir_emit_load(ctx, type, def_reg, op3); + } else { + int32_t offset = ir_fuse_load(ctx, op3, &op3_reg); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, op3_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, op3_reg, offset); + } + } + |3: + + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_return_void(ir_ctx *ctx) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_emit_epilogue(ctx); + +#ifdef IR_TARGET_X86 + if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) { + | ret ctx->param_stack_size + return; + } +#endif + + | ret +} + +static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + 
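/* move the returned value into the integer return register, if it is not already there, then fall through to the common epilogue */ +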
ir_reg op2_reg = ctx->regs[ref][2]; + + if (op2_reg != IR_REG_INT_RET1) { + ir_type type = ctx->ir_base[insn->op2].type; + + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + } + } + ir_emit_return_void(ctx); +} + +static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_reg op2_reg = ctx->regs[ref][2]; + ir_type type = ctx->ir_base[insn->op2].type; + +#ifdef IR_REG_FP_RET1 + if (op2_reg != IR_REG_FP_RET1) { + if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { + ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + } else { + ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + } + } +#else + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp); + + if (type == IR_DOUBLE) { + | fld qword [Ra(fp)+offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [Ra(fp)+offset] + } + } else { + int32_t offset = ctx->ret_slot; + ir_reg fp; + + IR_ASSERT(offset != -1); + offset = IR_SPILL_POS_TO_OFFSET(offset); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + ir_emit_store_mem_fp(ctx, type, fp, offset, op2_reg); + if (type == IR_DOUBLE) { + | fld qword [Ra(fp)+offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [Ra(fp)+offset] + } + } +#endif + ir_emit_return_void(ctx); +} + +static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | movsx Rw(def_reg), Rb(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | movsx Rd(def_reg), Rb(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsx Rq(def_reg), Rb(op1_reg) +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | movsx Rd(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsx Rq(def_reg), Rw(op1_reg) +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movsxd Rq(def_reg), Rd(op1_reg) +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + if (op1_reg != IR_REG_NONE) { + | movsx Rw(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movsx Rw(def_reg), byte [offset] + } + } else if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movsx Rd(def_reg), 
byte [Ra(op1_reg)+offset] + } else { + | movsx Rd(def_reg), byte [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsx Rq(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movsx Rq(def_reg), byte [offset] + } +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movsx Rd(def_reg), word [Ra(op1_reg)+offset] + } else { + | movsx Rd(def_reg), word [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsx Rq(def_reg), word [Ra(op1_reg)+offset] + } else { + | movsx Rq(def_reg), word [offset] + } +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movsxd Rq(def_reg), dword [Ra(op1_reg)+offset] + } else { + | movsxd Rq(def_reg), dword [offset] + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + | movzx Rw(def_reg), Rb(op1_reg) + } else if (ir_type_size[dst_type] == 4) { + | movzx Rd(def_reg), Rb(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movzx Rq(def_reg), Rb(op1_reg) +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + | movzx Rd(def_reg), Rw(op1_reg) + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | movzx Rq(def_reg), Rw(op1_reg) +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + /* Avoid zero extension to the same register. This may be not always safe ??? 
*/ + if (op1_reg != def_reg) { + | mov Rd(def_reg), Rd(op1_reg) + } +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (ir_type_size[src_type] == 1) { + if (ir_type_size[dst_type] == 2) { + if (op1_reg != IR_REG_NONE) { + | movzx Rw(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rw(def_reg), byte [offset] + } + } else if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movzx Rd(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rd(def_reg), byte [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movzx Rq(def_reg), byte [Ra(op1_reg)+offset] + } else { + | movzx Rq(def_reg), byte [offset] + } +|.endif + } + } else if (ir_type_size[src_type] == 2) { + if (ir_type_size[dst_type] == 4) { + if (op1_reg != IR_REG_NONE) { + | movzx Rd(def_reg), word [Ra(op1_reg)+offset] + } else { + | movzx Rd(def_reg), word [offset] + } + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | movzx Rq(def_reg), word [Ra(op1_reg)+offset] + } else { + | movzx Rq(def_reg), word [offset] + } +|.endif + } + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); +|.if X64 + if (op1_reg != IR_REG_NONE) { + | mov Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | mov Rd(def_reg), dword [offset] + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != IR_REG_NONE) { + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + int32_t offset; + + IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + ir_emit_load_mem_int(ctx, dst_type, def_reg, op1_reg, offset); + } else if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, 
dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + ir_emit_load_mem_fp(ctx, dst_type, def_reg, op1_reg, offset); + } else if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else { + ir_emit_load(ctx, dst_type, def_reg, insn->op1); + } + } else if (IR_IS_TYPE_FP(src_type)) { + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(def_reg), _insn->val.i64 +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + | mov Rd(def_reg), _insn->val.i32 + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (src_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov Rq(def_reg), qword [Ra(op1_reg)+offset] +|.endif + } else { + IR_ASSERT(src_type == IR_FLOAT); + | mov Rd(def_reg), dword [Ra(op1_reg)+offset] + } + } + } else if (IR_IS_TYPE_FP(dst_type)) { + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (dst_type == IR_DOUBLE) { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } +|.endif + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *val_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + val_insn->const_flags |= IR_CONST_EMIT; + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [=>label] + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, dst_type, def_reg, [Ra(op1_reg)+offset] + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg 
= IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + bool src64 = 0; + + IR_ASSERT(IR_IS_TYPE_INT(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(src_type) ? ir_type_size[src_type] == 8 : ir_type_size[src_type] >= 4) { + // TODO: we might need to perform sign/zero integer extension to 32/64 bit integer + src64 = 1; + } + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (!src64) { + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) + } + } +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (!src64) { + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), 
xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + bool dst64 = 0; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_INT(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_TYPE_SIGNED(dst_type) ? ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) { + // TODO: we might need to perform truncation from 32/64 bit integer + dst64 = 1; + } + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) + } + } +|.endif + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + _insn->const_flags |= IR_CONST_EMIT; + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), qword [=>label] + } else { + | cvtsd2si Rd(def_reg), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), dword [=>label] + } else { + | cvtss2si Rd(def_reg), dword [=>label] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), qword [=>label] + } else { + | cvtsd2si Rq(def_reg), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), dword [=>label] + } else { + | cvtss2si Rq(def_reg), dword [=>label] + } + } +|.endif + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + 
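/* the source operand is a fused load: resolve its address expression into a base register and displacement */ +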
offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (!dst64) { + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rd(def_reg), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2si Rd(def_reg), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rd(def_reg), dword [Ra(op1_reg)+offset] + } else { + | cvtss2si Rd(def_reg), dword [Ra(op1_reg)+offset] + } + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2si Rq(def_reg), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2si Rq(def_reg), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2si Rq(def_reg), dword [Ra(op1_reg)+offset] + } else { + | cvtss2si Rq(def_reg), dword [Ra(op1_reg)+offset] + } + } +|.endif + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type dst_type = insn->type; + ir_type src_type = ctx->ir_base[insn->op1].type; + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(IR_IS_TYPE_FP(src_type)); + IR_ASSERT(IR_IS_TYPE_FP(dst_type)); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + if (src_type == dst_type) { + if (op1_reg != def_reg) { + ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); + } + } else if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) + } + } + } else if (IR_IS_CONST_REF(insn->op1)) { + ir_insn *_insn = &ctx->ir_base[insn->op1]; + int label = ctx->cfg_blocks_count - insn->op1; + + _insn->const_flags |= IR_CONST_EMIT; + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] + } + } + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op1, &op1_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op1, &op1_reg); + } + + if (src_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } else { + | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [Ra(op1_reg)+offset] + } + } else { + IR_ASSERT(src_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vcvtss2sd 
xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } else { + | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [Ra(op1_reg)+offset] + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, dst_type, def, def_reg); + } +} + +static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op1_reg = ctx->regs[def][1]; + + IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, insn->op1); + } + if (def_reg == op1_reg) { + /* same reg */ + } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { + ir_emit_fp_mov(ctx, type, def_reg, op1_reg); + } else if (def_reg != IR_REG_NONE) { + ir_emit_load(ctx, type, def_reg, insn->op1); + } else if (op1_reg != IR_REG_NONE) { + ir_emit_store(ctx, type, def, op1_reg); + } else { + IR_ASSERT(0); + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset; + ir_reg fp; + + IR_ASSERT(def_reg != IR_REG_NONE); + offset = ir_var_spill_slot(ctx, insn->op1, &fp); + | lea Ra(def_reg), aword [Ra(fp)+offset] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { + return; // fake load + } + IR_ASSERT(def_reg != IR_REG_NONE); + + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, def_reg, fp, offset); + } else { + ir_emit_load_mem_fp(ctx, type, def_reg, fp, offset); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [Ra(fp)+offset], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, fp, offset, op3_reg); + } +} + +static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_insn *var_insn = &ctx->ir_base[insn->op2]; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op3_reg = ctx->regs[ref][3]; + ir_reg fp; + int32_t offset; + + IR_ASSERT(var_insn->op == IR_VAR); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) + && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { + return; // fake store + } + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, fp, offset, op3_reg); +} + +static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset = 0; + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = (void*)ctx->ir_base[insn->op2].val.addr; + + if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) { + int32_t addr32 = (int32_t)(intptr_t)addr; + | ASM_REG_MEM_OP mov, type, def_reg, [addr32] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + return; + } + } + if (op2_reg == IR_REG_NONE) { + op2_reg = def_reg; + } + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + } else if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + ir_emit_load_mem_int(ctx, type, def_reg, op2_reg, offset); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = insn->type; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + int32_t offset = 0; + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + IR_ASSERT(def_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + | ASM_FP_REG_MEM_OP movss, movsd, vmovss, vmovsd, type, def_reg, [addr32] + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, def, def)) { + ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + } + /* avoid load to the same location (valid only when register is not reused) */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + 
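Aside (sketch, helper name hypothetical): the `sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(...)` checks in the load/store paths above decide whether an absolute address can be encoded directly as a 32-bit displacement. The test amounts to:

#include <stdint.h>

/* An absolute address is encodable as disp32 only if sign-extending its
 * low 32 bits reproduces the original pointer. */
static int fits_disp32(const void *p)
{
    intptr_t v = (intptr_t)p;
    return v == (intptr_t)(int32_t)v;
}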
ir_emit_load_mem_fp(ctx, type, def_reg, op2_reg, offset); + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *val_insn = &ctx->ir_base[insn->op3]; + ir_ref type = val_insn->type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + int32_t offset = 0; + + if (IR_IS_CONST_REF(insn->op2)) { + + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [addr32], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + | ASM_MEM_REG_OP mov, type, [addr32], op3_reg + } + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + + if (IR_IS_CONST_REF(insn->op3) && IR_IS_32BIT(type, val_insn->val)) { + | ASM_MEM_IMM_OP mov, type, [Ra(op2_reg)+offset], val_insn->val.i32 + } else { + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_int(ctx, type, op2_reg, offset, op3_reg); + } +} + +static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_ref type = ctx->ir_base[insn->op3].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg op3_reg = ctx->regs[ref][3]; + int32_t offset = 0; + + IR_ASSERT(op3_reg != IR_REG_NONE); + if (IR_IS_CONST_REF(insn->op2)) { + if (op2_reg == IR_REG_NONE) { + int32_t addr32 = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + | ASM_FP_MEM_REG_OP movss, movsd, vmovss, vmovsd, type, [addr32], op3_reg + return; + } else { + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + } else if (op2_reg == IR_REG_NONE) { + offset = ir_fuse_addr(ctx, insn->op2, &op2_reg); + if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, op2_reg, offset)) { + if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + /* avoid store to the same location */ + return; + } + } else if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); + ir_emit_load(ctx, IR_ADDR, 
op2_reg, insn->op2); + } + + if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, type, op3_reg, insn->op3); + } + ir_emit_store_mem_fp(ctx, type, op2_reg, offset, op3_reg); +} + +static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_reg src_reg = insn->op2; + ir_type type = insn->type; + + if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { + if (ctx->vregs[def] + && ctx->live_intervals[ctx->vregs[def]] + && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (def_reg == IR_REG_NONE) { + /* op3 is used as a flag that the value is already stored in memory. + * If op3 is set we don't have to store the value once again (in case of spilling) + */ + if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3)) { + ir_emit_store(ctx, type, def, src_reg); + } + } else { + if (src_reg != def_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, def_reg, src_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, def_reg, src_reg); + } + } + if (IR_REG_SPILLED(ctx->regs[def][0]) + && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, ctx->spill_base, insn->op3))) { + ir_emit_store(ctx, type, def, def_reg); + } + } + } +} + +static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) +{ + ir_ref type = ctx->ir_base[insn->op2].type; + ir_reg op2_reg = ctx->regs[ref][2]; + ir_reg dst_reg = insn->op3; + + if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { + int32_t offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + if (IR_IS_TYPE_INT(type)) { + ir_emit_load_mem_int(ctx, type, dst_reg, op2_reg, offset); + } else { + ir_emit_load_mem_fp(ctx, type, dst_reg, op2_reg, offset); + } + } else if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (op2_reg != dst_reg) { + if (IR_IS_TYPE_INT(type)) { + ir_emit_mov(ctx, type, dst_reg, op2_reg); + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); + } + } + } else { + ir_emit_load(ctx, type, dst_reg, insn->op2); + } +} + +static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size += size; + } + } else { + int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 
16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + IR_ASSERT(def_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + if (def_reg != op2_reg) { + if (op2_reg != IR_REG_NONE) { + ir_emit_mov(ctx, type, def_reg, op2_reg); + } else { + ir_emit_load(ctx, type, def_reg, insn->op2); + } + } + + | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) + | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) + | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg + } + if (def_reg != IR_REG_NONE) { + | mov Ra(def_reg), Ra(IR_REG_RSP) + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else { + ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); + } +} + +static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *val = &ctx->ir_base[insn->op2]; + int32_t size = val->val.i32; + + IR_ASSERT(IR_IS_TYPE_INT(val->type)); + IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); + IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + size = IR_ALIGNED_SIZE(size, 16); + } else { + size = IR_ALIGNED_SIZE(size, 8); + } + | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size + if (!(ctx->flags & IR_USE_FRAME_POINTER)) { + ctx->call_stack_size -= size; + } + } else { +// int32_t alignment = (ctx->flags & IR_HAS_CALLS) ? 16 : 8; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + + IR_ASSERT(ctx->flags & IR_FUNCTION); + if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + // TODO: alignment ??? 
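Aside (illustrative, not part of the patch): both the constant-size ALLOCA path and the add/and pair emitted for dynamic sizes rely on the usual power-of-two round-up identity, with 16-byte alignment when the frame makes calls and 8-byte alignment otherwise. A small self-checking sketch:

#include <assert.h>
#include <stdint.h>

static uint32_t aligned_size(uint32_t size, uint32_t align) /* align: power of two */
{
    return (size + (align - 1)) & ~(align - 1);
}

int main(void)
{
    assert(aligned_size(24, 16) == 32); /* frames with calls: 16-byte alignment */
    assert(aligned_size(24, 8)  == 24); /* leaf frames: 8-byte alignment */
    assert(aligned_size(1, 8)   == 8);
    return 0;
}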
+ + | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg + } +} + +static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + ir_block *bb; + ir_insn *use_insn, *val; + uint32_t n, *p, use_block; + int i; + int label, default_label = 0; + int count = 0; + ir_val min, max; + int64_t offset; + ir_reg op2_reg = ctx->regs[def][2]; +|.if X64 +|| ir_reg tmp_reg = ctx->regs[def][3]; +|.endif + + type = ctx->ir_base[insn->op2].type; + if (IR_IS_TYPE_SIGNED(type)) { + min.u64 = 0x7fffffffffffffff; + max.u64 = 0x8000000000000000; + } else { + min.u64 = 0xffffffffffffffff; + max.u64 = 0x0; + } + + bb = &ctx->cfg_blocks[b]; + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + if (IR_IS_TYPE_SIGNED(type)) { + IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); + min.i64 = IR_MIN(min.i64, val->val.i64); + max.i64 = IR_MAX(max.i64, val->val.i64); + } else { + IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); + min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); + max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); + } + count++; + } else { + IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); + default_label = ir_skip_empty_target_blocks(ctx, use_block); + } + } + + IR_ASSERT(op2_reg != IR_REG_NONE); +|.if X64 +|| IR_ASSERT(tmp_reg != IR_REG_NONE || sizeof(void*) != 8); +|.endif + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } else if (IR_IS_CONST_REF(insn->op2)) { + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + + /* Generate a table jmp or a seqence of calls */ + if ((max.i64-min.i64) < count * 8) { + int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); + + for (i = 0; i <= (max.i64 - min.i64); i++) { + labels[i] = default_label; + } + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + labels[val->val.i64 - min.i64] = label; + } + } + + if (IR_IS_32BIT(type, max)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, max.i32 + } else { + IR_ASSERT(ir_type_size[type] == 8); + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(tmp_reg), max.i64 + | cmp Rq(op2_reg), Rq(tmp_reg) +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jg =>default_label + } else { + | ja =>default_label + } + + if (IR_IS_32BIT(type, min)) { + offset = -min.i64 * sizeof(void*); + if (IR_IS_SIGNED_32BIT(offset)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, min.i32 + } else { + | ASM_REG_REG_OP sub, type, op2_reg, (int32_t)offset // TODO: reg clobbering + offset = 0; + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Rq(tmp_reg), min.i64 + | ASM_REG_REG_OP sub, type, op2_reg, tmp_reg // TODO: reg clobbering + offset = 0; +|.endif + } + if (IR_IS_TYPE_SIGNED(type)) { + | jl =>default_label + } else { + | jb =>default_label + } + if (sizeof(void*) == 8) { +|.if X64 + switch (ir_type_size[type]) { + default: + IR_ASSERT(0); + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rb(op2_reg) + } else { + | movzx Ra(op2_reg), Rb(op2_reg) + } + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx 
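Aside (sketch mirroring the `(max.i64-min.i64) < count * 8` test in ir_emit_switch below): the switch lowering emits a jump table only while the case-value range stays under eight table slots per case, otherwise it falls back to a compare-and-branch sequence.

#include <stdint.h>

static int use_jump_table(int64_t min, int64_t max, int count)
{
    return (max - min) < (int64_t)count * 8;
}
/* use_jump_table(10, 40, 5)  != 0: a 31-entry table, acceptably sparse.
 * use_jump_table(0, 1000, 5) == 0: too sparse, emit compares instead.   */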
Ra(op2_reg), Rw(op2_reg) + } else { + | movzx Ra(op2_reg), Rw(op2_reg) + } + break; + case 4: + if (IR_IS_TYPE_SIGNED(type)) { + | movsxd Ra(op2_reg), Rd(op2_reg) + } else { + | mov Rd(op2_reg), Rd(op2_reg) + } + break; + case 8: + break; + } + | lea Ra(tmp_reg), aword [>1] + | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+(int32_t)offset] +|.endif + } else { +|.if not X64 + switch (ir_type_size[type]) { + default: + IR_ASSERT(0 && "Unsupported type size"); + case 1: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rb(op2_reg) + } else { + | movzx Ra(op2_reg), Rb(op2_reg) + } + break; + case 2: + if (IR_IS_TYPE_SIGNED(type)) { + | movsx Ra(op2_reg), Rw(op2_reg) + } else { + | movzx Ra(op2_reg), Rw(op2_reg) + } + break; + case 4: + break; + } + |// jmp aword [Ra(op2_reg)*4+(int32_t)offset+>1] + | lea Ra(op2_reg), aword [Ra(op2_reg)*4+(int32_t)offset] // TODO: reg clobbering + | jmp aword [Ra(op2_reg)+>1] +|.endif + } + |.jmp_table + if (!data->jmp_table_label) { + data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; + |=>data->jmp_table_label: + } + |.align aword + |1: + for (i = 0; i <= (max.i64 - min.i64); i++) { + int b = labels[i]; + ir_block *bb = &ctx->cfg_blocks[b]; + ir_insn *insn = &ctx->ir_base[bb->end]; + + if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { + ir_ref prev = ctx->prev_ref[bb->end]; + if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { + prev = ctx->prev_ref[prev]; + } + if (prev == bb->start) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + | .aword &addr + if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { + bb->flags |= IR_BB_EMPTY; + } + continue; + } + } + | .aword =>b + } + |.code + ir_mem_free(labels); + } else { + p = &ctx->cfg_edges[bb->successors]; + for (n = bb->successors_count; n != 0; p++, n--) { + use_block = *p; + use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; + if (use_insn->op == IR_CASE_VAL) { + val = &ctx->ir_base[use_insn->op2]; + label = ir_skip_empty_target_blocks(ctx, use_block); + if (IR_IS_32BIT(type, val->val)) { + | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov64 Ra(tmp_reg), val->val.i64 + | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg +|.endif + } + | je =>label + } + } + if (default_label) { + | jmp =>default_label + } + } +} + +static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int i; + int8_t *pred, *loc, *types; + ir_reg to, from_reg, c; + ir_type type; + ir_regset todo, ready; + ir_reg last_reg = IR_REG_NONE, last_fp_reg = IR_REG_NONE; + + loc = ir_mem_malloc(IR_REG_NUM * 3 * sizeof(int8_t)); + pred = loc + IR_REG_NUM; + types = pred + IR_REG_NUM; + memset(loc, IR_REG_NONE, IR_REG_NUM * 2 * sizeof(int8_t)); + todo = IR_REGSET_EMPTY; + ready = IR_REGSET_EMPTY; + + for (i = 0; i < count; i++) { + from_reg = copies[i].from; + to = copies[i].to; + if (from_reg != to) { + loc[from_reg] = from_reg; + pred[to] = from_reg; + types[from_reg] = copies[i].type; + /* temporary register may be the same as some of destinations */ + if (to == tmp_reg) { + IR_ASSERT(last_reg == IR_REG_NONE); + last_reg = to; + } else if (to == tmp_fp_reg) { + IR_ASSERT(last_fp_reg == IR_REG_NONE); + last_fp_reg = to; + } else { + IR_ASSERT(!IR_REGSET_IN(todo, to)); + IR_REGSET_INCL(todo, to); + } + } + } + + IR_REGSET_FOREACH(todo, i) { + if (loc[i] == IR_REG_NONE) { + IR_REGSET_INCL(ready, i); + } + 
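Aside (toy sketch, hypothetical values): the worklist that follows resolves a parallel copy with one pending source per destination; when no destination is "ready" the remaining moves form a cycle, which is broken through the temporary register, exactly as in the fallback branch below.

#include <assert.h>

int main(void)
{
    /* r0 <- r1 and r1 <- r0: a two-register cycle. */
    int r0 = 10, r1 = 20, tmp;

    tmp = r0;  /* break the cycle (the ir_emit_mov(..., tmp_reg, to) step) */
    r0  = r1;  /* its source is now free */
    r1  = tmp; /* drain the cycle from the temporary */

    assert(r0 == 20 && r1 == 10);
    return 0;
}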
} IR_REGSET_FOREACH_END();
+
+	while (1) {
+		while (ready != IR_REGSET_EMPTY) {
+			to = ir_regset_pop_first(&ready);
+			from_reg = pred[to];
+			c = loc[from_reg];
+			type = types[from_reg];
+			if (IR_IS_TYPE_INT(type)) {
+				if (ir_type_size[type] > 2) {
+					ir_emit_mov(ctx, type, to, c);
+				} else if (ir_type_size[type] == 2) {
+					if (IR_IS_TYPE_SIGNED(type)) {
+						|	movsx Rd(to), Rw(c)
+						type = IR_I32;
+					} else {
+						|	movzx Rd(to), Rw(c)
+						type = IR_U32;
+					}
+				} else /* if (ir_type_size[type] == 1) */ {
+					if (IR_IS_TYPE_SIGNED(type)) {
+						|	movsx Rd(to), Rb(c)
+						type = IR_I32;
+					} else {
+						|	movzx Rd(to), Rb(c)
+						type = IR_U32;
+					}
+				}
+			} else {
+				ir_emit_fp_mov(ctx, type, to, c);
+			}
+			IR_REGSET_EXCL(todo, to);
+			loc[from_reg] = to;
+			if (from_reg == c && pred[from_reg] != IR_REG_NONE) {
+				IR_REGSET_INCL(ready, from_reg);
+			}
+		}
+
+		if (todo == IR_REGSET_EMPTY) {
+			break;
+		}
+		to = ir_regset_pop_first(&todo);
+		from_reg = pred[to];
+		IR_ASSERT(to != loc[from_reg]);
+		type = types[from_reg];
+		if (IR_IS_TYPE_INT(type)) {
+			IR_ASSERT(tmp_reg != IR_REG_NONE);
+			IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
+			ir_emit_mov(ctx, type, tmp_reg, to);
+			loc[to] = tmp_reg;
+		} else {
+			IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
+			IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
+			ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
+			loc[to] = tmp_fp_reg;
+		}
+		IR_REGSET_INCL(ready, to);
+	}
+
+	if (last_reg != IR_REG_NONE) {
+		to = last_reg;
+		from_reg = pred[to];
+		c = loc[from_reg];
+		if (to != c) {
+			type = types[from_reg];
+			IR_ASSERT(IR_IS_TYPE_INT(type));
+			if (ir_type_size[type] > 2) {
+				ir_emit_mov(ctx, type, to, c);
+			} else if (ir_type_size[type] == 2) {
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	movsx Rd(to), Rw(c)
+					type = IR_I32;
+				} else {
+					|	movzx Rd(to), Rw(c)
+					type = IR_U32;
+				}
+			} else /* if (ir_type_size[type] == 1) */ {
+				if (IR_IS_TYPE_SIGNED(type)) {
+					|	movsx Rd(to), Rb(c)
+					type = IR_I32;
+				} else {
+					|	movzx Rd(to), Rb(c)
+					type = IR_U32;
+				}
+			}
+		}
+	}
+
+	if (last_fp_reg != IR_REG_NONE) {
+		to = last_fp_reg;
+		from_reg = pred[to];
+		c = loc[from_reg];
+		if (to != c) {
+			type = types[from_reg];
+			IR_ASSERT(!IR_IS_TYPE_INT(type));
+			ir_emit_fp_mov(ctx, type, to, c);
+		}
+	}
+
+	ir_mem_free(loc);
+
+	return 1;
+}
+
+static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
+{
+	int j, n;
+	ir_type type;
+	int int_param = 0;
+	int fp_param = 0;
+	int int_reg_params_count = IR_REG_INT_ARGS;
+	int fp_reg_params_count = IR_REG_FP_ARGS;
+	int32_t used_stack = 0;
+
+#ifdef IR_HAVE_FASTCALL
+	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
+		int_reg_params_count = IR_REG_INT_FCARGS;
+		fp_reg_params_count = IR_REG_FP_FCARGS;
+	}
+#endif
+
+	n = insn->inputs_count;
+	for (j = 3; j <= n; j++) {
+		type = ctx->ir_base[ir_insn_op(insn, j)].type;
+		if (IR_IS_TYPE_INT(type)) {
+			if (int_param >= int_reg_params_count) {
+				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
+			}
+			int_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			fp_param++;
+#endif
+		} else {
+			IR_ASSERT(IR_IS_TYPE_FP(type));
+			if (fp_param >= fp_reg_params_count) {
+				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
+			}
+			fp_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			int_param++;
+#endif
+		}
+	}
+
+	/* Reserve "home space" or "shadow store" for register arguments (used by the Windows x64 ABI) */
+	used_stack += IR_SHADOW_ARGS;
+
+	return used_stack;
+}
+
+static int32_t
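Aside (back-of-the-envelope model of ir_call_used_stack() above, not part of the patch): assuming the SysV x86-64 convention with 6 GP and 8 XMM argument registers, no shadow space, and ignoring fastcall:

#include <stdint.h>

static int32_t used_stack_sysv(const char *args) /* 'i' = int, 'f' = fp */
{
    int int_param = 0, fp_param = 0;
    int32_t used = 0;

    for (; *args; args++) {
        if (*args == 'i') {
            if (int_param++ >= 6) used += 8; /* slot is max(sizeof(void*), size) */
        } else {
            if (fp_param++ >= 8) used += 8;
        }
    }
    return used;
}
/* used_stack_sysv("iiiiiiii") == 16: only the 7th and 8th ints spill. */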
ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int j, n; + ir_ref arg; + ir_insn *arg_insn; + uint8_t type; + ir_reg src_reg, dst_reg; + int int_param = 0; + int fp_param = 0; + int count = 0; + int int_reg_params_count = IR_REG_INT_ARGS; + int fp_reg_params_count = IR_REG_FP_ARGS; + const int8_t *int_reg_params = _ir_int_reg_params; + const int8_t *fp_reg_params = _ir_fp_reg_params; + int32_t used_stack, stack_offset = IR_SHADOW_ARGS; + ir_copy *copies; + bool do_pass3 = 0; + /* For temporaries we may use any scratch registers except for registers used for parameters */ + ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ + + n = insn->inputs_count; + if (n < 3) { + return 0; + } + + if (tmp_reg == IR_REG_NONE) { + tmp_reg = IR_REG_RAX; + } + +#ifdef IR_HAVE_FASTCALL + if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { + int_reg_params_count = IR_REG_INT_FCARGS; + fp_reg_params_count = IR_REG_FP_FCARGS; + int_reg_params = _ir_int_fc_reg_params; + fp_reg_params = _ir_fp_fc_reg_params; + } +#endif + + if (insn->op == IR_CALL + && (ctx->flags & IR_PREALLOCATED_STACK) +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + // TODO: support for preallocated stack + used_stack = 0; + } else { + used_stack = ir_call_used_stack(ctx, insn); + if (IR_SHADOW_ARGS + && insn->op == IR_TAILCALL + && used_stack == IR_SHADOW_ARGS) { + used_stack = 0; + } + if (ctx->fixed_call_stack_size + && used_stack <= ctx->fixed_call_stack_size +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + used_stack = 0; + } else { + /* Stack must be 16 byte aligned */ + int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); + ctx->call_stack_size += aligned_stack; + if (aligned_stack) { + | sub Ra(IR_REG_RSP), aligned_stack + } + } + } + + /* 1. 
move all register arguments that should be passed through the stack
+	 *    and collect arguments that should be passed through registers */
+	copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
+	for (j = 3; j <= n; j++) {
+		arg = ir_insn_op(insn, j);
+		src_reg = ir_get_alocated_reg(ctx, def, j);
+		arg_insn = &ctx->ir_base[arg];
+		type = arg_insn->type;
+		if (IR_IS_TYPE_INT(type)) {
+			if (int_param < int_reg_params_count) {
+				dst_reg = int_reg_params[int_param];
+			} else {
+				dst_reg = IR_REG_NONE; /* pass argument through stack */
+			}
+			int_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			fp_param++;
+#endif
+		} else {
+			IR_ASSERT(IR_IS_TYPE_FP(type));
+			if (fp_param < fp_reg_params_count) {
+				dst_reg = fp_reg_params[fp_param];
+			} else {
+				dst_reg = IR_REG_NONE; /* pass argument through stack */
+			}
+			fp_param++;
+#ifdef _WIN64
+			/* The WIN64 calling convention uses a common counter for int and fp registers */
+			int_param++;
+#endif
+		}
+		if (dst_reg != IR_REG_NONE) {
+			if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
+				/* delay CONST->REG and MEM->REG moves to the third pass */
+				do_pass3 = 1;
+			} else {
+				if (IR_REG_SPILLED(src_reg)) {
+					src_reg = IR_REG_NUM(src_reg);
+					ir_emit_load(ctx, type, src_reg, arg);
+				}
+				if (src_reg != dst_reg) {
+					/* delay REG->REG moves to the second pass */
+					copies[count].type = type;
+					copies[count].from = src_reg;
+					copies[count].to = dst_reg;
+					count++;
+				}
+			}
+		} else {
+			/* Pass register arguments on the stack (REG->MEM moves) */
+			if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) {
+				if (IR_IS_TYPE_INT(type)) {
+					ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg);
+				} else {
+					ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg);
+				}
+			} else {
+				do_pass3 = 1;
+			}
+			stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
+		}
+	}
+
+	/* 2. move all arguments that should be passed from one register to another (REG->REG moves) */
+	if (count) {
+		ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg);
+	}
+	ir_mem_free(copies);
+
+	/* 3.
move the remaining memory and immediate values */ + if (do_pass3) { + stack_offset = IR_SHADOW_ARGS; + int_param = 0; + fp_param = 0; + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_INT(type)) { + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + int_param++; +#ifdef _WIN64 + /* WIN64 calling convention use common couter for int and fp registers */ + fp_param++; +#endif + } else { + IR_ASSERT(IR_IS_TYPE_FP(type)); + if (fp_param < fp_reg_params_count) { + dst_reg = fp_reg_params[fp_param]; + } else { + dst_reg = IR_REG_NONE; /* argument already passed through stack */ + } + fp_param++; +#ifdef _WIN64 + /* WIN64 calling convention use common couter for int and fp registers */ + int_param++; +#endif + } + if (dst_reg != IR_REG_NONE) { + if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + if (type == IR_ADDR) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + | lea Ra(dst_reg), aword [=>label] + continue; + } else if (val_insn->op == IR_SYM || val_insn->op == IR_FUNC) { + void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, val_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, val_insn->val.i32)); + if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr)) { + | mov Ra(dst_reg), ((ptrdiff_t)addr) + } else { +|.if X64 + | mov64 Rq(dst_reg), ((ptrdiff_t)addr) +|.endif + } + continue; + } + IR_ASSERT(val_insn->op == IR_ADDR || val_insn->op == IR_FUNC_ADDR); + } + if (type == IR_I8 || type == IR_I16) { + type = IR_I32; + } else if (type == IR_U8 || type == IR_U16) { + type = IR_U32; + } + ir_emit_load(ctx, type, dst_reg, arg); + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, arg, &fp); + + if (ir_type_size[type] > 2) { + ir_emit_load_mem_int(ctx, type, dst_reg, fp, offset); + } else if (ir_type_size[type] == 2) { + if (type == IR_I16) { + if (fp != IR_REG_NONE) { + | movsx Rd(dst_reg), word [Ra(fp)+offset] + } else { + | movsx Rd(dst_reg), word [offset] + } + } else { + if (fp != IR_REG_NONE) { + | movzx Rd(dst_reg), word [Ra(fp)+offset] + } else { + | movzx Rd(dst_reg), word [offset] + } + } + } else { + IR_ASSERT(ir_type_size[type] == 1); + if (type == IR_I8) { + if (fp != IR_REG_NONE) { + | movsx Rd(dst_reg), byte [Ra(fp)+offset] + } else { + | movsx Rd(dst_reg), byte [offset] + } + } else { + if (fp != IR_REG_NONE) { + | movzx Rd(dst_reg), byte [Ra(fp)+offset] + } else { + | movzx Rd(dst_reg), byte [offset] + } + } + } + } + } else { + ir_emit_load(ctx, type, dst_reg, arg); + } + } + } else { + if (IR_IS_TYPE_INT(type)) { + if (IR_IS_CONST_REF(arg)) { + ir_insn *val_insn = &ctx->ir_base[arg]; + + if (val_insn->op == IR_STR) { + int label = ctx->cfg_blocks_count - arg; + + val_insn->const_flags |= IR_CONST_EMIT; + IR_ASSERT(tmp_reg != IR_REG_NONE); +|.if X64 + | lea Ra(tmp_reg), aword [=>label] + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|.else + | mov [Ra(IR_REG_RSP)+stack_offset], =>label +|.endif + } else if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { + void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, val_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, val_insn->val.i32)); + if (sizeof(void*) == 4) { + | mov aword [Ra(IR_REG_RSP)+stack_offset], ((ptrdiff_t)addr) +|.if X64 +|| } else if (IR_IS_SIGNED_32BIT(addr)) { + | mov Ra(tmp_reg), ((ptrdiff_t)addr) + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) + | mov [Ra(IR_REG_RSP)+stack_offset], Ra(tmp_reg) +|.endif + } + } else if (IR_IS_SIGNED_32BIT(val_insn->val.i64)) { + if (ir_type_size[type] <= 4) { + | mov dword [Ra(IR_REG_RSP)+stack_offset], val_insn->val.i32 + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + | mov qword [rsp+stack_offset], val_insn->val.i32 +|.endif + } + } else { + IR_ASSERT(sizeof(void*) == 8); +|.if X64 + IR_ASSERT(tmp_reg != IR_REG_NONE); + | mov64 Ra(tmp_reg), val_insn->val.i64 + | mov [rsp+stack_offset], Ra(tmp_reg) +|.endif + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, tmp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_int(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } else { + if (IR_IS_CONST_REF(arg)) { + ir_val *val = &ctx->ir_base[arg].val; + if (ir_type_size[type] == 4) { + | mov dword [Ra(IR_REG_RSP)+stack_offset], val->i32 + } else if (sizeof(void*) == 8) { +|.if X64 + if (val->i64 == 0) { + | mov qword [rsp+stack_offset], val->i32 + } else { + IR_ASSERT(tmp_reg != IR_REG_NONE); + | mov64 Rq(tmp_reg), val->i64 + | mov qword [rsp+stack_offset], Ra(tmp_reg) + } +|.endif + } else { + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } + } else if (src_reg == IR_REG_NONE) { + IR_ASSERT(tmp_fp_reg != IR_REG_NONE); + ir_emit_load(ctx, type, tmp_fp_reg, arg); + ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_REG_STACK_POINTER, stack_offset, tmp_fp_reg); + } else if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, type, src_reg, arg); + ir_emit_store_mem_fp(ctx, type, IR_REG_STACK_POINTER, stack_offset, src_reg); + } + } + stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); + } + } + } + +#ifdef _WIN64 + /* WIN64 calling convention requires duplcation of parameters passed in FP register into GP ones */ + if (ir_is_vararg(ctx, insn)) { + n = IR_MIN(n, IR_MAX_REG_ARGS + 2); + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + if (IR_IS_TYPE_FP(type)) { + src_reg = fp_reg_params[j-3]; + dst_reg = int_reg_params[j-3]; +|.if X64 + if (ctx->mflags & IR_X86_AVX) { + | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) + } else { + | movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) + } +|.endif + } + } + } +#endif +#ifdef IR_REG_VARARG_FP_REGS + /* set hidden argument to specify the number of vector registers used */ + if (ir_is_vararg(ctx, insn)) { + fp_param = IR_MIN(fp_param, fp_reg_params_count); + | mov Rd(IR_REG_VARARG_FP_REGS), fp_param + } +#endif + + return used_stack; +} + +static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn 
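Aside (assumed convention table, illustrative only): the paired `int_param++`/`fp_param++` increments and the vararg vmovd duplication above follow from Win64's positional argument slots; a single counter advances across both register classes, and for varargs the FP value is mirrored into the matching GP register.

/* Win64: argument slot i is RCX/RDX/R8/R9 or XMM0..XMM3 by position. */
static const char *win64_arg_reg(int pos, int is_fp)
{
    static const char *gp[]  = { "rcx",  "rdx",  "r8",   "r9"   };
    static const char *xmm[] = { "xmm0", "xmm1", "xmm2", "xmm3" };

    return pos >= 4 ? "stack" : (is_fp ? xmm[pos] : gp[pos]);
}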
*addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? + ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | call aword &addr + } else { +|.if X64 +|| ir_reg tmp_reg = IR_REG_RAX; + +#ifdef IR_REG_VARARG_FP_REGS +|| if (ir_is_vararg(ctx, insn)) { +|| tmp_reg = IR_REG_R11; +|| } +#endif +|| if (IR_IS_SIGNED_32BIT(addr)) { + | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 +|| } + | call Rq(tmp_reg) +|.endif + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | call Ra(op2_reg) + } else { + int32_t offset; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + + if (op2_reg != IR_REG_NONE) { + | call aword [Ra(op2_reg)+offset] + } else { + | call aword [offset] + } + } + } + + if (used_stack) { + int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); + + ctx->call_stack_size -= aligned_stack; + if (ir_is_fastcall(ctx, insn)) { + aligned_stack -= used_stack; + if (aligned_stack) { + | add Ra(IR_REG_RSP), aligned_stack + } + } else { + | add Ra(IR_REG_RSP), aligned_stack + } + } + + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + def_reg = IR_REG_NUM(ctx->regs[def][0]); + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + } + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); + def_reg = IR_REG_NUM(ctx->regs[def][0]); +#ifdef IR_REG_FP_RET1 + if (def_reg != IR_REG_NONE) { + if (def_reg != IR_REG_FP_RET1) { + ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + } +#else + IR_ASSERT(0); // TODO: float/double return value +#endif + } + } +} + +static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + (void) used_stack; + + if (used_stack != 0) { + ir_emit_call(ctx, def, insn); + ir_emit_return_void(ctx); + return; + } + + ir_emit_epilogue(ctx); + + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *addr_insn = &ctx->ir_base[insn->op2]; + void *addr; + + IR_ASSERT(addr_insn->type == IR_ADDR); + if (addr_insn->op == IR_FUNC) { + addr = (ctx->loader && ctx->loader->resolve_sym_name) ? 
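Aside (sketch, helper name hypothetical): the `sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)` guard above chooses between a direct rel32 call/jmp and materializing the target in a scratch register (RAX, or R11 when RAX carries the hidden vararg FP-register count). The reach test amounts to:

#include <stdint.h>

/* A direct call/jmp encodes a signed 32-bit displacement from the end of
 * the instruction, so the target must lie within +/-2GB of it. */
static int fits_rel32(intptr_t target, intptr_t next_ip)
{
    intptr_t d = target - next_ip;
    return d == (intptr_t)(int32_t)d;
}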
+ ctx->loader->resolve_sym_name(ctx->loader, ir_get_str(ctx, addr_insn->val.i32)) : + ir_resolve_sym_name(ir_get_str(ctx, addr_insn->val.i32)); + } else { + IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); + addr = (void*)addr_insn->val.addr; + } + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 +|| ir_reg tmp_reg = IR_REG_RAX; + +#ifdef IR_REG_VARARG_FP_REGS +|| if (ir_is_vararg(ctx, insn)) { +|| tmp_reg = IR_REG_R11; +|| } +#endif +|| if (IR_IS_SIGNED_32BIT(addr)) { + | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 +|| } else { + | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 +|| } + | jmp Rq(tmp_reg) +|.endif + } + } else { + ir_reg op2_reg = ctx->regs[def][2]; + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | jmp Ra(op2_reg) + } else { + int32_t offset; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + + if (op2_reg != IR_REG_NONE) { + | jmp aword [Ra(op2_reg)+offset] + } else { + | jmp aword [offset] + } + } + } +} + +static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp rax +|.endif + } + } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { + int32_t offset; + + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + if (op2_reg == IR_REG_NONE) { + | jmp aword [offset] + } else { + | jmp aword [Ra(op2_reg)+offset] + } + } else if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + | jmp Ra(op2_reg) + } else { + ir_reg fp; + int32_t offset = ir_ref_spill_slot(ctx, insn->op2, &fp); + + | jmp aword [Ra(fp)+offset] + } +} + +static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint8_t op, void *addr, bool int_cmp) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *next_insn = &ctx->ir_base[def + 1]; + + if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { + ir_block *bb = &ctx->cfg_blocks[b]; + uint32_t target; + + if (!(bb->flags & IR_BB_DESSA_MOVES)) { + target = ctx->cfg_edges[bb->successors]; + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { + target = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, target); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne =>target + break; + case IR_NE: + | je =>target + break; + case IR_LT: + | jge =>target + break; + case IR_GE: + | jl =>target + break; + case IR_LE: + | jg =>target + break; + case IR_GT: + | jle =>target + break; + case IR_ULT: + 
| jae =>target + break; + case IR_UGE: + | jb =>target + break; + case IR_ULE: + | ja =>target + break; + case IR_UGT: + | jbe =>target + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne =>target + | jp =>target + break; + case IR_NE: + | jp &addr + | je =>target + break; + case IR_LT: + | jae =>target + break; + case IR_GE: + | jp &addr + | jb =>target + break; + case IR_LE: + | ja =>target + break; + case IR_GT: + | jp &addr + | jbe =>target + break; + } + } + | jmp &addr + return 1; + } + } + } else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) { + void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(target_addr)) { + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne &target_addr + break; + case IR_NE: + | je &target_addr + break; + case IR_LT: + | jge &target_addr + break; + case IR_GE: + | jl &target_addr + break; + case IR_LE: + | jg &target_addr + break; + case IR_GT: + | jle &target_addr + break; + case IR_ULT: + | jae &target_addr + break; + case IR_UGE: + | jb &target_addr + break; + case IR_ULE: + | ja &target_addr + break; + case IR_UGT: + | jbe &target_addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jne &target_addr + | jp &target_addr + break; + case IR_NE: + | jp &addr + | je &target_addr + break; + case IR_LT: + | jae &target_addr + break; + case IR_GE: + | jp &addr + | jb &target_addr + break; + case IR_LE: + | ja &target_addr + break; + case IR_GT: + | jp &addr + | jbe &target_addr + break; + } + } + | jmp &addr + return 1; + } + } + + if (int_cmp) { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | je &addr + break; + case IR_NE: + | jne &addr + break; + case IR_LT: + | jl &addr + break; + case IR_GE: + | jge &addr + break; + case IR_LE: + | jle &addr + break; + case IR_GT: + | jg &addr + break; + case IR_ULT: + | jb &addr + break; + case IR_UGE: + | jae &addr + break; + case IR_ULE: + | jbe &addr + break; + case IR_UGT: + | ja &addr + break; + } + } else { + switch (op) { + default: + IR_ASSERT(0 && "NIY binary op"); + case IR_EQ: + | jp >1 + | je &addr + |1: + break; + case IR_NE: + | jne &addr + | jp &addr + break; + case IR_LT: + | jp >1 + | jb &addr + |1: + break; + case IR_GE: + | jae &addr + break; + case IR_LE: + | jp >1 + | jbe &addr + |1: + break; + case IR_GT: + | ja &addr + break; +// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; +// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; +// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; +// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; + } + } + return 0; +} + +static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg op2_reg = ctx->regs[def][2]; + ir_type type = ctx->ir_base[insn->op2].type; + void *addr; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_CONST_REF(insn->op2)) { + bool is_true = ir_ref_is_true(ctx, insn->op2); + + if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 
0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] +|.endif + } + } + return 0; + } + + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, type, op2_reg, insn->op2); + } + | ASM_REG_REG_OP test, type, op2_reg, op2_reg + } else { + int32_t offset = 0; + + if (ir_rule(ctx, insn->op2) & IR_FUSED) { + offset = ir_fuse_load(ctx, insn->op2, &op2_reg); + } else { + offset = ir_ref_spill_slot(ctx, insn->op2, &op2_reg); + } + if (op2_reg == IR_REG_NONE) { + | ASM_MEM_IMM_OP cmp, type, [offset], 0 + } else { + | ASM_MEM_IMM_OP cmp, type, [Ra(op2_reg)+offset], 0 + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + ir_op op; + + if (insn->op == IR_GUARD) { + op = IR_EQ; + } else { + op = IR_NE; + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); + } else { +|.if X64 + if (insn->op == IR_GUARD) { + | je >1 + } else { + | jne >1 + } + |.cold_code + |1: + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] + |.code +|.endif + return 0; + } +} + +static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; + ir_op op = cmp_insn->op; + ir_type type = ctx->ir_base[cmp_insn->op1].type; + ir_ref op1 = cmp_insn->op1; + ir_ref op2 = cmp_insn->op2; + ir_reg op1_reg = ctx->regs[insn->op2][1]; + ir_reg op2_reg = ctx->regs[insn->op2][2]; + void *addr; + + if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, type, op1_reg, op1); + } + if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) { + op2_reg = IR_REG_NUM(op2_reg); + if (op1 != op2) { + ir_emit_load(ctx, type, op2_reg, op2); + } + } + + addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + if (IR_IS_CONST_REF(op2) && ctx->ir_base[op2].val.u64 == 0) { + if (op == IR_ULT) { + /* always false */ + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | jmp aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | jmp aword [rax] +|.endif + } + return 0; + } else if (op == IR_UGE) { + /* always true */ + return 0; + } else if (op == IR_ULE) { + op = IR_EQ; + } else if (op == IR_UGT) { + op = IR_NE; + } + } + ir_emit_cmp_int_common(ctx, type, cmp_insn, op1_reg, op1, op2_reg, op2); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 0); +} + +static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + ir_op op = (insn->op == IR_GUARD) ? 
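Aside (small demo, not part of the patch): the extra `jp` branches in the FP guard paths above exist because ucomisd/ucomiss set PF on unordered (NaN) operands, so equality must also fail on PF and inequality must also succeed on it. The C semantics the emitted code preserves:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = NAN;

    /* Unordered compares: == is false, != is true, ordered < is false. */
    printf("%d %d %d\n", x == x, x != x, x < 1.0); /* prints: 0 1 0 */
    return 0;
}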
IR_EQ : IR_NE; + + ir_emit_test_int_common(ctx, insn->op2, op); + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + ir_op op = ctx->ir_base[insn->op2].op; + + if (insn->op == IR_GUARD) { + op ^= 1; // reverse + } + return ir_emit_guard_jcc(ctx, b, def, op, addr, 1); +} + +static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_type type; + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); + + type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type; + + IR_ASSERT(IR_IS_TYPE_INT(type)); + if (IR_IS_TYPE_SIGNED(type)) { + if (insn->op == IR_GUARD) { + | jno &addr + } else { + | jo &addr + } + } else { + if (insn->op == IR_GUARD) { + | jnc &addr + } else { + | jc &addr + } + } + return 0; +} + +static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type, ir_reg base_reg, ir_reg index_reg, uint8_t scale, int32_t offset) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + if (index_reg == IR_REG_NONE) { + IR_ASSERT(base_reg != IR_REG_NONE); + if (!offset) { + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)] + } + } else { + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+offset] + } + } + } else { + if (base_reg == IR_REG_NONE) { + if (!offset) { + switch (scale) { + default: + IR_ASSERT(0); + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*2] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*2] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*4] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*4] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*8] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*8] + } + break; + } + } else { + switch (scale) { + default: + IR_ASSERT(0); + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*2+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*2+offset] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*4+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*4+offset] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(index_reg)*8+offset] + } else { + | lea Ra(def_reg), aword [Ra(index_reg)*8+offset] + } + break; + } + } + } else { + if (!offset) { + switch (scale) { + default: + IR_ASSERT(0); + case 1: + if (ir_type_size[type] == sizeof(void*)) { + if (def_reg == base_reg) { + | add Ra(def_reg), Ra(index_reg) + } else if (def_reg == index_reg) { + | add Ra(def_reg), Ra(base_reg) + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)] + } + } else { + IR_ASSERT(sizeof(void*) == 8 && ir_type_size[type] == 4); + if (def_reg == base_reg) { + | add Rd(def_reg), Rd(index_reg) + } else if (def_reg == index_reg) { + | add Rd(def_reg), Rd(base_reg) + } else { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)] + } + } + break; + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword 
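Aside (sketch, illustrative only): every addressing form handled by ir_emit_lea() below is an instance of one computation, with the x86 SIB byte restricting the scale to 1, 2, 4, or 8; the scale-1 case is also strength-reduced to an add when the destination aliases an input.

#include <stdint.h>

static uintptr_t effective_addr(uintptr_t base, uintptr_t index,
                                unsigned scale, int32_t disp)
{
    return base + index * scale + (intptr_t)disp; /* scale in {1, 2, 4, 8} */
}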
[Rd(base_reg)+Rd(index_reg)*2] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8] + } + break; + } + } else { + switch (scale) { + default: + IR_ASSERT(0); + case 1: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)+offset] + } + break; + case 2: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*2+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*2+offset] + } + break; + case 4: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*4+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*4+offset] + } + break; + case 8: + if (ir_type_size[type] == 4) { + | lea Rd(def_reg), dword [Rd(base_reg)+Rd(index_reg)*8+offset] + } else { + | lea Ra(def_reg), aword [Ra(base_reg)+Ra(index_reg)*8+offset] + } + break; + } + } + } + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); + } +} + +static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); + + if (ctx->use_lists[def].count == 1) { + /* dead load */ + return; + } + +|.if X64WIN +| gs +| mov Ra(reg), aword [0x58] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|.elif WIN +| fs +| mov Ra(reg), aword [0x2c] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|.elif X64APPLE +| gs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), aword [insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +|.elif X64 +| fs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), [0x8] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +|.else +| gs +|| if (insn->op3 == IR_NULL) { +| mov Ra(reg), aword [insn->op2] +|| } else { +| mov Ra(reg), [0x4] +| mov Ra(reg), aword [Ra(reg)+insn->op2] +| mov Ra(reg), aword [Ra(reg)+insn->op3] +|| } +| .endif + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, IR_ADDR, def, reg); + } +} + +static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + + IR_ASSERT(def_reg != IR_REG_NONE); + + |.if X64 + | sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */ + | mov aword [rsp+0*8], rax + | mov aword [rsp+1*8], rcx + | mov aword [rsp+2*8], rdx + | mov aword [rsp+3*8], rbx + | mov aword [rsp+5*8], rbp + | mov aword [rsp+6*8], rsi + | mov aword [rsp+7*8], rdi + | mov aword [rsp+8*8], r8 + | mov aword [rsp+9*8], r9 + | mov aword [rsp+10*8], r10 + | mov aword [rsp+11*8], r11 + | mov aword [rsp+12*8], r12 + | mov aword [rsp+13*8], r13 + | mov aword [rsp+14*8], r14 + | mov aword [rsp+15*8], r15 + | movsd qword [rsp+16*8+0*8], xmm0 + | movsd qword [rsp+16*8+1*8], xmm1 + | movsd qword [rsp+16*8+2*8], xmm2 + | movsd qword [rsp+16*8+3*8], xmm3 
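Aside (layout inferred from the stores around this point; an assumption, not a declared structure in the patch): on the x86-64 path ir_emit_exitcall() builds a full register snapshot on the stack and passes its address to the deoptimization handler in the second argument register; slot 4 (rsp) is filled in separately from the pre-exit stack pointer.

#include <stdint.h>

struct exit_regs_x64 {
    uint64_t gp[16];  /* rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8..r15 */
    double   xmm[16]; /* xmm0..xmm15, saved with movsd */
};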
+ | movsd qword [rsp+16*8+4*8], xmm4 + | movsd qword [rsp+16*8+5*8], xmm5 + | movsd qword [rsp+16*8+6*8], xmm6 + | movsd qword [rsp+16*8+7*8], xmm7 + | movsd qword [rsp+16*8+8*8], xmm8 + | movsd qword [rsp+16*8+9*8], xmm9 + | movsd qword [rsp+16*8+10*8], xmm10 + | movsd qword [rsp+16*8+11*8], xmm11 + | movsd qword [rsp+16*8+12*8], xmm12 + | movsd qword [rsp+16*8+13*8], xmm13 + | movsd qword [rsp+16*8+14*8], xmm14 + | movsd qword [rsp+16*8+15*8], xmm15 + | + | mov Ra(IR_REG_INT_ARG2), rsp + | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] + | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) + | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] + |.if X64WIN + | sub rsp, 32 /* shadow space */ + |.endif + |.else + | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ + | mov aword [esp+0*4], eax + | mov aword [esp+1*4], ecx + | mov aword [esp+2*4], edx + | mov aword [esp+3*4], ebx + | mov aword [esp+5*4], ebp + | mov aword [esp+6*4], esi + | mov aword [esp+7*4], edi + | movsd qword [esp+8*4+0*8], xmm0 + | movsd qword [esp+8*4+1*8], xmm1 + | movsd qword [esp+8*4+2*8], xmm2 + | movsd qword [esp+8*4+3*8], xmm3 + | movsd qword [esp+8*4+4*8], xmm4 + | movsd qword [esp+8*4+5*8], xmm5 + | movsd qword [esp+8*4+6*8], xmm6 + | movsd qword [esp+8*4+7*8], xmm7 + | + | mov Ra(IR_REG_INT_FCARG2), esp + | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] + | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) + | mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12] + |.endif + + if (IR_IS_CONST_REF(insn->op2)) { + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); + + if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(addr)) { + | call aword &addr + } else { +|.if X64 + if (IR_IS_SIGNED_32BIT(addr)) { + | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 + } else { + | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 + } + | call rax +|.endif + } + } else { + IR_ASSERT(0); + } + + // restore SP + |.if X64WIN + | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ + |.elif X64 + | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ + |.else + | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ + |.endif + + if (def_reg != IR_REG_INT_RET1) { + ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } +} + +static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) +{ + ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+
+ IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);
+
+ if (IR_IS_TYPE_INT(type)) {
+ if (from_reg != IR_REG_NONE) {
+ if (to_reg != IR_REG_NONE) {
+ ir_emit_mov(ctx, type, to_reg, from_reg);
+ } else {
+ ir_emit_store(ctx, type, to, from_reg);
+ }
+ } else {
+ ir_emit_load_mem_int(ctx, type, to_reg, fp, offset);
+ }
+ } else {
+ if (from_reg != IR_REG_NONE) {
+ if (to_reg != IR_REG_NONE) {
+ ir_emit_fp_mov(ctx, type, to_reg, from_reg);
+ } else {
+ ir_emit_store(ctx, type, to, from_reg);
+ }
+ } else {
+ ir_emit_load_mem_fp(ctx, type, to_reg, fp, offset);
+ }
+ }
+}
+
+static void ir_emit_load_params(ir_ctx *ctx)
+{
+ ir_use_list *use_list = &ctx->use_lists[1];
+ ir_insn *insn;
+ ir_ref i, n, *p, use;
+ int int_param_num = 0;
+ int fp_param_num = 0;
+ ir_reg src_reg;
+ ir_reg dst_reg;
+ // TODO: Calling convention specific
+ int int_reg_params_count = IR_REG_INT_ARGS;
+ int fp_reg_params_count = IR_REG_FP_ARGS;
+ const int8_t *int_reg_params = _ir_int_reg_params;
+ const int8_t *fp_reg_params = _ir_fp_reg_params;
+ int32_t stack_offset = 0;
+
+#ifdef IR_TARGET_X86
+ if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
+ int_reg_params_count = IR_REG_INT_FCARGS;
+ fp_reg_params_count = IR_REG_FP_FCARGS;
+ int_reg_params = _ir_int_fc_reg_params;
+ fp_reg_params = _ir_fp_fc_reg_params;
+ }
+#endif
+
+ if (ctx->flags & IR_USE_FRAME_POINTER) {
+ stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
+ } else {
+ stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */
+ }
+ n = use_list->count;
+ for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
+ use = *p;
+ insn = &ctx->ir_base[use];
+ if (insn->op == IR_PARAM) {
+ if (IR_IS_TYPE_INT(insn->type)) {
+ if (int_param_num < int_reg_params_count) {
+ src_reg = int_reg_params[int_param_num];
+ } else {
+ src_reg = IR_REG_NONE;
+ }
+ int_param_num++;
+#ifdef _WIN64
+ /* WIN64 calling convention uses a common counter for int and fp registers */
+ fp_param_num++;
+#endif
+ } else {
+ if (fp_param_num < fp_reg_params_count) {
+ src_reg = fp_reg_params[fp_param_num];
+ } else {
+ src_reg = IR_REG_NONE;
+ }
+ fp_param_num++;
+#ifdef _WIN64
+ /* WIN64 calling convention uses a common counter for int and fp registers */
+ int_param_num++;
+#endif
+ }
+ if (ctx->vregs[use]) {
+ dst_reg = IR_REG_NUM(ctx->regs[use][0]);
+ IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
+ stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
+ ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-ctx->stack_frame_size : ctx->call_stack_size));
+ if (src_reg != dst_reg) {
+ ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
+ }
+ if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
+ ir_emit_store(ctx, insn->type, use, dst_reg);
+ }
+ }
+ if (src_reg == IR_REG_NONE) {
+ if (sizeof(void*) == 8) {
+ stack_offset += sizeof(void*);
+ } else {
+ stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
+ }
+ }
+ }
+ }
+}
+
+static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
+{
+ if (IR_IS_TYPE_INT(type)) {
+ available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
+ } else {
+ IR_ASSERT(IR_IS_TYPE_FP(type));
+ available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
+ }
+ IR_ASSERT(!IR_REGSET_IS_EMPTY(available));
+ return IR_REGSET_FIRST(available);
+}
+
+static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
+{
+ ir_backend_data *data = ctx->data;
+ ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;
+
+ /* reserve RAX/XMM0 as the temporary for DESSA moves when no register was allocated */
+ if (to == 0) {
+ if (IR_IS_TYPE_INT(type)) {
+ if (ctx->regs[ref][0] == IR_REG_NONE) {
+ ctx->regs[ref][0] = IR_REG_RAX;
+ }
+ } else {
+ IR_ASSERT(IR_IS_TYPE_FP(type));
+ if (ctx->regs[ref][1] == IR_REG_NONE) {
+ ctx->regs[ref][1] = IR_REG_XMM0;
+ }
+ }
+ } else if (from != 0) {
+ if (IR_IS_TYPE_INT(type)) {
+ if (ctx->regs[ref][0] == IR_REG_NONE) {
+ ctx->regs[ref][0] = IR_REG_RAX;
+ }
+ } else {
+ IR_ASSERT(IR_IS_TYPE_FP(type));
+ if (ctx->regs[ref][1] == IR_REG_NONE) {
+ ctx->regs[ref][1] = IR_REG_XMM0;
+ }
+ }
+ }
+ return 1;
+}
+
+static void ir_fix_param_spills(ir_ctx *ctx)
+{
+ ir_use_list *use_list = &ctx->use_lists[1];
+ ir_insn *insn;
+ ir_ref i, n, *p, use;
+ int int_param_num = 0;
+ int fp_param_num = 0;
+ ir_reg src_reg;
+ // TODO: Calling convention specific
+ int int_reg_params_count = IR_REG_INT_ARGS;
+ int fp_reg_params_count = IR_REG_FP_ARGS;
+ const int8_t *int_reg_params = _ir_int_reg_params;
+ const int8_t *fp_reg_params = _ir_fp_reg_params;
+ int32_t stack_start = 0;
+ int32_t stack_offset = 0;
+
+#ifdef IR_TARGET_X86
+ if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
+ int_reg_params_count = IR_REG_INT_FCARGS;
+ fp_reg_params_count = IR_REG_FP_FCARGS;
+ int_reg_params = _ir_int_fc_reg_params;
+ fp_reg_params = _ir_fp_fc_reg_params;
+ }
+#endif
+
+ if (ctx->flags & IR_USE_FRAME_POINTER) {
+ /* skip old frame pointer and return address */
+ stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
+ } else {
+ /* skip return address */
+ stack_start = sizeof(void*) + ctx->stack_frame_size;
+ }
+ n = use_list->count;
+ for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
+ use = *p;
+ insn = &ctx->ir_base[use];
+ if (insn->op == IR_PARAM) {
+ if (IR_IS_TYPE_INT(insn->type)) {
+ if (int_param_num < int_reg_params_count) {
+ src_reg = int_reg_params[int_param_num];
+ } else {
+ src_reg = IR_REG_NONE;
+ }
+ int_param_num++;
+#ifdef _WIN64
+ /* WIN64 calling convention uses a common counter for int and fp registers */
+ fp_param_num++;
+#endif
+ } else {
+ if (fp_param_num < fp_reg_params_count) {
+ src_reg = fp_reg_params[fp_param_num];
+ } else {
+ src_reg = IR_REG_NONE;
+ }
+ fp_param_num++;
+#ifdef _WIN64
+ /* WIN64 calling convention uses a common counter for int and fp registers */
+ int_param_num++;
+#endif
+ }
+ if (src_reg == IR_REG_NONE) {
+ if (ctx->vregs[use]) {
+ ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
+ if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
+ && ival->stack_spill_pos == -1
+ && 
(ival->next || ival->reg == IR_REG_NONE)) {
+ ival->stack_spill_pos = stack_start + stack_offset;
+ ctx->regs[use][0] = IR_REG_NONE;
+ }
+ }
+ if (sizeof(void*) == 8) {
+ stack_offset += sizeof(void*);
+ } else {
+ stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
+ }
+ }
+ }
+ }
+
+#ifdef IR_TARGET_X86
+ if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
+ ctx->param_stack_size = stack_offset;
+ }
+#endif
+}
+
+static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
+{
+ uint32_t b;
+ ir_block *bb;
+ ir_insn *insn;
+ ir_ref i, n, j, *p;
+ uint32_t *rule, insn_flags;
+ ir_backend_data *data = ctx->data;
+ ir_regset available = 0;
+ ir_target_constraints constraints;
+ uint32_t def_flags;
+ ir_reg reg;
+
+#ifndef IR_REG_FP_RET1
+ if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
+ ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data);
+ } else {
+ ctx->ret_slot = -1;
+ }
+#endif
+
+ ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
+ memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
+
+ /* vregs + tmp + fixed + SCRATCH + ALL */
+ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
+
+ if (!ctx->arena) {
+ ctx->arena = ir_arena_create(16 * 1024);
+ }
+
+ for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
+ IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
+ for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
+ switch (ctx->rules ? *rule : insn->op) {
+ case IR_START:
+ case IR_BEGIN:
+ case IR_END:
+ case IR_IF_TRUE:
+ case IR_IF_FALSE:
+ case IR_CASE_VAL:
+ case IR_CASE_DEFAULT:
+ case IR_MERGE:
+ case IR_LOOP_BEGIN:
+ case IR_LOOP_END:
+ break;
+ default:
+ def_flags = ir_get_target_constraints(ctx, i, &constraints);
+ if (ctx->rules
+ && *rule != IR_CMP_AND_BRANCH_INT
+ && *rule != IR_CMP_AND_BRANCH_FP
+ && *rule != IR_TEST_AND_BRANCH_INT
+ && *rule != IR_GUARD_CMP_INT
+ && *rule != IR_GUARD_CMP_FP) {
+ available = IR_REGSET_SCRATCH;
+ }
+ if (ctx->vregs[i]) {
+ reg = constraints.def_reg;
+ if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
+ IR_REGSET_EXCL(available, reg);
+ ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
+ } else if (def_flags & IR_USE_MUST_BE_IN_REG) {
+ if (insn->op == IR_VLOAD
+ && ctx->live_intervals[ctx->vregs[i]]
+ && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
+ /* pass */
+ } else if (insn->op != IR_PARAM) {
+ reg = ir_get_free_reg(insn->type, available);
+ IR_REGSET_EXCL(available, reg);
+ ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
+ }
+ }
+ if (!ctx->live_intervals[ctx->vregs[i]]) {
+ ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
+ memset(ival, 0, sizeof(ir_live_interval));
+ ctx->live_intervals[ctx->vregs[i]] = ival;
+ ival->type = insn->type;
+ ival->reg = IR_REG_NONE;
+ ival->vreg = ctx->vregs[i];
+ ival->stack_spill_pos = -1;
+ if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
+ ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
+ } else {
+ ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
+ }
+ } else if (insn->op == IR_PARAM) {
+ IR_ASSERT(0 && "unexpected PARAM");
+ return;
+ }
+ } else if (insn->op == IR_VAR) {
+ ir_use_list *use_list = &ctx->use_lists[i];
+ ir_ref n = use_list->count;
+
+ if (n > 0) {
+ int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
+ ir_ref i, *p, use;
+ ir_insn *use_insn;
+
+ for (i = 0, p = 
&ctx->use_edges[use_list->refs]; i < n; i++, p++) { + use = *p; + use_insn = &ctx->ir_base[use]; + if (use_insn->op == IR_VLOAD) { + if (ctx->vregs[use] + && !ctx->live_intervals[ctx->vregs[use]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use]; + ival->stack_spill_pos = stack_spill_pos; + } + } else if (use_insn->op == IR_VSTORE) { + if (!IR_IS_CONST_REF(use_insn->op3) + && ctx->vregs[use_insn->op3] + && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { + ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); + memset(ival, 0, sizeof(ir_live_interval)); + ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; + ival->type = insn->type; + ival->reg = IR_REG_NONE; + ival->vreg = ctx->vregs[use_insn->op3]; + ival->stack_spill_pos = stack_spill_pos; + } + } + } + } + } + + insn_flags = ir_op_flags[insn->op]; + n = constraints.tmps_count; + if (n) { + do { + n--; + if (constraints.tmp_regs[n].type) { + ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][constraints.tmp_regs[n].num] = reg; + } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { + available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); + } else { + IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + } + } while (n); + } + n = insn->inputs_count; + for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { + ir_ref input = *p; + if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { + if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { + ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); + ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; + } else { + uint8_t use_flags = IR_USE_FLAGS(def_flags, j); + ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; + + if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { + ctx->regs[i][j] = ctx->regs[i][1]; + } else if (use_flags & IR_USE_MUST_BE_IN_REG) { + reg = ir_get_free_reg(ctx->ir_base[input].type, available); + IR_REGSET_EXCL(available, reg); + ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; + } + } + } + } + break; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } + if (bb->flags & IR_BB_DESSA_MOVES) { + data->dessa_from_block = b; + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + } + } + + ctx->used_preserved_regs = ctx->fixed_save_regset; + ctx->flags |= IR_NO_STACK_COMBINE; + ir_fix_stack_frame(ctx); +} + +static void ir_preallocate_call_stack(ir_ctx *ctx) +{ + int call_stack_size, peak_call_stack_size = 0; + ir_ref i, n; + ir_insn *insn; + + for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { + if (insn->op == IR_CALL) { + call_stack_size = ir_call_used_stack(ctx, insn); + if (call_stack_size > peak_call_stack_size +#ifdef IR_HAVE_FASTCALL + && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ +#endif + ) { + peak_call_stack_size = call_stack_size; + } + } + n = ir_insn_len(insn); + i += n; + insn += n; + } + if (peak_call_stack_size) { + ctx->call_stack_size = peak_call_stack_size; + ctx->flags |= IR_PREALLOCATED_STACK; + } +} + +void ir_fix_stack_frame(ir_ctx *ctx) +{ + uint32_t additional_size = 0; + + if (ctx->used_preserved_regs) { + ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; + ir_reg reg; + (void) reg; + + IR_REGSET_FOREACH(used_preserved_regs, reg) { + additional_size += sizeof(void*); + } IR_REGSET_FOREACH_END(); + } + + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); + ctx->stack_frame_size += additional_size; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + + if (ctx->flags & IR_HAS_CALLS) { + /* Stack must be 16 byte aligned */ + if (!(ctx->flags & IR_FUNCTION)) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else if (ctx->flags & IR_USE_FRAME_POINTER) { + while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } else { + if (!(ctx->flags & IR_NO_STACK_COMBINE)) { + ir_preallocate_call_stack(ctx); + } + while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != + ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { + ctx->stack_frame_size += sizeof(void*); + ctx->stack_frame_alignment += sizeof(void*); + } + } + } + + ir_fix_param_spills(ctx); +} + +static void* dasm_labels[ir_lb_MAX]; + +void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) +{ + uint32_t b, n, target; + ir_block *bb; + ir_ref i; + ir_insn *insn; + uint32_t *rule; + ir_backend_data data; + dasm_State **Dst; + int ret; + void *entry; + size_t size; + + data.ra_data.unused_slot_4 = 0; + data.ra_data.unused_slot_2 = 0; + data.ra_data.unused_slot_1 = 0; + data.ra_data.handled = NULL; + data.rodata_label = 0; + data.jmp_table_label = 0; + data.double_neg_const = 0; + data.float_neg_const = 0; + data.double_abs_const = 0; + data.float_abs_const = 0; + data.double_zero_const = 0; + 
ctx->data = &data; + + if (!ctx->live_intervals) { + ctx->stack_frame_size = 0; + ctx->stack_frame_alignment = 0; + ctx->call_stack_size = 0; + ctx->used_preserved_regs = 0; + ir_allocate_unique_spill_slots(ctx); + } + + if (ctx->fixed_stack_frame_size != -1) { + if (ctx->fixed_stack_red_zone) { + IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); + } + if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { + // TODO: report error to caller +#ifdef IR_DEBUG_MESSAGES + fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", + __FILE__, __LINE__); +#endif + ctx->data = NULL; + ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; + return NULL; + } + ctx->stack_frame_size = ctx->fixed_stack_frame_size; + ctx->call_stack_size = ctx->fixed_call_stack_size; + ctx->stack_frame_alignment = 0; + } + + Dst = &data.dasm_state; + data.dasm_state = NULL; + dasm_init(&data.dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&data.dasm_state, dasm_actions); + /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ + dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); + + if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { + |.if X64 + | endbr64 + |.else + | endbr32 + |.endif + } + + if (!(ctx->flags & IR_SKIP_PROLOGUE)) { + ir_emit_prologue(ctx); + } + if (ctx->flags & IR_FUNCTION) { + ir_emit_load_params(ctx); + } + + for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { + IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); + if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { + continue; + } + |=>b: + + i = bb->start; + insn = ctx->ir_base + i; + if (bb->flags & IR_BB_ENTRY) { + uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; + + |=>label: + if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { + |.if X64 + | endbr64 + |.else + | endbr32 + |.endif + } + ir_emit_prologue(ctx); + ctx->entries[insn->op3] = i; + } + + /* skip first instruction */ + n = ir_insn_len(insn); + i += n; + insn += n; + rule = ctx->rules + i; + + while (i <= bb->end) { + if (!((*rule) & (IR_FUSED|IR_SKIPPED))) + switch (*rule) { + case IR_VAR: + case IR_PARAM: + case IR_PI: + case IR_PHI: + case IR_SNAPSHOT: + break; + case IR_LEA_OB: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + if (insn->op == IR_ADD) { + offset = ctx->ir_base[insn->op2].val.i32; + } else { + IR_ASSERT(insn->op == IR_SUB); + int64_t long_offset = ctx->ir_base[insn->op2].val.i64; + long_offset = -long_offset; + IR_ASSERT(IR_IS_SIGNED_32BIT(long_offset)); + offset = (int32_t)long_offset; + } + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, IR_REG_NONE, 1, offset); + } + break; + case IR_LEA_SI: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t scale = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, 0); + } + break; + case IR_LEA_SIB: + { + ir_reg op1_reg = ctx->regs[i][1]; + int32_t scale = ctx->ir_base[insn->op2].val.i32; + + 
IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, 0); + } + break; + case IR_LEA_IB: + { + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[i][2]; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, 0); + } + break; + case IR_LEA_OB_I: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[i][2]; + int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_I_OB: + { + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op2_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_SI_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, IR_REG_NONE, op1_reg, scale, offset); + } + break; + case IR_LEA_SIB_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op1_reg, scale - 1, offset); + } + break; + case IR_LEA_IB_O: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op1][2]; + int32_t offset = ctx->ir_base[insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + 
ir_emit_load(ctx, insn->type, op2_reg, op1_insn->op2); + } + if (insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, 1, offset); + } + break; + case IR_LEA_OB_SI: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t offset = ctx->ir_base[op1_insn->op2].val.i32; + int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, offset); + } + break; + case IR_LEA_SI_OB: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + int32_t offset = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + if (op1_insn->op == IR_SUB) { + offset = -offset; + } + ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, offset); + } + break; + case IR_LEA_B_SI: + { + ir_insn *op2_insn = &ctx->ir_base[insn->op2]; + ir_reg op1_reg = ctx->regs[i][1]; + ir_reg op2_reg = ctx->regs[insn->op2][1]; + int32_t scale = ctx->ir_base[op2_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, op2_insn->op1); + } + ir_emit_lea(ctx, i, insn->type, op1_reg, op2_reg, scale, 0); + } + break; + case IR_LEA_SI_B: + { + ir_insn *op1_insn = &ctx->ir_base[insn->op1]; + ir_reg op1_reg = ctx->regs[insn->op1][1]; + ir_reg op2_reg = ctx->regs[i][2]; + int32_t scale = ctx->ir_base[op1_insn->op2].val.i32; + + IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); + if (IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, insn->type, op1_reg, op1_insn->op1); + } + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, insn->type, op2_reg, insn->op2); + } + ir_emit_lea(ctx, i, insn->type, op2_reg, op1_reg, scale, 0); + } + break; + case IR_MUL_PWR2: + case IR_DIV_PWR2: + case IR_MOD_PWR2: + ir_emit_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_SHIFT: + ir_emit_shift(ctx, i, insn); + break; + case IR_SHIFT_CONST: + ir_emit_shift_const(ctx, i, insn); + break; + case IR_INC: + case IR_DEC: + case IR_OP_INT: + ir_emit_op_int(ctx, i, insn); + break; + case IR_ABS_INT: + ir_emit_abs_int(ctx, i, insn); + break; + case IR_BOOL_NOT_INT: + ir_emit_bool_not_int(ctx, i, insn); + break; + case IR_OP_FP: + ir_emit_op_fp(ctx, i, insn); + break; + case IR_IMUL3: + ir_emit_imul3(ctx, i, insn); + break; + case IR_BINOP_INT: + ir_emit_binop_int(ctx, 
i, insn); + break; + case IR_BINOP_SSE2: + ir_emit_binop_sse2(ctx, i, insn); + break; + case IR_BINOP_AVX: + ir_emit_binop_avx(ctx, i, insn); + break; + case IR_MUL_INT: + case IR_DIV_INT: + case IR_MOD_INT: + ir_emit_mul_div_mod(ctx, i, insn); + break; + case IR_CMP_INT: + ir_emit_cmp_int(ctx, i, insn); + break; + case IR_TESTCC_INT: + ir_emit_testcc_int(ctx, i, insn); + break; + case IR_SETCC_INT: + ir_emit_setcc_int(ctx, i, insn); + break; + case IR_CMP_FP: + ir_emit_cmp_fp(ctx, i, insn); + break; + case IR_SEXT: + ir_emit_sext(ctx, i, insn); + break; + case IR_ZEXT: + ir_emit_zext(ctx, i, insn); + break; + case IR_TRUNC: + ir_emit_trunc(ctx, i, insn); + break; + case IR_BITCAST: + ir_emit_bitcast(ctx, i, insn); + break; + case IR_INT2FP: + ir_emit_int2fp(ctx, i, insn); + break; + case IR_FP2INT: + ir_emit_fp2int(ctx, i, insn); + break; + case IR_FP2FP: + ir_emit_fp2fp(ctx, i, insn); + break; + case IR_COPY_INT: + ir_emit_copy_int(ctx, i, insn); + break; + case IR_COPY_FP: + ir_emit_copy_fp(ctx, i, insn); + break; + case IR_CMP_AND_BRANCH_INT: + ir_emit_cmp_and_branch_int(ctx, b, i, insn); + break; + case IR_CMP_AND_BRANCH_FP: + ir_emit_cmp_and_branch_fp(ctx, b, i, insn); + break; + case IR_TEST_AND_BRANCH_INT: + ir_emit_test_and_branch_int(ctx, b, i, insn); + break; + case IR_JCC_INT: + { + ir_op op = ctx->ir_base[insn->op2].op; + + if (op == IR_ADD || + op == IR_SUB || +// op == IR_MUL || + op == IR_OR || + op == IR_AND || + op == IR_XOR) { + op = IR_NE; + } else { + IR_ASSERT(op >= IR_EQ && op <= IR_UGT); + } + ir_emit_jcc(ctx, op, b, i, insn, 1); + } + break; + case IR_GUARD_CMP_INT: + if (ir_emit_guard_cmp_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_CMP_FP: + if (ir_emit_guard_cmp_fp(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_TEST_INT: + if (ir_emit_guard_test_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_JCC_INT: + if (ir_emit_guard_jcc_int(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_IF_INT: + ir_emit_if_int(ctx, b, i, insn); + break; + case IR_COND: + ir_emit_cond(ctx, i, insn); + break; + case IR_SWITCH: + ir_emit_switch(ctx, b, i, insn); + break; + case IR_MIN_MAX_INT: + ir_emit_min_max_int(ctx, i, insn); + break; + case IR_OVERFLOW: + ir_emit_overflow(ctx, i, insn); + break; + case IR_OVERFLOW_AND_BRANCH: + ir_emit_overflow_and_branch(ctx, b, i, insn); + break; + case IR_END: + case IR_LOOP_END: + if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { + ir_emit_osr_entry_loads(ctx, b, bb); + } + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + do { + ir_ref succ = ctx->cfg_edges[bb->successors]; + + if (UNEXPECTED(bb->successors_count == 2)) { + if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { + succ = ctx->cfg_edges[bb->successors + 1]; + } else { + IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); + } + } else { + IR_ASSERT(bb->successors_count == 1); + } + target = ir_skip_empty_target_blocks(ctx, succ); + if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { + | jmp =>target + } + } while (0); + break; + case IR_RETURN_VOID: + ir_emit_return_void(ctx); + break; + case IR_RETURN_INT: + ir_emit_return_int(ctx, i, insn); + break; + case IR_RETURN_FP: + ir_emit_return_fp(ctx, i, insn); + break; + case IR_CALL: + ir_emit_call(ctx, i, insn); + break; + case IR_TAILCALL: + ir_emit_tailcall(ctx, i, insn); + break; + case IR_IJMP: + ir_emit_ijmp(ctx, i, insn); + break; + case IR_MEM_OP_INT: + case 
IR_MEM_INC: + case IR_MEM_DEC: + ir_emit_mem_op_int(ctx, i, insn); + break; + case IR_MEM_BINOP_INT: + ir_emit_mem_binop_int(ctx, i, insn); + break; + case IR_MEM_MUL_PWR2: + case IR_MEM_DIV_PWR2: + case IR_MEM_MOD_PWR2: + ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); + break; + case IR_MEM_SHIFT: + ir_emit_mem_shift(ctx, i, insn); + break; + case IR_MEM_SHIFT_CONST: + ir_emit_mem_shift_const(ctx, i, insn); + break; + case IR_REG_BINOP_INT: + ir_emit_reg_binop_int(ctx, i, insn); + break; + case IR_VADDR: + ir_emit_vaddr(ctx, i, insn); + break; + case IR_VLOAD: + ir_emit_vload(ctx, i, insn); + break; + case IR_VSTORE_INT: + ir_emit_vstore_int(ctx, i, insn); + break; + case IR_VSTORE_FP: + ir_emit_vstore_fp(ctx, i, insn); + break; + case IR_RLOAD: + ir_emit_rload(ctx, i, insn); + break; + case IR_RSTORE: + ir_emit_rstore(ctx, i, insn); + break; + case IR_LOAD_INT: + ir_emit_load_int(ctx, i, insn); + break; + case IR_LOAD_FP: + ir_emit_load_fp(ctx, i, insn); + break; + case IR_STORE_INT: + ir_emit_store_int(ctx, i, insn); + break; + case IR_STORE_FP: + ir_emit_store_fp(ctx, i, insn); + break; + case IR_ALLOCA: + ir_emit_alloca(ctx, i, insn); + break; + case IR_AFREE: + ir_emit_afree(ctx, i, insn); + break; + case IR_EXITCALL: + ir_emit_exitcall(ctx, i, insn); + break; + case IR_GUARD: + case IR_GUARD_NOT: + if (ir_emit_guard(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_GUARD_OVERFLOW: + if (ir_emit_guard_overflow(ctx, b, i, insn)) { + goto next_block; + } + break; + case IR_TLS: + ir_emit_tls(ctx, i, insn); + break; + case IR_TRAP: + | int3 + break; + default: + IR_ASSERT(0 && "NIY rule/instruction"); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; + return NULL; + } + n = ir_insn_len(insn); + i += n; + insn += n; + rule += n; + } +next_block:; + } + + if (data.rodata_label) { + |.rodata + } + for (i = IR_UNUSED + 1, insn = ctx->ir_base - i; i < ctx->consts_count; i++, insn--) { + if (insn->const_flags & IR_CONST_EMIT) { + if (IR_IS_TYPE_FP(insn->type)) { + int label = ctx->cfg_blocks_count + i; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + if (insn->type == IR_DOUBLE) { + |.align 8 + |=>label: + |.dword insn->val.u32, insn->val.u32_hi + } else { + IR_ASSERT(insn->type == IR_FLOAT); + |.align 4 + |=>label: + |.dword insn->val.u32 + } + } else if (insn->op == IR_STR) { + int label = ctx->cfg_blocks_count + i; + const char *str = ir_get_str(ctx, insn->val.i32); + int i = 0; + + if (!data.rodata_label) { + data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; + + |.rodata + |=>data.rodata_label: + } + |.align 8 + |=>label: + while (str[i]) { + char c = str[i]; + + if (c == '\\') { + if (str[i+1] == '\\') { + i++; + c = '\\'; + } else if (str[i+1] == '\'') { + i++; + c = '\''; + } else if (str[i+1] == '"') { + i++; + c = '"'; + } else if (str[i+1] == 'a') { + i++; + c = '\a'; + } else if (str[i+1] == 'b') { + i++; + c = '\b'; + } else if (str[i+1] == 'e') { + i++; + c = 27; /* '\e'; */ + } else if (str[i+1] == 'f') { + i++; + c = '\f'; + } else if (str[i+1] == 'n') { + i++; + c = '\n'; + } else if (str[i+1] == 'r') { + i++; + c = '\r'; + } else if (str[i+1] == 't') { + i++; + c = '\t'; + } else if (str[i+1] == 'v') { + i++; + c = '\v'; + } else if (str[i+1] == '?') { + i++; + c = 0x3f; + } + } + |.byte c + i++; + } + |.byte 0 + + } else { + IR_ASSERT(0); + } + } + } + if (data.rodata_label) { + |.code + } + + if 
(ctx->status) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + return NULL; + } + + ret = dasm_link(&data.dasm_state, size_ptr); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_LINK; + return NULL; + } + size = *size_ptr; + + if (ctx->code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > ctx->code_buffer_size) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + entry = ctx->code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + if (!entry) { + dasm_free(&data.dasm_state); + ctx->data = NULL; + ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; + return NULL; + } + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&data.dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&data.dasm_state); + if (ctx->code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + ctx->data = NULL; + ctx->status = IR_ERROR_ENCODE; + return NULL; + } + + if (data.jmp_table_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); + ctx->jmp_table_offset = offset; + } else { + ctx->jmp_table_offset = 0; + } + if (data.rodata_label) { + uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); + ctx->rodata_offset = offset; + } else { + ctx->rodata_offset = 0; + } + + if (ctx->entries_count) { + /* For all entries */ + i = ctx->entries_count; + do { + ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; + uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); + insn->op3 = offset; + } while (i != 0); + } + + dasm_free(&data.dasm_state); + + ir_mem_flush(entry, size); + + if (ctx->code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + ctx->data = NULL; + return entry; +} + +const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, void *code_buffer, size_t code_buffer_size, size_t *size_ptr) +{ + void *entry; + size_t size; + uint32_t i; + dasm_State **Dst, *dasm_state; + int ret; + + IR_ASSERT(code_buffer); + IR_ASSERT(IR_IS_SIGNED_32BIT((char*)exit_addr - (char*)code_buffer)); + IR_ASSERT(IR_IS_SIGNED_32BIT((char*)exit_addr - ((char*)code_buffer + code_buffer_size))); + + Dst = &dasm_state; + dasm_state = NULL; + dasm_init(&dasm_state, DASM_MAXSECTION); + dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); + dasm_setup(&dasm_state, dasm_actions); + + for (i = 0; i < exit_points_per_group - 1; i++) { + | push byte i + | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 + } + | push byte i + |// 1: + | add aword [r4], first_exit_point + | jmp aword &exit_addr + + ret = dasm_link(&dasm_state, &size); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + return NULL; + } + + if (code_buffer != NULL) { + if (IR_ALIGNED_SIZE(size, 16) > code_buffer_size) { + dasm_free(&dasm_state); + return NULL; + } + entry = code_buffer; + IR_ASSERT((uintptr_t)entry % 16 == 0); + } else { + entry = ir_mem_mmap(size); + ir_mem_unprotect(entry, size); + } + + ret = dasm_encode(&dasm_state, entry); + if (ret != DASM_S_OK) { + IR_ASSERT(0); + dasm_free(&dasm_state); + if (code_buffer == NULL) { + ir_mem_unmap(entry, size); + } + return NULL; + } + + dasm_free(&dasm_state); + + ir_mem_flush(entry, size); + + if (code_buffer == NULL) { + ir_mem_protect(entry, size); + } + + *size_ptr = size; + return entry; +} diff --git a/ext/opcache/jit/ir/ir_x86.h 
b/ext/opcache/jit/ir/ir_x86.h new file mode 100644 index 0000000000000..ff4b767b2eb09 --- /dev/null +++ b/ext/opcache/jit/ir/ir_x86.h @@ -0,0 +1,226 @@ +/* + * IR - Lightweight JIT Compilation Framework + * (x86/x86_64 CPU specific definitions) + * Copyright (C) 2022 Zend by Perforce. + * Authors: Dmitry Stogov + */ + +#ifndef IR_X86_H +#define IR_X86_H + +#if defined(IR_TARGET_X64) +# define IR_GP_REGS(_) \ + _(R0, rax, eax, ax, al, ah) \ + _(R1, rcx, ecx, cx, cl, ch) \ + _(R2, rdx, edx, dx, dl, dh) \ + _(R3, rbx, ebx, bx, bl, bh) \ + _(R4, rsp, esp, __, __, __) \ + _(R5, rbp, ebp, bp, r5b, __) \ + _(R6, rsi, esi, si, r6b, __) \ + _(R7, rdi, edi, di, r7b, __) \ + _(R8, r8, r8d, r8w, r8b, __) \ + _(R9, r9, r9d, r9w, r9b, __) \ + _(R10, r10, r10d, r10w, r10b, __) \ + _(R11, r11, r11d, r11w, r11b, __) \ + _(R12, r12, r12d, r12w, r12b, __) \ + _(R13, r13, r13d, r13w, r13b, __) \ + _(R14, r14, r14d, r14w, r14b, __) \ + _(R15, r15, r15d, r15w, r15b, __) \ + +# define IR_FP_REGS(_) \ + _(XMM0, xmm0) \ + _(XMM1, xmm1) \ + _(XMM2, xmm2) \ + _(XMM3, xmm3) \ + _(XMM4, xmm4) \ + _(XMM5, xmm5) \ + _(XMM6, xmm6) \ + _(XMM7, xmm7) \ + _(XMM8, xmm8) \ + _(XMM9, xmm9) \ + _(XMM10, xmm10) \ + _(XMM11, xmm11) \ + _(XMM12, xmm12) \ + _(XMM13, xmm13) \ + _(XMM14, xmm14) \ + _(XMM15, xmm15) \ + +#elif defined(IR_TARGET_X86) + +# define IR_GP_REGS(_) \ + _(R0, ___, eax, ax, al, ah) \ + _(R1, ___, ecx, cx, cl, ch) \ + _(R2, ___, edx, dx, dl, dh) \ + _(R3, ___, ebx, bx, bl, bh) \ + _(R4, ___, esp, __, __, __) \ + _(R5, ___, ebp, bp, __, __) \ + _(R6, ___, esi, si, __, __) \ + _(R7, ___, edi, di, __, __) \ + +# define IR_FP_REGS(_) \ + _(XMM0, xmm0) \ + _(XMM1, xmm1) \ + _(XMM2, xmm2) \ + _(XMM3, xmm3) \ + _(XMM4, xmm4) \ + _(XMM5, xmm5) \ + _(XMM6, xmm6) \ + _(XMM7, xmm7) \ + +#else +# error "Unsupported target architecture" +#endif + +#define IR_GP_REG_ENUM(code, name64, name32, name16, name8, name8h) \ + IR_REG_ ## code, + +#define IR_FP_REG_ENUM(code, name) \ + IR_REG_ ## code, + +enum _ir_reg { + _IR_REG_NONE = -1, + IR_GP_REGS(IR_GP_REG_ENUM) + IR_FP_REGS(IR_FP_REG_ENUM) + IR_REG_NUM, +}; + +#define IR_REG_GP_FIRST IR_REG_R0 +#define IR_REG_FP_FIRST IR_REG_XMM0 +#define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) +#define IR_REG_FP_LAST (IR_REG_NUM - 1) +#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ +#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ + +#define IR_REGSET_64BIT 0 + +#define IR_REG_STACK_POINTER \ + IR_REG_RSP +#define IR_REG_FRAME_POINTER \ + IR_REG_RBP +#define IR_REGSET_FIXED \ + (IR_REGSET(IR_REG_RSP)) +#define IR_REGSET_GP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_GP_LAST), IR_REGSET_FIXED) +#define IR_REGSET_FP \ + IR_REGSET_DIFFERENCE(IR_REGSET_INTERVAL(IR_REG_FP_FIRST, IR_REG_FP_LAST), IR_REGSET_FIXED) + +#define IR_REG_RAX IR_REG_R0 +#define IR_REG_RCX IR_REG_R1 +#define IR_REG_RDX IR_REG_R2 +#define IR_REG_RBX IR_REG_R3 +#define IR_REG_RSP IR_REG_R4 +#define IR_REG_RBP IR_REG_R5 +#define IR_REG_RSI IR_REG_R6 +#define IR_REG_RDI IR_REG_R7 + +/* Calling Convention */ +#ifdef _WIN64 + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_FP_RET1 IR_REG_XMM0 +# define IR_REG_INT_ARGS 4 +# define IR_REG_FP_ARGS 4 +# define IR_REG_INT_ARG1 IR_REG_RCX +# define IR_REG_INT_ARG2 IR_REG_RDX +# define IR_REG_INT_ARG3 IR_REG_R8 +# define IR_REG_INT_ARG4 IR_REG_R9 +# define IR_REG_FP_ARG1 IR_REG_XMM0 +# define IR_REG_FP_ARG2 IR_REG_XMM1 +# define IR_REG_FP_ARG3 IR_REG_XMM2 +# define IR_REG_FP_ARG4 IR_REG_XMM3 +# define IR_MAX_REG_ARGS 4 +# 
define IR_SHADOW_ARGS 32 /* Reserved space in bytes - "home space" or "shadow store" for register arguments */ + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ + | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ + | IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5)) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET_INTERVAL(IR_REG_RBP, IR_REG_RDI) \ + | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) \ + | IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15)) + +#elif defined(IR_TARGET_X64) + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_FP_RET1 IR_REG_XMM0 +# define IR_REG_INT_ARGS 6 +# define IR_REG_FP_ARGS 8 +# define IR_REG_INT_ARG1 IR_REG_RDI +# define IR_REG_INT_ARG2 IR_REG_RSI +# define IR_REG_INT_ARG3 IR_REG_RDX +# define IR_REG_INT_ARG4 IR_REG_RCX +# define IR_REG_INT_ARG5 IR_REG_R8 +# define IR_REG_INT_ARG6 IR_REG_R9 +# define IR_REG_FP_ARG1 IR_REG_XMM0 +# define IR_REG_FP_ARG2 IR_REG_XMM1 +# define IR_REG_FP_ARG3 IR_REG_XMM2 +# define IR_REG_FP_ARG4 IR_REG_XMM3 +# define IR_REG_FP_ARG5 IR_REG_XMM4 +# define IR_REG_FP_ARG6 IR_REG_XMM5 +# define IR_REG_FP_ARG7 IR_REG_XMM6 +# define IR_REG_FP_ARG8 IR_REG_XMM7 +# define IR_MAX_REG_ARGS 14 +# define IR_SHADOW_ARGS 0 + +# define IR_REG_VARARG_FP_REGS IR_REG_RAX /* hidden argument to specify the number of vector registers used */ + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ + | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) \ + | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ + | IR_REGSET_FP) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET(IR_REG_RBP) \ + | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15)) + +#elif defined(IR_TARGET_X86) + +# define IR_REG_INT_RET1 IR_REG_RAX +# define IR_REG_INT_RET2 IR_REG_RDX +# define IR_REG_INT_ARGS 0 +# define IR_REG_FP_ARGS 0 + +# define IR_HAVE_FASTCALL 1 +# define IR_REG_INT_FCARGS 2 +# define IR_REG_FP_FCARGS 0 +# define IR_REG_INT_FCARG1 IR_REG_RCX +# define IR_REG_INT_FCARG2 IR_REG_RDX +# define IR_MAX_REG_ARGS 2 +# define IR_SHADOW_ARGS 0 + +# define IR_REGSET_SCRATCH \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP) + +# define IR_REGSET_PRESERVED \ + (IR_REGSET(IR_REG_RBX) \ + | IR_REGSET(IR_REG_RBP) \ + | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI)) + +#else +# error "Unsupported target architecture" +#endif + +typedef struct _ir_tmp_reg { + union { + uint8_t num; + int8_t reg; + }; + uint8_t type; + uint8_t start; + uint8_t end; +} ir_tmp_reg; + +struct _ir_target_constraints { + int8_t def_reg; + uint8_t tmps_count; + uint8_t hints_count; + ir_tmp_reg tmp_regs[3]; + int8_t hints[IR_MAX_REG_ARGS + 3]; +}; + +#endif /* IR_X86_H */ diff --git a/ext/opcache/jit/ir/y.txt b/ext/opcache/jit/ir/y.txt new file mode 100644 index 0000000000000..b125f8500501d --- /dev/null +++ b/ext/opcache/jit/ir/y.txt @@ -0,0 +1,6 @@ +llvm.floor.f64 +llvm.fmuladd.f64 +llvm.memcpy.p0.p0.i64 +llvm.memset.p0.i64 +llvm.va_end +llvm.va_start diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index 3d086cd27c340..02cb0cab7976e 100644 --- a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -39,11 +39,15 @@ #include "Optimizer/zend_call_graph.h" #include "Optimizer/zend_dump.h" +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "jit/zend_jit_x86.h" #elif ZEND_JIT_TARGET_ARM64 # include "jit/zend_jit_arm64.h" #endif +#else +#include "Optimizer/zend_worklist.h" +#endif #include "jit/zend_jit_internal.h" @@ -74,6 +78,7 @@ zend_jit_globals jit_globals; #define JIT_STUB_PREFIX "JIT$$" 
#define TRACE_PREFIX "TRACE-" +#ifndef ZEND_JIT_IR #define DASM_M_GROW(ctx, t, p, sz, need) \ do { \ size_t _sz = (sz), _need = (need); \ @@ -102,6 +107,7 @@ typedef struct _zend_jit_stub { #define JIT_STUB(name, offset, adjustment) \ {JIT_STUB_PREFIX #name, zend_jit_ ## name ## _stub, offset, adjustment} +#endif /* ZEND_JIT_IR */ zend_ulong zend_jit_profile_counter = 0; int zend_jit_profile_counter_rid = -1; @@ -137,8 +143,11 @@ static int zend_jit_trace_may_exit(const zend_op_array *op_array, const zend_op static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t flags); static const void *zend_jit_trace_get_exit_addr(uint32_t n); static void zend_jit_trace_add_code(const void *start, uint32_t size); +static zend_string *zend_jit_func_name(const zend_op_array *op_array); + static bool zend_jit_needs_arg_dtor(const zend_function *func, uint32_t arg_num, zend_call_info *call_info); +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_ARM64 static zend_jit_trace_info *zend_jit_get_current_trace_info(void); static uint32_t zend_jit_trace_find_exit_point(const void* addr); @@ -180,6 +189,7 @@ static int zend_jit_assign_to_variable(dasm_State **Dst, uint32_t val_info, zend_jit_addr res_addr, bool check_exception); +#endif /* ZEND_JIT_IR */ static bool dominates(const zend_basic_block *blocks, int a, int b) { while (blocks[b].level > blocks[a].level) { @@ -206,6 +216,18 @@ static bool zend_ssa_is_last_use(const zend_op_array *op_array, const zend_ssa * || (ssa->cfg.blocks[ssa->cfg.map[use]].flags & ZEND_BB_LOOP_HEADER)) { int b = ssa->cfg.map[use]; int prev_use = ssa->vars[var].use_chain; + int def_block; + + if (ssa->vars[var].definition >= 0) { + def_block =ssa->cfg.map[ssa->vars[var].definition]; + } else { + ZEND_ASSERT(ssa->vars[var].definition_phi); + def_block = ssa->vars[var].definition_phi->block; + } + if (dominates(ssa->cfg.blocks, def_block, + (ssa->cfg.blocks[b].flags & ZEND_BB_LOOP_HEADER) ? b : ssa->cfg.blocks[b].loop_header)) { + return 0; + } while (prev_use >= 0 && prev_use != use) { if (b != ssa->cfg.map[prev_use] @@ -226,6 +248,7 @@ static bool zend_ssa_is_last_use(const zend_op_array *op_array, const zend_ssa * return 0; } +#ifndef ZEND_JIT_IR static bool zend_ival_is_last_use(const zend_lifetime_interval *ival, int use) { if (ival->flags & ZREG_LAST_USE) { @@ -248,6 +271,7 @@ static bool zend_is_commutative(uint8_t opcode) opcode == ZEND_BW_AND || opcode == ZEND_BW_XOR; } +#endif static int zend_jit_is_constant_cmp_long_long(const zend_op *opline, zend_ssa_range *op1_range, @@ -737,6 +761,7 @@ static bool zend_may_be_dynamic_property(zend_class_entry *ce, zend_string *memb #define OP2_RANGE() OP_RANGE(ssa_op, op2) #define OP1_DATA_RANGE() OP_RANGE(ssa_op + 1, op1) +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "dynasm/dasm_x86.h" #elif ZEND_JIT_TARGET_ARM64 @@ -758,6 +783,11 @@ static int zend_jit_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int # include #endif +#else /* ZEND_JIT_IR */ +#include "jit/zend_jit_helpers.c" +#include "Zend/zend_cpuinfo.h" +#endif /* ZEND_JIT_IR */ + #ifdef HAVE_GCC_GLOBAL_REGS # define GCC_GLOBAL_REGS 1 #else @@ -773,6 +803,7 @@ static int zend_jit_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int #define BP_JIT_IS 6 /* Used for ISSET_ISEMPTY_DIM_OBJ. 
see BP_VAR_*defines in Zend/zend_compile.h */ +#ifndef ZEND_JIT_IR typedef enum _sp_adj_kind { SP_ADJ_NONE, SP_ADJ_RET, @@ -783,6 +814,7 @@ typedef enum _sp_adj_kind { } sp_adj_kind; static int sp_adj[SP_ADJ_LAST]; +#endif /* The generated code may contain tautological comparisons, ignore them. */ #if defined(__clang__) @@ -791,12 +823,16 @@ static int sp_adj[SP_ADJ_LAST]; # pragma clang diagnostic ignored "-Wstring-compare" #endif +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_X86 # include "jit/zend_jit_vtune.c" # include "jit/zend_jit_x86.c" #elif ZEND_JIT_TARGET_ARM64 # include "jit/zend_jit_arm64.c" #endif +#else +#include "jit/zend_jit_ir.c" +#endif #if defined(__clang__) # pragma clang diagnostic pop @@ -835,19 +871,20 @@ static zend_string *zend_jit_func_name(const zend_op_array *op_array) smart_str buf = {0}; if (op_array->function_name) { + smart_str_appends(&buf, JIT_PREFIX); if (op_array->scope) { - smart_str_appends(&buf, JIT_PREFIX); smart_str_appendl(&buf, ZSTR_VAL(op_array->scope->name), ZSTR_LEN(op_array->scope->name)); smart_str_appends(&buf, "::"); - smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); - smart_str_0(&buf); - return buf.s; - } else { - smart_str_appends(&buf, JIT_PREFIX); - smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); - smart_str_0(&buf); - return buf.s; } + smart_str_appendl(&buf, ZSTR_VAL(op_array->function_name), ZSTR_LEN(op_array->function_name)); + if (op_array->fn_flags & ZEND_ACC_CLOSURE) { + smart_str_appends(&buf, ":"); + smart_str_appendl(&buf, ZSTR_VAL(op_array->filename), ZSTR_LEN(op_array->filename)); + smart_str_appends(&buf, ":"); + smart_str_append_long(&buf, op_array->line_start); + } + smart_str_0(&buf); + return buf.s; } else if (op_array->filename) { smart_str_appends(&buf, JIT_PREFIX); smart_str_appendl(&buf, ZSTR_VAL(op_array->filename), ZSTR_LEN(op_array->filename)); @@ -858,6 +895,7 @@ static zend_string *zend_jit_func_name(const zend_op_array *op_array) } } +#ifndef ZEND_JIT_IR #if ZEND_DEBUG static void handle_dasm_error(int ret) { switch (ret & 0xff000000u) { @@ -1130,6 +1168,7 @@ static void *dasm_link_and_encode(dasm_State **dasm_state, return entry; } +#endif /* ZEND_JIT_IR */ static int zend_may_overflow(const zend_op *opline, const zend_ssa_op *ssa_op, const zend_op_array *op_array, zend_ssa *ssa) { @@ -1377,6 +1416,7 @@ static int zend_jit_op_array_analyze2(const zend_op_array *op_array, zend_script return SUCCESS; } +#ifndef ZEND_JIT_IR static int zend_jit_add_range(zend_lifetime_interval **intervals, int var, uint32_t from, uint32_t to) { zend_lifetime_interval *ival = intervals[var]; @@ -2660,6 +2700,198 @@ static zend_lifetime_interval** zend_jit_allocate_registers(const zend_op_array return NULL; } +#else /* ZEND_JIT_IR */ + +static void zend_jit_allocate_registers(zend_jit_ctx *ctx, const zend_op_array *op_array, zend_ssa *ssa) +{ + void *checkpoint; + int candidates_count, i; + zend_jit_reg_var *ra; + + checkpoint = zend_arena_checkpoint(CG(arena)); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_jit_reg_var)); + candidates_count = 0; + for (i = 0; i < ssa->vars_count; i++) { + if (zend_jit_may_be_in_reg(op_array, ssa, i)) { + ra[i].ref = IR_NULL; + candidates_count++; + } + } + if (!candidates_count) { + zend_arena_release(&CG(arena), checkpoint); + return; + } + + if (JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) { + /* Naive SSA resolution */ + for (i = 0; i < ssa->vars_count; i++) { + if 
(ssa->vars[i].definition_phi && !ssa->vars[i].no_val) { + zend_ssa_phi *phi = ssa->vars[i].definition_phi; + int k, src; + + if (phi->pi >= 0) { + src = phi->sources[0]; + if (ra[i].ref) { + if (!ra[src].ref) { + ra[i].flags |= ZREG_LOAD; + } else { + ra[i].flags |= ZREG_PI; + } + } else if (ra[src].ref) { + ra[src].flags |= ZREG_STORE; + } + } else { + int need_move = 0; + + for (k = 0; k < ssa->cfg.blocks[phi->block].predecessors_count; k++) { + src = phi->sources[k]; + if (src >= 0) { + if (ssa->vars[src].definition_phi + && ssa->vars[src].definition_phi->pi >= 0 + && phi->block == ssa->vars[src].definition_phi->block) { + /* Skip zero-length interval for Pi variable */ + src = ssa->vars[src].definition_phi->sources[0]; + } + if (ra[i].ref) { + if (!ra[src].ref) { + need_move = 1; + } + } else if (ra[src].ref) { + need_move = 1; + } + } + } + if (need_move) { + if (ra[i].ref) { + ra[i].flags |= ZREG_LOAD; + } + for (k = 0; k < ssa->cfg.blocks[phi->block].predecessors_count; k++) { + src = phi->sources[k]; + if (src >= 0) { + if (ssa->vars[src].definition_phi + && ssa->vars[src].definition_phi->pi >= 0 + && phi->block == ssa->vars[src].definition_phi->block) { + /* Skip zero-length interval for Pi variable */ + src = ssa->vars[src].definition_phi->sources[0]; + } + if (ra[src].ref) { + ra[src].flags |= ZREG_STORE; + } + } + } + } else { + ra[i].flags |= ZREG_PHI; + } + } + } + } + + /* Remove useless register allocation */ + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref && + ((ra[i].flags & ZREG_LOAD) || + ((ra[i].flags & ZREG_STORE) && ssa->vars[i].definition >= 0)) && + ssa->vars[i].use_chain < 0) { + bool may_remove = 1; + zend_ssa_phi *phi = ssa->vars[i].phi_use_chain; + + while (phi) { + if (ra[phi->ssa_var].ref && + !(ra[phi->ssa_var].flags & ZREG_LOAD)) { + may_remove = 0; + break; + } + phi = zend_ssa_next_use_phi(ssa, i, phi); + } + if (may_remove) { + ra[i].ref = IR_UNUSED; + } + } + } + + /* Remove intervals used once */ + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref && + (ra[i].flags & ZREG_LOAD) && + (ra[i].flags & ZREG_STORE) && + (ssa->vars[i].use_chain < 0 || + zend_ssa_next_use(ssa->ops, i, ssa->vars[i].use_chain) < 0)) { + bool may_remove = 1; + zend_ssa_phi *phi = ssa->vars[i].phi_use_chain; + + while (phi) { + if (ra[phi->ssa_var].ref && + !(ra[phi->ssa_var].flags & ZREG_LOAD)) { + may_remove = 0; + break; + } + phi = zend_ssa_next_use_phi(ssa, i, phi); + } + if (may_remove) { + ra[i].ref = IR_UNUSED; + } + } + } + } + + if (JIT_G(debug) & ZEND_JIT_DEBUG_REG_ALLOC) { + fprintf(stderr, "Live Ranges \"%s\"\n", op_array->function_name ? ZSTR_VAL(op_array->function_name) : "[main]"); + for (i = 0; i < ssa->vars_count; i++) { + if (ra[i].ref) { + fprintf(stderr, "#%d.", i); + uint32_t var_num = ssa->vars[i].var; + zend_dump_var(op_array, (var_num < op_array->last_var ? 
IS_CV : 0), var_num); + if (ra[i].flags & ZREG_LOAD) { + fprintf(stderr, " load"); + } + if (ra[i].flags & ZREG_STORE) { + fprintf(stderr, " store"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, "\n"); + } + + ctx->ra = ra; +} + +static int zend_jit_compute_post_order(zend_cfg *cfg, int start, int *post_order) +{ + int count = 0; + int b, n, *p; + zend_basic_block *bb; + zend_worklist worklist; + ALLOCA_FLAG(use_heap) + + ZEND_WORKLIST_ALLOCA(&worklist, cfg->blocks_count, use_heap); + zend_worklist_push(&worklist, start); + + while (zend_worklist_len(&worklist) != 0) { +next: + b = zend_worklist_peek(&worklist); + bb = &cfg->blocks[b]; + n = bb->successors_count; + if (n > 0) { + p = bb->successors; + do { + if (cfg->blocks[*p].flags & (ZEND_BB_CATCH|ZEND_BB_FINALLY|ZEND_BB_FINALLY_END)) { + /* skip */ + } else if (zend_worklist_push(&worklist, *p)) { + goto next; + } + p++; + n--; + } while (n > 0); + } + zend_worklist_pop(&worklist); + post_order[count++] = b; + } + ZEND_WORKLIST_FREE_ALLOCA(&worklist, use_heap); + return count; +} +#endif /* ZEND_JIT_IR */ + static bool zend_jit_next_is_send_result(const zend_op *opline) { if (opline->result_type == IS_TMP_VAR @@ -2704,12 +2936,18 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op { int b, i, end; zend_op *opline; - dasm_State* dasm_state = NULL; +#ifndef ZEND_JIT_IR + dasm_State* ctx = NULL; + zend_lifetime_interval **ra = NULL; + bool is_terminated = 1; /* previous basic block is terminated by jump */ +#else + zend_jit_ctx ctx; + zend_jit_ctx *jit = &ctx; + zend_jit_reg_var *ra = NULL; +#endif void *handler; int call_level = 0; void *checkpoint = NULL; - zend_lifetime_interval **ra = NULL; - bool is_terminated = 1; /* previous basic block is terminated by jump */ bool recv_emitted = 0; /* emitted at least one RECV opcode */ uint8_t smart_branch_opcode; uint32_t target_label, target_label2; @@ -2733,6 +2971,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } +#ifndef ZEND_JIT_IR if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { checkpoint = zend_arena_checkpoint(CG(arena)); ra = zend_jit_allocate_registers(op_array, ssa); @@ -2751,13 +2990,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } - dasm_init(&dasm_state, DASM_MAXSECTION); - dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX); - dasm_setup(&dasm_state, dasm_actions); + dasm_init(&ctx, DASM_MAXSECTION); + dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX); + dasm_setup(&ctx, dasm_actions); - dasm_growpc(&dasm_state, ssa->cfg.blocks_count * 2 + 1); + dasm_growpc(&ctx, ssa->cfg.blocks_count * 2 + 1); - zend_jit_align_func(&dasm_state); + zend_jit_align_func(&ctx); for (b = 0; b < ssa->cfg.blocks_count; b++) { if ((ssa->cfg.blocks[b].flags & ZEND_BB_REACHABLE) == 0) { continue; @@ -2775,11 +3014,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } if (ssa->cfg.blocks[b].flags & ZEND_BB_FOLLOW) { if (!is_terminated) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } else //#endif if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) { @@ -2789,13 +3028,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline == op_array->opcodes || (opline-1)->opcode != ZEND_RECV_INIT) 
{ if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); for (i = 1; (opline+i)->opcode == ZEND_RECV_INIT; i++) { - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b + i); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b + i); } - zend_jit_prologue(&dasm_state); + zend_jit_prologue(&ctx); } recv_emitted = 1; } else if (opline->opcode == ZEND_RECV) { @@ -2803,9 +3042,9 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* skip */ continue; } else if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_jmp(&ctx, b); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } else { zend_arg_info *arg_info; @@ -2821,47 +3060,156 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* skip */ continue; } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); recv_emitted = 1; } } else { if (recv_emitted) { - zend_jit_jmp(&dasm_state, b); + zend_jit_jmp(&ctx, b); } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && ssa->cfg.blocks[b].len == 1 && (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { /* don't generate code for BB with single opcode */ - dasm_free(&dasm_state); + dasm_free(&ctx); if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } return SUCCESS; } - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); recv_emitted = 1; } } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && ssa->cfg.blocks[b].len == 1 && (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { /* don't generate code for BB with single opcode */ - dasm_free(&dasm_state); + dasm_free(&ctx); if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } return SUCCESS; } else { - zend_jit_label(&dasm_state, ssa->cfg.blocks_count + b); - zend_jit_prologue(&dasm_state); + zend_jit_label(&ctx, ssa->cfg.blocks_count + b); + zend_jit_prologue(&ctx); } } is_terminated = 0; - zend_jit_label(&dasm_state, b); + zend_jit_label(&ctx, b); +#else /* ZEND_JIT_IR */ + + if (ssa->cfg.flags & ZEND_FUNC_IRREDUCIBLE) { + /* We can't order blocks properly */ + return FAILURE; + } + + if (rt_opline) { + /* Set BB_ENTRY flag to limit register usage across the OSR ENTRY point */ + ssa->cfg.blocks[ssa->cfg.map[rt_opline - op_array->opcodes]].flags |= ZEND_BB_ENTRY; + } + + zend_jit_start(&ctx, op_array, ssa); + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + checkpoint = zend_arena_checkpoint(CG(arena)); + zend_jit_allocate_registers(&ctx, op_array, ssa); + ra = ctx.ra; + } + + /* Process blocks in Reverse Post Order */ + int *sorted_blocks = alloca(sizeof(int) * ssa->cfg.blocks_count); + int n = zend_jit_compute_post_order(&ssa->cfg, 0, sorted_blocks); + + while (n > 0) { + b = sorted_blocks[--n]; + if ((ssa->cfg.blocks[b].flags & ZEND_BB_REACHABLE) == 0) { + continue; + } + + if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) { + opline = op_array->opcodes + ssa->cfg.blocks[b].start; + if (ssa->cfg.flags & ZEND_CFG_RECV_ENTRY) { + if 
(opline->opcode == ZEND_RECV_INIT) { + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { + if (opline != op_array->opcodes && (opline-1)->opcode != ZEND_RECV_INIT) { + zend_jit_recv_entry(&ctx, b); + } + } else { + if (opline != op_array->opcodes && recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } + } + recv_emitted = 1; + } else if (opline->opcode == ZEND_RECV) { + if (!(op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) { + /* skip */ + zend_jit_bb_start(&ctx, b); + zend_jit_bb_end(&ctx, b); + continue; + } else if (recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } else { + recv_emitted = 1; + } + } else { + if (recv_emitted) { + zend_jit_recv_entry(&ctx, b); + } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && + ssa->cfg.blocks[b].len == 1 && + (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { + /* don't generate code for BB with single opcode */ + zend_jit_free_ctx(&ctx); + + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + zend_arena_release(&CG(arena), checkpoint); + } + return SUCCESS; + } + } + } else if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE && + ssa->cfg.blocks[b].len == 1 && + (ssa->cfg.blocks[b].flags & ZEND_BB_EXIT)) { + /* don't generate code for BB with single opcode */ + zend_jit_free_ctx(&ctx); + + if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { + zend_arena_release(&CG(arena), checkpoint); + } + return SUCCESS; + } + } + + zend_jit_bb_start(&ctx, b); + + if ((JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) && ctx.ra) { + zend_ssa_phi *phi = ssa->blocks[b].phis; + + /* First try to insert IR Phi */ + while (phi) { + zend_jit_reg_var *ival = &ctx.ra[phi->ssa_var]; + + if (ival->ref) { + if (ival->flags & ZREG_PI) { + zend_jit_gen_pi(jit, phi); + } else if (ival->flags & ZREG_PHI) { + zend_jit_gen_phi(jit, phi); + } + } + phi = phi->next; + } + } + + if (rt_opline + && (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY)) == 0 + && rt_opline == op_array->opcodes + ssa->cfg.blocks[b].start) { + zend_jit_osr_entry(&ctx, b); /* OSR (On-Stack-Replacement) Entry-Point */ + } +#endif /* ZEND_JIT_IR */ + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { if ((ssa->cfg.blocks[b].flags & ZEND_BB_FOLLOW) && ssa->cfg.blocks[b].start != 0 @@ -2869,47 +3217,87 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_SWITCH_LONG || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_SWITCH_STRING || op_array->opcodes[ssa->cfg.blocks[b].start - 1].opcode == ZEND_MATCH)) { +#ifndef ZEND_JIT_IR zend_jit_reset_last_valid_opline(); - if (!zend_jit_set_ip(&dasm_state, op_array->opcodes + ssa->cfg.blocks[b].start)) { + if (!zend_jit_set_ip(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start)) { goto jit_failure; } +#else + zend_jit_reset_last_valid_opline(&ctx); +#endif } else { +#ifndef ZEND_JIT_IR zend_jit_set_last_valid_opline(op_array->opcodes + ssa->cfg.blocks[b].start); +#else + zend_jit_set_last_valid_opline(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start); +#endif } } else if (ssa->cfg.blocks[b].flags & ZEND_BB_TARGET) { +#ifndef ZEND_JIT_IR zend_jit_reset_last_valid_opline(); } else if (ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_RECV_ENTRY|ZEND_BB_ENTRY)) { zend_jit_set_last_valid_opline(op_array->opcodes + ssa->cfg.blocks[b].start); +#else + zend_jit_reset_last_valid_opline(&ctx); + } else if (ssa->cfg.blocks[b].flags & ZEND_BB_RECV_ENTRY) { + zend_jit_reset_last_valid_opline(&ctx); + } else if 
(ssa->cfg.blocks[b].flags & (ZEND_BB_START|ZEND_BB_ENTRY)) { + zend_jit_set_last_valid_opline(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start); +#endif } if (ssa->cfg.blocks[b].flags & ZEND_BB_LOOP_HEADER) { - if (!zend_jit_check_timeout(&dasm_state, op_array->opcodes + ssa->cfg.blocks[b].start, NULL)) { + if (!zend_jit_check_timeout(&ctx, op_array->opcodes + ssa->cfg.blocks[b].start, NULL)) { goto jit_failure; } } if (!ssa->cfg.blocks[b].len) { +#ifdef ZEND_JIT_IR + zend_jit_bb_end(&ctx, b); +#endif continue; } if ((JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL) && ra) { zend_ssa_phi *phi = ssa->blocks[b].phis; while (phi) { +#ifndef ZEND_JIT_IR zend_lifetime_interval *ival = ra[phi->ssa_var]; if (ival) { if (ival->flags & ZREG_LOAD) { ZEND_ASSERT(ival->reg != ZREG_NONE); - if (!zend_jit_load_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { + if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) { goto jit_failure; } } else if (ival->flags & ZREG_STORE) { ZEND_ASSERT(ival->reg != ZREG_NONE); - if (!zend_jit_store_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, 1)) { + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg, 1)) { + goto jit_failure; + } + } + } +#else + zend_jit_reg_var *ival = &ra[phi->ssa_var]; + + if (ival->ref) { + if (ival->flags & ZREG_LOAD) { + ZEND_ASSERT(ival->ref == IR_NULL); + + if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var)) { + goto jit_failure; + } + } else if (ival->flags & ZREG_STORE) { + ZEND_ASSERT(ival->ref != IR_NULL); + + if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var, 1)) { goto jit_failure; } } } +#endif + phi = phi->next; } } @@ -2965,7 +3353,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op res_addr = 0; } op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_inc_dec(&dasm_state, opline, + if (!zend_jit_inc_dec(&ctx, opline, op1_info, OP1_REG_ADDR(), op1_def_info, OP1_DEF_REG_ADDR(), res_use_info, res_info, @@ -2986,9 +3374,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } op1_info = OP1_INFO(); op2_info = OP2_INFO(); +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } +#endif if (!(op1_info & MAY_BE_LONG) || !(op2_info & MAY_BE_LONG)) { break; @@ -3000,7 +3390,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op i++; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { @@ -3019,7 +3409,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_long_math(&dasm_state, opline, + if (!zend_jit_long_math(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), res_use_info, RES_INFO(), res_addr, @@ -3054,7 +3444,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op i++; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { @@ -3077,11 +3467,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->opcode 
== ZEND_ADD && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { - if (!zend_jit_add_arrays(&dasm_state, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_addr)) { + if (!zend_jit_add_arrays(&ctx, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_addr)) { goto jit_failure; } } else { - if (!zend_jit_math(&dasm_state, opline, + if (!zend_jit_math(&ctx, opline, op1_info, OP1_REG_ADDR(), op2_info, OP2_REG_ADDR(), res_use_info, res_info, res_addr, @@ -3110,11 +3500,11 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && zend_jit_next_is_send_result(opline)) { i++; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } - if (!zend_jit_concat(&dasm_state, opline, + if (!zend_jit_concat(&ctx, opline, op1_info, op2_info, res_addr, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3134,7 +3524,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op break; } op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_assign_op(&dasm_state, opline, + if (!zend_jit_assign_op(&ctx, opline, op1_info, op1_def_info, OP1_RANGE(), op2_info, OP2_RANGE(), (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (op1_def_info & MAY_BE_DOUBLE) && zend_may_overflow(opline, ssa_op, op_array, ssa), @@ -3153,7 +3543,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op opline->extended_value, MAY_BE_ANY, OP1_DATA_INFO())) { break; } - if (!zend_jit_assign_dim_op(&dasm_state, opline, + if (!zend_jit_assign_dim_op(&ctx, opline, OP1_INFO(), OP1_DEF_INFO(), OP1_REG_ADDR(), OP2_INFO(), OP1_DATA_INFO(), OP1_DATA_RANGE(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3167,7 +3557,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (PROFITABILITY_CHECKS && (!ssa->ops || !ssa->var_info)) { break; } - if (!zend_jit_assign_dim(&dasm_state, opline, + if (!zend_jit_assign_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), OP2_INFO(), OP1_DATA_INFO(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3211,7 +3601,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_incdec_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_incdec_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN)) { goto jit_failure; @@ -3259,7 +3649,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_assign_obj_op(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj_op(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, OP1_DATA_INFO(), OP1_DATA_RANGE(), 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN)) { goto jit_failure; @@ -3300,7 +3690,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_assign_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, OP1_DATA_INFO(), 0, ce, ce_is_instanceof, on_this, 0, NULL, IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3338,16 +3728,19 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && (!(op1_info & MAY_HAVE_DTOR) || !(op1_info & MAY_BE_RC1))) { i++; 
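/* The next opline is the fused SEND_VAL (skipped via i++ above): the assignment result is written directly into the callee argument slot addressed through the reused IP (RX) register. */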
res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign(&dasm_state, opline, + if (!zend_jit_assign(&ctx, opline, op1_info, OP1_REG_ADDR(), OP1_DEF_INFO(), OP1_DEF_REG_ADDR(), OP2_INFO(), op2_addr, op2_def_addr, res_info, res_addr, +#ifdef ZEND_JIT_IR + 0, +#endif zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -3361,7 +3754,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_def_addr = op1_addr; } - if (!zend_jit_qm_assign(&dasm_state, opline, + if (!zend_jit_qm_assign(&ctx, opline, OP1_INFO(), op1_addr, op1_def_addr, -1, RES_INFO(), RES_REG_ADDR())) { goto jit_failure; @@ -3370,7 +3763,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_INIT_FCALL: case ZEND_INIT_FCALL_BY_NAME: case ZEND_INIT_NS_FCALL_BY_NAME: - if (!zend_jit_init_fcall(&dasm_state, opline, b, op_array, ssa, ssa_op, call_level, NULL, 0)) { + if (!zend_jit_init_fcall(&ctx, opline, b, op_array, ssa, ssa_op, call_level, NULL, 0)) { goto jit_failure; } goto done; @@ -3384,7 +3777,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op && opline->op2.num > MAX_ARG_FLAG_NUM) { break; } - if (!zend_jit_send_val(&dasm_state, opline, + if (!zend_jit_send_val(&ctx, opline, OP1_INFO(), OP1_REG_ADDR())) { goto jit_failure; } @@ -3394,7 +3787,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* Named parameters not supported in JIT (yet) */ break; } - if (!zend_jit_send_ref(&dasm_state, opline, op_array, + if (!zend_jit_send_ref(&ctx, opline, op_array, OP1_INFO(), 0)) { goto jit_failure; } @@ -3421,7 +3814,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_def_addr = op1_addr; } - if (!zend_jit_send_var(&dasm_state, opline, op_array, + if (!zend_jit_send_var(&ctx, opline, op_array, OP1_INFO(), op1_addr, op1_def_addr)) { goto jit_failure; } @@ -3434,22 +3827,24 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->op2.num > MAX_ARG_FLAG_NUM) { break; } - if (!zend_jit_check_func_arg(&dasm_state, opline)) { + if (!zend_jit_check_func_arg(&ctx, opline)) { goto jit_failure; } goto done; case ZEND_CHECK_UNDEF_ARGS: - if (!zend_jit_check_undef_args(&dasm_state, opline)) { + if (!zend_jit_check_undef_args(&ctx, opline)) { goto jit_failure; } goto done; case ZEND_DO_UCALL: +#ifndef ZEND_JIT_IR is_terminated = 1; +#endif ZEND_FALLTHROUGH; case ZEND_DO_ICALL: case ZEND_DO_FCALL_BY_NAME: case ZEND_DO_FCALL: - if (!zend_jit_do_fcall(&dasm_state, opline, op_array, ssa, call_level, b + 1, NULL)) { + if (!zend_jit_do_fcall(&ctx, opline, op_array, ssa, call_level, b + 1, NULL)) { goto jit_failure; } goto done; @@ -3480,7 +3875,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, OP1_INFO(), OP1_RANGE(), OP1_REG_ADDR(), OP2_INFO(), OP2_RANGE(), OP2_REG_ADDR(), res_addr, @@ -3494,9 +3889,14 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_IS_IDENTICAL: case ZEND_IS_NOT_IDENTICAL: case ZEND_CASE_STRICT: + res_addr = RES_REG_ADDR(); if ((opline->result_type & IS_TMP_VAR) && (i + 1) <= end && ((opline+1)->opcode == ZEND_JMPZ +#ifdef ZEND_JIT_IR + 
|| (opline+1)->opcode == ZEND_JMPZ_EX + || (opline+1)->opcode == ZEND_JMPNZ_EX +#endif || (opline+1)->opcode == ZEND_JMPNZ) && (opline+1)->op1_type == IS_TMP_VAR && (opline+1)->op1.var == opline->result.var) { @@ -3504,14 +3904,21 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = (opline+1)->opcode; target_label = ssa->cfg.blocks[b].successors[0]; target_label2 = ssa->cfg.blocks[b].successors[1]; +#ifdef ZEND_JIT_IR + /* For EX variant write into the result of EX opcode. */ + if ((opline+1)->opcode == ZEND_JMPZ_EX + || (opline+1)->opcode == ZEND_JMPNZ_EX) { + res_addr = OP_REG_ADDR(opline + 1, result_type, result, result_def); + } +#endif } else { smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, OP1_INFO(), OP1_RANGE(), OP1_REG_ADDR(), OP2_INFO(), OP2_RANGE(), OP2_REG_ADDR(), - RES_REG_ADDR(), + res_addr, zend_may_throw(opline, ssa_op, op_array, ssa), smart_branch_opcode, target_label, target_label2, NULL, 0)) { @@ -3533,7 +3940,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_defined(&dasm_state, opline, smart_branch_opcode, target_label, target_label2, NULL)) { + if (!zend_jit_defined(&ctx, opline, smart_branch_opcode, target_label, target_label2, NULL)) { goto jit_failure; } goto done; @@ -3556,7 +3963,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_type_check(&dasm_state, opline, OP1_INFO(), smart_branch_opcode, target_label, target_label2, NULL)) { + if (!zend_jit_type_check(&ctx, opline, OP1_INFO(), smart_branch_opcode, target_label, target_label2, NULL)) { goto jit_failure; } goto done; @@ -3568,60 +3975,62 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op || !op_array->function_name // TODO: support for IS_UNDEF ??? 
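// (returns that may observe an undefined op1 fall back to the VM handler via zend_jit_tail_handler() below)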
|| (op1_info & MAY_BE_UNDEF)) { - if (!zend_jit_tail_handler(&dasm_state, opline)) { + if (!zend_jit_tail_handler(&ctx, opline)) { goto jit_failure; } } else { - int j; - bool left_frame = 0; - - if (!zend_jit_return(&dasm_state, opline, op_array, + if (!zend_jit_return(&ctx, opline, op_array, op1_info, OP1_REG_ADDR())) { goto jit_failure; } +#ifndef ZEND_JIT_IR + bool left_frame = 0; if (jit_return_label >= 0) { - if (!zend_jit_jmp(&dasm_state, jit_return_label)) { + if (!zend_jit_jmp(&ctx, jit_return_label)) { goto jit_failure; } goto done; } jit_return_label = ssa->cfg.blocks_count * 2; - if (!zend_jit_label(&dasm_state, jit_return_label)) { + if (!zend_jit_label(&ctx, jit_return_label)) { goto jit_failure; } if (op_array->last_var > 100) { /* Too many CVs to unroll */ - if (!zend_jit_free_cvs(&dasm_state)) { + if (!zend_jit_free_cvs(&ctx)) { goto jit_failure; } left_frame = 1; } if (!left_frame) { + int j; + for (j = 0 ; j < op_array->last_var; j++) { uint32_t info = zend_ssa_cv_info(op_array, ssa, j); if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { if (!left_frame) { left_frame = 1; - if (!zend_jit_leave_frame(&dasm_state)) { + if (!zend_jit_leave_frame(&ctx)) { goto jit_failure; } } - if (!zend_jit_free_cv(&dasm_state, info, j)) { + if (!zend_jit_free_cv(&ctx, info, j)) { goto jit_failure; } } } } - if (!zend_jit_leave_func(&dasm_state, op_array, opline, op1_info, left_frame, + if (!zend_jit_leave_func(&ctx, op_array, opline, op1_info, left_frame, NULL, NULL, (ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, 1)) { goto jit_failure; } +#endif } goto done; case ZEND_BOOL: case ZEND_BOOL_NOT: - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), RES_REG_ADDR(), -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3634,7 +4043,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline > op_array->opcodes + ssa->cfg.blocks[b].start && ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + if (!zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } goto done; @@ -3647,7 +4056,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { res_addr = RES_REG_ADDR(); } - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), res_addr, ssa->cfg.blocks[b].successors[0], ssa->cfg.blocks[b].successors[1], zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3674,7 +4083,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_isset_isempty_cv(&dasm_state, opline, + if (!zend_jit_isset_isempty_cv(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), smart_branch_opcode, target_label, target_label2, NULL)) { @@ -3703,7 +4112,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_in_array(&dasm_state, opline, + if (!zend_jit_in_array(&ctx, opline, op1_info, OP1_REG_ADDR(), smart_branch_opcode, target_label, target_label2, NULL)) { @@ -3716,7 +4125,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (PROFITABILITY_CHECKS && (!ssa->ops ||
!ssa->var_info)) { break; } - if (!zend_jit_fetch_dim_read(&dasm_state, opline, ssa, ssa_op, + if (!zend_jit_fetch_dim_read(&ctx, opline, ssa, ssa_op, OP1_INFO(), OP1_REG_ADDR(), 0, OP2_INFO(), RES_INFO(), RES_REG_ADDR(), IS_UNKNOWN)) { goto jit_failure; @@ -3732,7 +4141,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline->op1_type != IS_CV) { break; } - if (!zend_jit_fetch_dim(&dasm_state, opline, + if (!zend_jit_fetch_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), OP2_INFO(), RES_REG_ADDR(), IS_UNKNOWN)) { goto jit_failure; } @@ -3759,7 +4168,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op smart_branch_opcode = 0; target_label = target_label2 = (uint32_t)-1; } - if (!zend_jit_isset_isempty_dim(&dasm_state, opline, + if (!zend_jit_isset_isempty_dim(&ctx, opline, OP1_INFO(), OP1_REG_ADDR(), 0, OP2_INFO(), IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -3802,7 +4211,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_fetch_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_fetch_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, 0, ce, ce_is_instanceof, on_this, 0, 0, NULL, IS_UNKNOWN, zend_may_throw(opline, ssa_op, op_array, ssa))) { @@ -3815,17 +4224,17 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } else { op1_info = OP1_INFO(); } - if (!zend_jit_bind_global(&dasm_state, opline, op1_info)) { + if (!zend_jit_bind_global(&ctx, opline, op1_info)) { goto jit_failure; } goto done; case ZEND_RECV: - if (!zend_jit_recv(&dasm_state, opline, op_array)) { + if (!zend_jit_recv(&ctx, opline, op_array)) { goto jit_failure; } goto done; case ZEND_RECV_INIT: - if (!zend_jit_recv_init(&dasm_state, opline, op_array, + if (!zend_jit_recv_init(&ctx, opline, op_array, (opline + 1)->opcode != ZEND_RECV_INIT, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -3833,7 +4242,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op goto done; case ZEND_FREE: case ZEND_FE_FREE: - if (!zend_jit_free(&dasm_state, opline, OP1_INFO(), + if (!zend_jit_free(&ctx, opline, OP1_INFO(), zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -3843,7 +4252,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_echo(&dasm_state, opline, op1_info)) { + if (!zend_jit_echo(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -3852,7 +4261,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_strlen(&dasm_state, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR())) { + if (!zend_jit_strlen(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -3861,19 +4270,19 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_ARRAY) { break; } - if (!zend_jit_count(&dasm_state, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { + if (!zend_jit_count(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } goto done; case ZEND_FETCH_THIS: - if 
(!zend_jit_fetch_this(&dasm_state, opline, op_array, 0)) { + if (!zend_jit_fetch_this(&ctx, opline, op_array, 0)) { goto jit_failure; } goto done; case ZEND_SWITCH_LONG: case ZEND_SWITCH_STRING: case ZEND_MATCH: - if (!zend_jit_switch(&dasm_state, opline, op_array, ssa, NULL, NULL)) { + if (!zend_jit_switch(&ctx, opline, op_array, ssa, NULL, NULL)) { goto jit_failure; } goto done; @@ -3894,7 +4303,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* TODO May need reference unwrapping. */ break; } - if (!zend_jit_verify_return_type(&dasm_state, opline, op_array, OP1_INFO())) { + if (!zend_jit_verify_return_type(&ctx, opline, op_array, OP1_INFO())) { goto jit_failure; } goto done; @@ -3903,7 +4312,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) != MAY_BE_ARRAY) { break; } - if (!zend_jit_fe_reset(&dasm_state, opline, op1_info)) { + if (!zend_jit_fe_reset(&ctx, opline, op1_info)) { goto jit_failure; } goto done; @@ -3912,13 +4321,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op1_info & MAY_BE_ANY) != MAY_BE_ARRAY) { break; } - if (!zend_jit_fe_fetch(&dasm_state, opline, op1_info, OP2_INFO(), + if (!zend_jit_fe_fetch(&ctx, opline, op1_info, OP2_INFO(), ssa->cfg.blocks[b].successors[0], opline->opcode, NULL)) { goto jit_failure; } goto done; case ZEND_FETCH_CONSTANT: - if (!zend_jit_fetch_constant(&dasm_state, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { + if (!zend_jit_fetch_constant(&ctx, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) { goto jit_failure; } goto done; @@ -3953,9 +4362,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } } } - if (!zend_jit_init_method_call(&dasm_state, opline, b, op_array, ssa, ssa_op, call_level, + if (!zend_jit_init_method_call(&ctx, opline, b, op_array, ssa, ssa_op, call_level, op1_info, op1_addr, ce, ce_is_instanceof, on_this, 0, NULL, - NULL, 0, 0)) { + NULL, 0, +#ifdef ZEND_JIT_IR + -1, -1, +#endif + 0)) { goto jit_failure; } goto done; @@ -3966,7 +4379,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if ((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) { break; } - if (!zend_jit_rope(&dasm_state, opline, op2_info)) { + if (!zend_jit_rope(&ctx, opline, op2_info)) { goto jit_failure; } goto done; @@ -3981,12 +4394,16 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (opline == op_array->opcodes || opline->opcode != op_array->opcodes[i-1].opcode) { /* repeatable opcodes */ - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } } +#ifndef ZEND_JIT_IR zend_jit_set_last_valid_opline(opline+1); +#else + zend_jit_set_last_valid_opline(&ctx, opline+1); +#endif break; case ZEND_NOP: case ZEND_OP_DATA: @@ -3998,14 +4415,16 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { const zend_op *target = OP_JMP_ADDR(opline, opline->op1); - if (!zend_jit_set_ip(&dasm_state, target)) { + if (!zend_jit_set_ip(&ctx, target)) { goto jit_failure; } } - if (!zend_jit_jmp(&dasm_state, ssa->cfg.blocks[b].successors[0])) { +#ifndef ZEND_JIT_IR + if (!zend_jit_jmp(&ctx, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } is_terminated = 1; +#endif break; case ZEND_CATCH: case ZEND_FAST_CALL: @@ -4019,27 
+4438,39 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op /* switch through trampoline */ case ZEND_YIELD: case ZEND_YIELD_FROM: - if (!zend_jit_tail_handler(&dasm_state, opline)) { +#ifdef ZEND_JIT_IR + case ZEND_THROW: + case ZEND_VERIFY_NEVER_TYPE: +#endif + if (!zend_jit_tail_handler(&ctx, opline)) { goto jit_failure; } +#ifndef ZEND_JIT_IR is_terminated = 1; +#else + /* THROW and EXIT may be used in the middle of BB */ + /* don't generate code for the rest of BB */ + i = end; +#endif break; /* stackless execution */ case ZEND_INCLUDE_OR_EVAL: case ZEND_DO_FCALL: case ZEND_DO_UCALL: case ZEND_DO_FCALL_BY_NAME: - if (!zend_jit_call(&dasm_state, opline, b + 1)) { + if (!zend_jit_call(&ctx, opline, b + 1)) { goto jit_failure; } +#ifndef ZEND_JIT_IR is_terminated = 1; +#endif break; case ZEND_JMPZ: case ZEND_JMPNZ: if (opline > op_array->opcodes + ssa->cfg.blocks[b].start && ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + if (!zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } goto done; @@ -4056,14 +4487,14 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: case ZEND_BIND_INIT_STATIC_OR_JMP: - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa)) || - !zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { + !zend_jit_cond_jmp(&ctx, opline + 1, ssa->cfg.blocks[b].successors[0])) { goto jit_failure; } break; case ZEND_NEW: - if (!zend_jit_handler(&dasm_state, opline, 1)) { + if (!zend_jit_handler(&ctx, opline, 1)) { return 0; } if (opline->extended_value == 0 && (opline+1)->opcode == ZEND_DO_FCALL) { @@ -4091,13 +4522,18 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op if (!ce || !(ce->ce_flags & ZEND_ACC_LINKED) || ce->constructor) { const zend_op *next_opline = opline + 1; - zend_jit_cond_jmp(&dasm_state, next_opline, ssa->cfg.blocks[b].successors[0]); +#ifndef ZEND_JIT_IR + zend_jit_cond_jmp(&ctx, next_opline, ssa->cfg.blocks[b].successors[0]); if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { - zend_jit_call(&dasm_state, next_opline, b + 1); + zend_jit_call(&ctx, next_opline, b + 1); is_terminated = 1; } else { - zend_jit_do_fcall(&dasm_state, next_opline, op_array, ssa, call_level, b + 1, NULL); + zend_jit_do_fcall(&ctx, next_opline, op_array, ssa, call_level, b + 1, NULL); } +#else + ZEND_ASSERT(b + 1 == ssa->cfg.blocks[b].successors[0]); + zend_jit_constructor(&ctx, next_opline, op_array, ssa, call_level, b + 1); +#endif } /* We skip over the DO_FCALL, so decrement call_level ourselves. 
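(The ZEND_NEW fast path above has already emitted the constructor call: zend_jit_constructor() under the IR JIT, zend_jit_call()/zend_jit_do_fcall() otherwise.)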
*/ @@ -4105,20 +4541,26 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op } break; default: - if (!zend_jit_handler(&dasm_state, opline, + if (!zend_jit_handler(&ctx, opline, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } if (i == end && (opline->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { /* smart branch split across basic blocks */ - if (!zend_jit_cond_jmp(&dasm_state, opline + 2, ssa->cfg.blocks[b+1].successors[0])) { +#ifndef ZEND_JIT_IR + if (!zend_jit_cond_jmp(&ctx, opline + 2, ssa->cfg.blocks[b+1].successors[0])) { goto jit_failure; } - if (!zend_jit_jmp(&dasm_state, ssa->cfg.blocks[b+1].successors[1])) { + if (!zend_jit_jmp(&ctx, ssa->cfg.blocks[b+1].successors[1])) { goto jit_failure; } is_terminated = 1; +#else + if (!zend_jit_set_cond(&ctx, opline + 2, opline->result.var)) { + goto jit_failure; + } +#endif } } done: @@ -4131,14 +4573,61 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op call_level--; } } +#ifdef ZEND_JIT_IR + zend_jit_bb_end(&ctx, b); +#endif } - handler = dasm_link_and_encode(&dasm_state, op_array, ssa, rt_opline, ra, NULL, 0, +#ifndef ZEND_JIT_IR + handler = dasm_link_and_encode(&ctx, op_array, ssa, rt_opline, ra, NULL, 0, (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) ? SP_ADJ_VM : SP_ADJ_RET, SP_ADJ_JIT); if (!handler) { goto jit_failure; } - dasm_free(&dasm_state); + dasm_free(&ctx); +#else + if (jit->return_inputs) { + zend_jit_common_return(jit); + + bool left_frame = 0; + if (op_array->last_var > 100) { + /* Too many CVs to unroll */ + if (!zend_jit_free_cvs(&ctx)) { + goto jit_failure; + } + left_frame = 1; + } + if (!left_frame) { + int j; + + for (j = 0 ; j < op_array->last_var; j++) { + uint32_t info = zend_ssa_cv_info(op_array, ssa, j); + + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(&ctx)) { + goto jit_failure; + } + } + if (!zend_jit_free_cv(&ctx, info, j)) { + goto jit_failure; + } + } + } + } + if (!zend_jit_leave_func(&ctx, op_array, NULL, MAY_BE_ANY, left_frame, + NULL, NULL, (ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, 1)) { + goto jit_failure; + } + } + + handler = zend_jit_finish(&ctx); + if (!handler) { + goto jit_failure; + } + zend_jit_free_ctx(&ctx); +#endif if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); @@ -4146,9 +4635,13 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op return SUCCESS; jit_failure: - if (dasm_state) { - dasm_free(&dasm_state); +#ifndef ZEND_JIT_IR + if (ctx) { + dasm_free(&ctx); } +#else + zend_jit_free_ctx(&ctx); +#endif if (JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) { zend_arena_release(&CG(arena), checkpoint); } @@ -4426,8 +4919,8 @@ static int zend_jit_setup_hot_counters(zend_op_array *op_array) zend_cfg cfg; uint32_t i; - ZEND_ASSERT(zend_jit_func_hot_counter_handler != NULL); - ZEND_ASSERT(zend_jit_loop_hot_counter_handler != NULL); + ZEND_ASSERT(!JIT_G(hot_func) || zend_jit_func_hot_counter_handler != NULL); + ZEND_ASSERT(!JIT_G(hot_loop) || zend_jit_loop_hot_counter_handler != NULL); if (zend_jit_build_cfg(op_array, &cfg) != SUCCESS) { return FAILURE; } @@ -4528,6 +5021,7 @@ ZEND_EXT_API int zend_jit_op_array(zend_op_array *op_array, zend_script *script) } else { ZEND_UNREACHABLE(); } + return FAILURE; } ZEND_EXT_API int zend_jit_script(zend_script
*script) @@ -4564,6 +5058,7 @@ ZEND_EXT_API int zend_jit_script(zend_script *script) if (zend_jit_op_array_analyze1(call_graph.op_arrays[i], script, &info->ssa) != SUCCESS) { goto jit_failure; } + info->ssa.cfg.flags |= info->flags; info->flags = info->ssa.cfg.flags; } } @@ -4706,6 +5201,7 @@ ZEND_EXT_API void zend_jit_protect(void) static void zend_jit_init_handlers(void) { if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { +#ifndef ZEND_JIT_IR zend_jit_runtime_jit_handler = dasm_labels[zend_lbhybrid_runtime_jit]; zend_jit_profile_jit_handler = dasm_labels[zend_lbhybrid_profile_jit]; zend_jit_func_hot_counter_handler = dasm_labels[zend_lbhybrid_func_hot_counter]; @@ -4713,6 +5209,15 @@ static void zend_jit_init_handlers(void) zend_jit_func_trace_counter_handler = dasm_labels[zend_lbhybrid_func_trace_counter]; zend_jit_ret_trace_counter_handler = dasm_labels[zend_lbhybrid_ret_trace_counter]; zend_jit_loop_trace_counter_handler = dasm_labels[zend_lbhybrid_loop_trace_counter]; +#else + zend_jit_runtime_jit_handler = zend_jit_stub_handlers[jit_stub_hybrid_runtime_jit]; + zend_jit_profile_jit_handler = zend_jit_stub_handlers[jit_stub_hybrid_profile_jit]; + zend_jit_func_hot_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_func_hot_counter]; + zend_jit_loop_hot_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_loop_hot_counter]; + zend_jit_func_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_func_trace_counter]; + zend_jit_ret_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_ret_trace_counter]; + zend_jit_loop_trace_counter_handler = zend_jit_stub_handlers[jit_stub_hybrid_loop_trace_counter]; +#endif } else { zend_jit_runtime_jit_handler = (const void*)zend_runtime_jit; zend_jit_profile_jit_handler = (const void*)zend_jit_profile_helper; @@ -4724,6 +5229,7 @@ static void zend_jit_init_handlers(void) } } +#ifndef ZEND_JIT_IR static int zend_jit_make_stubs(void) { dasm_State* dasm_state = NULL; @@ -4749,6 +5255,7 @@ static int zend_jit_make_stubs(void) dasm_free(&dasm_state); return 1; } +#endif static void zend_jit_globals_ctor(zend_jit_globals *jit_globals) { @@ -4925,21 +5432,13 @@ ZEND_EXT_API int zend_jit_check_support(void) ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) { +#ifndef ZEND_JIT_IR int ret; +#endif zend_jit_halt_op = zend_get_halt_op(); - - if (zend_jit_setup() != SUCCESS) { - // TODO: error reporting and cleanup ??? 
- return FAILURE; - } - zend_jit_profile_counter_rid = zend_get_op_array_extension_handle(ACCELERATOR_PRODUCT_NAME); -#ifdef HAVE_GDB - zend_jit_gdb_init(); -#endif - #if ZEND_JIT_SUPPORT_CLDEMOTE cpu_support_cldemote = zend_cpu_supports_cldemote(); #endif @@ -4950,6 +5449,7 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) dasm_buf = buf; dasm_size = size; + dasm_ptr = dasm_end = (void*)(((char*)dasm_buf) + size - sizeof(*dasm_ptr) * 2); #ifdef HAVE_MPROTECT #ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP @@ -4988,17 +5488,48 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) } #endif - dasm_ptr = dasm_end = (void*)(((char*)dasm_buf) + size - sizeof(*dasm_ptr) * 2); if (!reattached) { zend_jit_unprotect(); *dasm_ptr = dasm_buf; -#if _WIN32 +#if defined(_WIN32) && !defined(ZEND_JIT_IR) /* reserve space for global labels */ *dasm_ptr = (void**)*dasm_ptr + zend_lb_MAX; +#elif defined(_WIN32) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + *dasm_ptr = (void**)*dasm_ptr + sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]); +#elif defined(IR_TARGET_AARCH64) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + *dasm_ptr = (void**)*dasm_ptr + (sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])) * 2; + memset(zend_jit_stub_handlers, 0, (sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])) * 2 * sizeof(void*)); #endif + *dasm_ptr = (void*)ZEND_MM_ALIGNED_SIZE_EX(((size_t)(*dasm_ptr)), 16); zend_jit_protect(); + } else { +#if (defined(_WIN32) || defined(IR_TARGET_AARCH64)) && defined(ZEND_JIT_IR) + zend_jit_stub_handlers = dasm_buf; + zend_jit_init_handlers(); +#endif + } + +#ifndef ZEND_JIT_IR + if (zend_jit_setup() != SUCCESS) { + // TODO: error reporting and cleanup ??? + return FAILURE; } +#ifdef HAVE_GDB + zend_jit_gdb_init(); +#endif + +#ifdef HAVE_OPROFILE + if (JIT_G(debug) & ZEND_JIT_DEBUG_OPROFILE) { + if (!zend_jit_oprofile_startup()) { + // TODO: error reporting and cleanup ??? + return FAILURE; + } + } +#endif + #ifdef HAVE_DISASM if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { if (!zend_jit_disasm_init()) { @@ -5017,7 +5548,7 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) if (!reattached) { zend_jit_unprotect(); ret = zend_jit_make_stubs(); -#if _WIN32 +#if defined(_WIN32) && !defined(ZEND_JIT_IR) /* save global labels */ memcpy(dasm_buf, dasm_labels, sizeof(void*) * zend_lb_MAX); #endif @@ -5027,23 +5558,36 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) return FAILURE; } } else { -#if _WIN32 +#if defined(_WIN32) /* restore global labels */ memcpy(dasm_labels, dasm_buf, sizeof(void*) * zend_lb_MAX); zend_jit_init_handlers(); #endif } +#else /* ZEND_JIT_IR */ + zend_jit_unprotect(); + if (zend_jit_setup() != SUCCESS) { + zend_jit_protect(); + // TODO: error reporting and cleanup ??? 
+ return FAILURE; + } + zend_jit_protect(); + zend_jit_init_handlers(); +#endif /* ZEND_JIT_IR */ + if (zend_jit_trace_startup(reattached) != SUCCESS) { return FAILURE; } zend_jit_unprotect(); +#ifndef ZEND_JIT_IR #if ZEND_JIT_TARGET_ARM64 /* reserve space for global labels veneers */ dasm_labels_veneers = *dasm_ptr; *dasm_ptr = (void**)*dasm_ptr + ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT); memset(dasm_labels_veneers, 0, sizeof(void*) * ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT)); +#endif #endif /* save JIT buffer pos */ dasm_ptr[1] = dasm_ptr[0]; @@ -5058,6 +5602,7 @@ ZEND_EXT_API void zend_jit_shutdown(void) fprintf(stderr, "\nJIT memory usage: %td\n", (ptrdiff_t)((char*)*dasm_ptr - (char*)dasm_buf)); } +#ifndef ZEND_JIT_IR #ifdef HAVE_GDB if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { zend_jit_gdb_unregister(); @@ -5073,6 +5618,10 @@ ZEND_EXT_API void zend_jit_shutdown(void) zend_jit_perf_jitdump_close(); } #endif +#else + zend_jit_shutdown_ir(); +#endif + #ifdef ZTS ts_free_id(jit_globals_id); #else @@ -5190,8 +5739,10 @@ ZEND_EXT_API void zend_jit_restart(void) if (dasm_buf) { zend_jit_unprotect(); +#ifndef ZEND_JIT_IR //??? #if ZEND_JIT_TARGET_ARM64 memset(dasm_labels_veneers, 0, sizeof(void*) * ZEND_MM_ALIGNED_SIZE_EX(zend_lb_MAX, DASM_ALIGNMENT)); +#endif #endif /* restore JIT buffer pos */ diff --git a/ext/opcache/jit/zend_jit.h b/ext/opcache/jit/zend_jit.h index 029bdd9a510a3..8a1aab1d03909 100644 --- a/ext/opcache/jit/zend_jit.h +++ b/ext/opcache/jit/zend_jit.h @@ -74,6 +74,16 @@ #define ZEND_JIT_DEBUG_TRACE_TSSA (1<<19) #define ZEND_JIT_DEBUG_TRACE_EXIT_INFO (1<<20) +#define ZEND_JIT_DEBUG_IR_SRC (1<<24) +#define ZEND_JIT_DEBUG_IR_FINAL (1<<25) +#define ZEND_JIT_DEBUG_IR_CFG (1<<26) +#define ZEND_JIT_DEBUG_IR_REGS (1<<27) + +#define ZEND_JIT_DEBUG_IR_AFTER_SCCP (1<<28) +#define ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE (1<<29) +#define ZEND_JIT_DEBUG_IR_AFTER_REGS (1<<30) +#define ZEND_JIT_DEBUG_IR_CODEGEN (1U<<31) + #define ZEND_JIT_DEBUG_PERSISTENT 0x1f0 /* profile and debugger flags can't be changed at run-time */ #define ZEND_JIT_TRACE_MAX_LENGTH 1024 /* max length of single trace */ @@ -155,6 +165,27 @@ ZEND_EXT_API void zend_jit_deactivate(void); ZEND_EXT_API void zend_jit_status(zval *ret); ZEND_EXT_API void zend_jit_restart(void); +#ifdef ZEND_JIT_IR + +#define ZREG_LOAD (1<<0) +#define ZREG_STORE (1<<1) +#define ZREG_LAST_USE (1<<2) + +#define ZREG_PI (1<<3) +#define ZREG_PHI (1<<4) +#define ZREG_FORWARD (1<<5) + +#define ZREG_SPILL_SLOT (1<<3) + +#define ZREG_CONST (1<<4) +#define ZREG_ZVAL_COPY (2<<4) +#define ZREG_TYPE_ONLY (3<<4) +#define ZREG_ZVAL_ADDREF (4<<4) +#define ZREG_THIS (5<<4) + +#define ZREG_NONE -1 + +#else typedef struct _zend_lifetime_interval zend_lifetime_interval; typedef struct _zend_life_range zend_life_range; @@ -187,5 +218,6 @@ struct _zend_lifetime_interval { zend_lifetime_interval *used_as_hint; zend_lifetime_interval *list_next; }; +#endif #endif /* HAVE_JIT_H */ diff --git a/ext/opcache/jit/zend_jit_internal.h b/ext/opcache/jit/zend_jit_internal.h index 49331b76a6649..77df230b8315a 100644 --- a/ext/opcache/jit/zend_jit_internal.h +++ b/ext/opcache/jit/zend_jit_internal.h @@ -21,6 +21,8 @@ #ifndef ZEND_JIT_INTERNAL_H #define ZEND_JIT_INTERNAL_H +#ifndef ZEND_JIT_IR + #include "zend_bitset.h" /* Register Set */ @@ -108,21 +110,20 @@ uint32_t __inline __zend_jit_clz(uint32_t value) { /* Register Names */ extern const char *zend_reg_name[]; +#endif /* ZEND_JIT_IR */ /* Address Encoding */ typedef uintptr_t zend_jit_addr; #define 
IS_CONST_ZVAL 0 #define IS_MEM_ZVAL 1 -#define IS_REG 2 +#define IS_REG 2 /* CPU register or PHP SSA variable number (for IR JIT) */ +#define IS_REF_ZVAL 3 /* IR reference */ #define _ZEND_ADDR_MODE_MASK 0x3 #define _ZEND_ADDR_REG_SHIFT 2 #define _ZEND_ADDR_REG_MASK 0x3f /* no more than 64 registers */ #define _ZEND_ADDR_OFFSET_SHIFT 8 -#define _ZEND_ADDR_REG_STORE_BIT 8 -#define _ZEND_ADDR_REG_LOAD_BIT 9 -#define _ZEND_ADDR_REG_LAST_USE_BIT 10 #define ZEND_ADDR_CONST_ZVAL(zv) \ (((zend_jit_addr)(uintptr_t)(zv)) | IS_CONST_ZVAL) @@ -138,6 +139,13 @@ typedef uintptr_t zend_jit_addr; #define Z_ZV(addr) ((zval*)(addr)) #define Z_OFFSET(addr) ((uint32_t)((addr)>>_ZEND_ADDR_OFFSET_SHIFT)) #define Z_REG(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_SHIFT) & _ZEND_ADDR_REG_MASK)) + +#ifndef ZEND_JIT_IR + +#define _ZEND_ADDR_REG_STORE_BIT 8 +#define _ZEND_ADDR_REG_LOAD_BIT 9 +#define _ZEND_ADDR_REG_LAST_USE_BIT 10 + #define Z_STORE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_STORE_BIT) & 1)) #define Z_LOAD(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LOAD_BIT) & 1)) #define Z_LAST_USE(addr) ((zend_reg)(((addr)>>_ZEND_ADDR_REG_LAST_USE_BIT) & 1)) @@ -178,6 +186,47 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno #define OP_ADDR(opline, type, op) \ _zend_jit_decode_op((opline)->type, (opline)->op, opline, ZREG_NONE) +#define OP_REG_ADDR(opline, type, _op, _ssa_op) \ + _zend_jit_decode_op((opline)->type, (opline)->_op, opline, \ + OP_REG(ssa_op, _ssa_op)) + +#else /* ZEND_JIT_IR */ + +#define ZEND_ADDR_REF_ZVAL(ref) \ + ((((zend_jit_addr)(uintptr_t)(ref)) << _ZEND_ADDR_REG_SHIFT) | \ + IS_REF_ZVAL) + +#define Z_SSA_VAR(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT) +#define Z_IR_REF(addr) ((addr)>>_ZEND_ADDR_REG_SHIFT) + +#define Z_STORE(addr) \ + ((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \ + (jit->ra[Z_SSA_VAR(addr)].flags & ZREG_STORE) : \ + 0) +#define Z_LOAD(addr) \ + ((jit->ra && jit->ra[Z_SSA_VAR(addr)].ref) ? \ + (jit->ra[Z_SSA_VAR(addr)].flags & ZREG_LOAD) : \ + 0) + +#if ZEND_USE_ABS_CONST_ADDR +# define OP_ADDR(opline, type, op) \ + (((opline)->type == IS_CONST) ? \ + ZEND_ADDR_CONST_ZVAL((opline)->op.zv) : \ + ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var)) +#else +# define OP_ADDR(opline, type, op) \ + (((opline)->type == IS_CONST) ? \ + ZEND_ADDR_CONST_ZVAL(RT_CONSTANT(opline, (opline)->op)) : \ + ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline)->op.var)) +#endif + +#define OP_REG_ADDR(opline, type, op, _ssa_op) \ + ((ctx.ra && ssa_op->_ssa_op >= 0 && ctx.ra[ssa_op->_ssa_op].ref) ? 
\ + ZEND_ADDR_REG(ssa_op->_ssa_op) : \ + OP_ADDR(opline, type, op)) + +#endif /* ZEND_JIT_IR */ + #define OP1_ADDR() \ OP_ADDR(opline, op1_type, op1) #define OP2_ADDR() \ @@ -187,10 +236,6 @@ static zend_always_inline zend_jit_addr _zend_jit_decode_op(uint8_t op_type, zno #define OP1_DATA_ADDR() \ OP_ADDR(opline + 1, op1_type, op1) -#define OP_REG_ADDR(opline, type, _op, _ssa_op) \ - _zend_jit_decode_op((opline)->type, (opline)->_op, opline, \ - OP_REG(ssa_op, _ssa_op)) - #define OP1_REG_ADDR() \ OP_REG_ADDR(opline, op1_type, op1, op1_use) #define OP2_REG_ADDR() \ @@ -213,8 +258,15 @@ static zend_always_inline bool zend_jit_same_addr(zend_jit_addr addr1, zend_jit_ { if (addr1 == addr2) { return 1; +#ifndef ZEND_JIT_IR } else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) { return Z_REG(addr1) == Z_REG(addr2); +#else + } else if (Z_MODE(addr1) == IS_REG && Z_MODE(addr2) == IS_REG) { + return Z_SSA_VAR(addr1) == Z_SSA_VAR(addr2); + } else if (Z_MODE(addr1) == IS_REF_ZVAL && Z_MODE(addr2) == IS_REF_ZVAL) { + return Z_IR_REF(addr1) == Z_IR_REF(addr2); +#endif } return 0; } @@ -414,6 +466,8 @@ typedef enum _zend_jit_trace_stop { #define ZEND_JIT_EXIT_METHOD_CALL (1<<9) /* exit because of polymorphic INIT_METHOD_CALL call */ #define ZEND_JIT_EXIT_INVALIDATE (1<<10) /* invalidate current trace */ +#define ZEND_JIT_EXIT_FIXED (1U<<31) /* the exit_info can't be changed by zend_jit_snapshot_handler() */ + typedef union _zend_op_trace_info { zend_op dummy; /* the size of this structure must be the same as zend_op */ struct { @@ -515,8 +569,15 @@ typedef struct _zend_jit_trace_exit_info { uint32_t flags; /* set of ZEND_JIT_EXIT_... */ uint32_t stack_size; uint32_t stack_offset; +#ifdef ZEND_JIT_IR + int32_t poly_func_ref; + int32_t poly_this_ref; + int8_t poly_func_reg; + int8_t poly_this_reg; +#endif } zend_jit_trace_exit_info; +#ifndef ZEND_JIT_IR typedef union _zend_jit_trace_stack { int32_t ssa_var; uint32_t info; @@ -530,6 +591,50 @@ typedef union _zend_jit_trace_stack { #define STACK_VAR(_stack, _slot) \ (_stack)[_slot].ssa_var +#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \ + (_stack)[_slot].ssa_var = _ssa_var; \ + } while (0) + +#define CLEAR_STACK_REF(_stack, _slot) + +#else /* ZEND_JIT_IR */ + +typedef struct _zend_jit_trace_stack { + union { + uint32_t info; + struct { + uint8_t type; /* variable type (for type inference) */ + uint8_t mem_type; /* stack slot type (for eliminate dead type store) */ + int8_t reg; + uint8_t flags; + }; + }; + int32_t ref; +} zend_jit_trace_stack; + +#define STACK_VAR(_stack, _slot) \ + ((int32_t*)(_stack))[_slot] +#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \ + ((int32_t*)(_stack))[_slot] = _ssa_var; \ + } while (0) + +#define CLEAR_STACK_REF(_stack, _slot) do { \ + (_stack)[_slot].ref = IR_UNUSED; \ + (_stack)[_slot].flags = 0; \ + } while (0) +#define STACK_REF(_stack, _slot) \ + (_stack)[_slot].ref +#define SET_STACK_REF(_stack, _slot, _ref) do { \ + (_stack)[_slot].ref = (_ref); \ + (_stack)[_slot].flags = 0; \ + } while (0) +#define SET_STACK_REF_EX(_stack, _slot, _ref, _flags) do { \ + (_stack)[_slot].ref = (_ref); \ + (_stack)[_slot].flags = _flags; \ + } while (0) + +#endif /* ZEND_JIT_IR */ + #define STACK_INFO(_stack, _slot) \ (_stack)[_slot].info #define STACK_TYPE(_stack, _slot) \ @@ -540,9 +645,6 @@ typedef union _zend_jit_trace_stack { (_stack)[_slot].reg #define STACK_FLAGS(_stack, _slot) \ (_stack)[_slot].flags -#define SET_STACK_VAR(_stack, _slot, _ssa_var) do { \ - (_stack)[_slot].ssa_var = _ssa_var; \ - } while (0) 
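/* Note: with ZEND_JIT_IR each stack slot additionally carries an IR ref, so SET_STACK_VAR is defined separately in both variants above, and SET_STACK_TYPE below resets the ref via CLEAR_STACK_REF(). */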
#define SET_STACK_INFO(_stack, _slot, _info) do { \ (_stack)[_slot].info = _info; \ } while (0) @@ -554,6 +656,7 @@ typedef union _zend_jit_trace_stack { } \ (_stack)[_slot].reg = ZREG_NONE; \ (_stack)[_slot].flags = 0; \ + CLEAR_STACK_REF(_stack, _slot); \ } while (0) #define SET_STACK_REG(_stack, _slot, _reg) do { \ (_stack)[_slot].reg = _reg; \ @@ -572,6 +675,13 @@ typedef union _zend_jit_trace_stack { #define ZEND_JIT_TRACE_LOOP (1<<1) #define ZEND_JIT_TRACE_USES_INITIAL_IP (1<<2) +#ifdef ZEND_JIT_IR +typedef union _zend_jit_exit_const { + int64_t i; + double d; +} zend_jit_exit_const; +#endif + typedef struct _zend_jit_trace_info { uint32_t id; /* trace id */ uint32_t root; /* root trace id or self id for root traces */ @@ -591,6 +701,10 @@ typedef struct _zend_jit_trace_info { zend_jit_trace_exit_info *exit_info; /* info about side exits */ zend_jit_trace_stack *stack_map; //uint32_t loop_offset; +#ifdef ZEND_JIT_IR + uint32_t consts_count; /* number of constants used by side exits */ + zend_jit_exit_const *constants; +#endif } zend_jit_trace_info; struct _zend_jit_trace_stack_frame { @@ -709,7 +823,9 @@ ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_ret_trace_helper(ZEND_OPCODE_HANDLER_ARGS); ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_jit_loop_trace_helper(ZEND_OPCODE_HANDLER_ARGS); int ZEND_FASTCALL zend_jit_trace_hot_root(zend_execute_data *execute_data, const zend_op *opline); +#ifndef ZEND_JIT_IR int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf *regs); +#endif zend_jit_trace_stop ZEND_FASTCALL zend_jit_trace_execute(zend_execute_data *execute_data, const zend_op *opline, zend_jit_trace_rec *trace_buffer, uint8_t start, uint32_t is_megamorphc); static zend_always_inline const zend_op* zend_jit_trace_get_exit_opline(zend_jit_trace_rec *trace, const zend_op *opline, bool *exit_if_true) @@ -794,11 +910,13 @@ static zend_always_inline bool zend_long_is_power_of_two(zend_long x) return (x > 0) && !(x & (x - 1)); } +#ifndef ZEND_JIT_IR static zend_always_inline uint32_t zend_long_floor_log2(zend_long x) { ZEND_ASSERT(zend_long_is_power_of_two(x)); return zend_ulong_ntz(x); } +#endif /* from http://aggregate.org/MAGIC/ */ static zend_always_inline uint32_t ones32(uint32_t x) diff --git a/ext/opcache/jit/zend_jit_ir.c b/ext/opcache/jit/zend_jit_ir.c new file mode 100644 index 0000000000000..7253e48613144 --- /dev/null +++ b/ext/opcache/jit/zend_jit_ir.c @@ -0,0 +1,16531 @@ +/* + * +----------------------------------------------------------------------+ + * | Zend JIT | + * +----------------------------------------------------------------------+ + * | Copyright (c) The PHP Group | + * +----------------------------------------------------------------------+ + * | This source file is subject to version 3.01 of the PHP license, | + * | that is bundled with this package in the file LICENSE, and is | + * | available through the world-wide-web at the following url: | + * | https://www.php.net/license/3_01.txt | + * | If you did not receive a copy of the PHP license and are unable to | + * | obtain it through the world-wide-web, please send a note to | + * | license@php.net so we can mail you a copy immediately.
| + * +----------------------------------------------------------------------+ + * | Authors: Dmitry Stogov | + * +----------------------------------------------------------------------+ + */ + +#include "jit/ir/ir.h" +#include "jit/ir/ir_builder.h" + +#if defined(IR_TARGET_X86) +# define IR_REG_SP 4 /* IR_REG_RSP */ +# define IR_REG_FP 5 /* IR_REG_RBP */ +# define ZREG_FP 6 /* IR_REG_RSI */ +# define ZREG_IP 7 /* IR_REG_RDI */ +# define ZREG_FIRST_FPR 8 +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7)) /* all preserved registers */ +#elif defined(IR_TARGET_X64) +# define IR_REG_SP 4 /* IR_REG_RSP */ +# define IR_REG_FP 5 /* IR_REG_RBP */ +# define ZREG_FP 14 /* IR_REG_R14 */ +# define ZREG_IP 15 /* IR_REG_R15 */ +# define ZREG_FIRST_FPR 16 +# if defined(_WIN64) +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7) | (1<<12) | (1<<13) | (1<<14) | (1<<15)) +/* +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<6) | (1<<7) | (1<<12) | (1<<13) | (1<<14) | (1<<15) | \ + (1<<(16+6)) | (1<<(16+7)) | (1<<(16+8)) | (1<<(16+9)) | (1<<(16+10)) | \ + (1<<(16+11)) | (1<<(16+12)) | (1<<(16+13)) | (1<<(16+14)) | (1<<(16+15))) +*/ +# else +# define IR_REGSET_PRESERVED ((1<<3) | (1<<5) | (1<<12) | (1<<13) | (1<<14) | (1<<15)) /* all preserved registers */ +# endif +#elif defined(IR_TARGET_AARCH64) +# define IR_REG_SP 31 /* IR_REG_RSP */ +# define IR_REG_FP 29 /* IR_REG_X29 */ +# define ZREG_FP 27 /* IR_REG_X27 */ +# define ZREG_IP 28 /* IR_REG_X28 */ +# define ZREG_FIRST_FPR 32 +# define IR_REGSET_PRESERVED ((1<<19) | (1<<20) | (1<<21) | (1<<22) | (1<<23) | \ + (1<<24) | (1<<25) | (1<<26) | (1<<27) | (1<<28)) /* all preserved registers */ +#else +# error "Unknown IR target" +#endif + +#define ZREG_RX ZREG_IP + +#define OPTIMIZE_FOR_SIZE 0 + +/* IR builder defines */ +#undef _ir_CTX +#define _ir_CTX (&jit->ctx) + +#undef ir_CONST_ADDR +#define ir_CONST_ADDR(_addr) jit_CONST_ADDR(jit, (uintptr_t)(_addr)) +#define ir_CONST_FUNC(_addr) jit_CONST_FUNC(jit, (uintptr_t)(_addr), 0) +#define ir_CONST_FC_FUNC(_addr) jit_CONST_FUNC(jit, (uintptr_t)(_addr), IR_CONST_FASTCALL_FUNC) +#define ir_CAST_FC_FUNC(_addr) ir_fold2(_ir_CTX, IR_OPT(IR_BITCAST, IR_ADDR), (_addr), IR_CONST_FASTCALL_FUNC) + +#undef ir_ADD_OFFSET +#define ir_ADD_OFFSET(_addr, _offset) \ + jit_ADD_OFFSET(jit, _addr, _offset) + +#ifdef ZEND_ENABLE_ZVAL_LONG64 +# define IR_LONG IR_I64 +# define ir_CONST_LONG ir_CONST_I64 +# define ir_UNARY_OP_L ir_UNARY_OP_I64 +# define ir_BINARY_OP_L ir_BINARY_OP_I64 +# define ir_ADD_L ir_ADD_I64 +# define ir_SUB_L ir_SUB_I64 +# define ir_MUL_L ir_MUL_I64 +# define ir_DIV_L ir_DIV_I64 +# define ir_MOD_L ir_MOD_I64 +# define ir_NEG_L ir_NEG_I64 +# define ir_ABS_L ir_ABS_I64 +# define ir_SEXT_L ir_SEXT_I64 +# define ir_ZEXT_L ir_ZEXT_I64 +# define ir_TRUNC_L ir_TRUNC_I64 +# define ir_BITCAST_L ir_BITCAST_I64 +# define ir_FP2L ir_FP2I64 +# define ir_ADD_OV_L ir_ADD_OV_I64 +# define ir_SUB_OV_L ir_SUB_OV_I64 +# define ir_MUL_OV_L ir_MUL_OV_I64 +# define ir_NOT_L ir_NOT_I64 +# define ir_OR_L ir_OR_I64 +# define ir_AND_L ir_AND_I64 +# define ir_XOR_L ir_XOR_I64 +# define ir_SHL_L ir_SHL_I64 +# define ir_SHR_L ir_SHR_I64 +# define ir_SAR_L ir_SAR_I64 +# define ir_ROL_L ir_ROL_I64 +# define ir_ROR_L ir_ROR_I64 +# define ir_MIN_L ir_MIN_I64 +# define ir_MAX_L ir_MAX_I64 +# define ir_LOAD_L ir_LOAD_I64 +#else +# define IR_LONG IR_I32 +# define ir_CONST_LONG ir_CONST_I32 +# define ir_UNARY_OP_L ir_UNARY_OP_I32 +# define ir_BINARY_OP_L ir_BINARY_OP_I32 +# define ir_ADD_L ir_ADD_I32 +# define ir_SUB_L 
ir_SUB_I32 +# define ir_MUL_L ir_MUL_I32 +# define ir_DIV_L ir_DIV_I32 +# define ir_MOD_L ir_MOD_I32 +# define ir_NEG_L ir_NEG_I32 +# define ir_ABS_L ir_ABS_I32 +# define ir_SEXT_L ir_SEXT_I32 +# define ir_ZEXT_L ir_ZEXT_I32 +# define ir_TRUNC_L ir_TRUNC_I32 +# define ir_BITCAST_L ir_BITCAST_I32 +# define ir_FP2L ir_FP2I32 +# define ir_ADD_OV_L ir_ADD_OV_I32 +# define ir_SUB_OV_L ir_SUB_OV_I32 +# define ir_MUL_OV_L ir_MUL_OV_I32 +# define ir_NOT_L ir_NOT_I32 +# define ir_OR_L ir_OR_I32 +# define ir_AND_L ir_AND_I32 +# define ir_XOR_L ir_XOR_I32 +# define ir_SHL_L ir_SHL_I32 +# define ir_SHR_L ir_SHR_I32 +# define ir_SAR_L ir_SAR_I32 +# define ir_ROL_L ir_ROL_I32 +# define ir_ROR_L ir_ROR_I32 +# define ir_MIN_L ir_MIN_I32 +# define ir_MAX_L ir_MAX_I32 +# define ir_LOAD_L ir_LOAD_I32 +#endif + +/* A helper structure to collect IR refs for subsequent use in (MERGE/PHI)_N */ +typedef struct _ir_refs { + uint32_t count; + uint32_t limit; + ir_ref refs[0]; +} ir_refs; + +#define ir_refs_size(_n) (offsetof(ir_refs, refs) + sizeof(ir_ref) * (_n)) +#define ir_refs_init(_name, _n) _name = alloca(ir_refs_size(_n)); \ + do {_name->count = 0; _name->limit = (_n);} while (0) + +static void ir_refs_add(ir_refs *refs, ir_ref ref) +{ + ir_ref *ptr; + + ZEND_ASSERT(refs->count < refs->limit); + ptr = refs->refs; + ptr[refs->count++] = ref; +} + +static size_t zend_jit_trace_prologue_size = (size_t)-1; +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +static uint32_t allowed_opt_flags = 0; +#endif +static bool delayed_call_chain = 0; // TODO: remove this var (use jit->delayed_call_level) ??? + +#ifdef ZTS +# ifdef _WIN32 +extern uint32_t _tls_index; +extern char *_tls_start; +extern char *_tls_end; +# endif + +static size_t tsrm_ls_cache_tcb_offset = 0; +static size_t tsrm_tls_index = 0; +static size_t tsrm_tls_offset = 0; + +# define EG_TLS_OFFSET(field) \ + (executor_globals_offset + offsetof(zend_executor_globals, field)) + +# define CG_TLS_OFFSET(field) \ + (compiler_globals_offset + offsetof(zend_compiler_globals, field)) + +# define jit_EG(_field) \ + ir_ADD_OFFSET(jit_TLS(jit), EG_TLS_OFFSET(_field)) + +# define jit_CG(_field) \ + ir_ADD_OFFSET(jit_TLS(jit), CG_TLS_OFFSET(_field)) + +#else + +# define jit_EG(_field) \ + ir_CONST_ADDR(&EG(_field)) + +# define jit_CG(_field) \ + ir_CONST_ADDR(&CG(_field)) + +#endif + +#define jit_CALL(_call, _field) \ + ir_ADD_OFFSET(_call, offsetof(zend_execute_data, _field)) + +#define jit_EX(_field) \ + jit_CALL(jit_FP(jit), _field) + +#define jit_RX(_field) \ + jit_CALL(jit_IP(jit), _field) + +#define JIT_STUBS(_) \ + _(exception_handler, IR_SKIP_PROLOGUE) \ + _(exception_handler_undef, IR_SKIP_PROLOGUE) \ + _(exception_handler_free_op2, IR_SKIP_PROLOGUE) \ + _(exception_handler_free_op1_op2, IR_SKIP_PROLOGUE) \ + _(interrupt_handler, IR_SKIP_PROLOGUE) \ + _(leave_function_handler, IR_SKIP_PROLOGUE) \ + _(negative_shift, IR_SKIP_PROLOGUE) \ + _(mod_by_zero, IR_SKIP_PROLOGUE) \ + _(invalid_this, IR_SKIP_PROLOGUE) \ + _(undefined_function, IR_SKIP_PROLOGUE) \ + _(throw_cannot_pass_by_ref, IR_SKIP_PROLOGUE) \ + _(icall_throw, IR_SKIP_PROLOGUE) \ + _(leave_throw, IR_SKIP_PROLOGUE) \ + _(hybrid_runtime_jit, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_profile_jit, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_func_hot_counter, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_loop_hot_counter, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_func_trace_counter, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_ret_trace_counter,
IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(hybrid_loop_trace_counter, IR_SKIP_PROLOGUE | IR_START_BR_TARGET) \ + _(trace_halt, IR_SKIP_PROLOGUE) \ + _(trace_escape, IR_SKIP_PROLOGUE) \ + _(trace_exit, IR_SKIP_PROLOGUE) \ + _(undefined_offset, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(undefined_key, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(cannot_add_element, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(assign_const, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(assign_tmp, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(assign_var, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(assign_cv_noref, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(assign_cv, IR_FUNCTION | IR_FASTCALL_FUNC) \ + _(new_array, IR_FUNCTION | IR_FASTCALL_FUNC) \ + +#define JIT_STUB_ID(name, flags) \ + jit_stub_ ## name, + +#define JIT_STUB_FORWARD(name, flags) \ + static int zend_jit_ ## name ## _stub(zend_jit_ctx *jit); + +#define JIT_STUB(name, flags) \ + {JIT_STUB_PREFIX #name, zend_jit_ ## name ## _stub, flags}, + +typedef enum _jit_stub_id { + JIT_STUBS(JIT_STUB_ID) + jit_last_stub +} jit_stub_id; + +typedef struct _zend_jit_reg_var { + ir_ref ref; + uint32_t flags; +} zend_jit_reg_var; + +typedef struct _zend_jit_ctx { + ir_ctx ctx; + const zend_op *last_valid_opline; + bool use_last_valid_opline; + bool track_last_valid_opline; + bool reuse_ip; + uint32_t delayed_call_level; + int b; /* current basic block number or -1 */ +#ifdef ZTS + ir_ref tls; +#endif + ir_ref fp; + ir_ref trace_loop_ref; + ir_ref return_inputs; + const zend_op_array *op_array; + const zend_op_array *current_op_array; + zend_ssa *ssa; + zend_string *name; + ir_ref *bb_start_ref; /* PHP BB -> IR ref mapping */ + ir_ref *bb_predecessors; /* PHP BB -> index in bb_edges -> IR refs of predecessors */ + ir_ref *bb_edges; + zend_jit_trace_info *trace; + zend_jit_reg_var *ra; + int delay_var; + ir_refs *delay_refs; + ir_ref eg_exception_addr; + HashTable addr_hash; + ir_ref stub_addr[jit_last_stub]; +} zend_jit_ctx; + +typedef int8_t zend_reg; + +typedef struct _zend_jit_registers_buf { +#if defined(IR_TARGET_X64) + uint64_t gpr[16]; /* general purpose integer registers */ + double fpr[16]; /* floating point registers */ +#elif defined(IR_TARGET_X86) + uint32_t gpr[8]; /* general purpose integer registers */ + double fpr[8]; /* floating point registers */ +#elif defined (IR_TARGET_AARCH64) + uint64_t gpr[32]; /* general purpose integer registers */ + double fpr[32]; /* floating point registers */ +#else +# error "Unknown IR target" +#endif +} zend_jit_registers_buf; + +/* Keep 32 exit points in a single code block */ +#define ZEND_JIT_EXIT_POINTS_SPACING 4 // push byte + short jmp = 4 bytes +#define ZEND_JIT_EXIT_POINTS_PER_GROUP 32 // number of consecutive exit points + +static uint32_t zend_jit_exit_point_by_addr(void *addr); +int ZEND_FASTCALL zend_jit_trace_exit(uint32_t exit_num, zend_jit_registers_buf *regs); + +static int zend_jit_assign_to_variable(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr var_use_addr, + zend_jit_addr var_addr, + uint32_t var_info, + uint32_t var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr res_addr, + zend_jit_addr ref_addr, + bool check_exception); + +typedef struct _zend_jit_stub { + const char *name; + int (*stub)(zend_jit_ctx *jit); + uint32_t flags; +} zend_jit_stub; + +JIT_STUBS(JIT_STUB_FORWARD) + +static const zend_jit_stub zend_jit_stubs[] = { + JIT_STUBS(JIT_STUB) +}; + +#if defined(_WIN32) || defined(IR_TARGET_AARCH64) +/* We keep addresses in SHM to share them between separate processes (on Windows)
or to support veneers (on AArch64) */ +static void** zend_jit_stub_handlers = NULL; +#else +static void* zend_jit_stub_handlers[sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0])]; +#endif + +#if defined(IR_TARGET_AARCH64) +static const void *zend_jit_get_veneer(ir_ctx *ctx, const void *addr) +{ + int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]); + + for (i = 0; i < count; i++) { + if (zend_jit_stub_handlers[i] == addr) { + return zend_jit_stub_handlers[count + i]; + } + } + + if (((zend_jit_ctx*)ctx)->trace + && (void*)addr >= dasm_buf && (void*)addr < dasm_end) { + uint32_t exit_point = zend_jit_exit_point_by_addr((void*)addr); + + if (exit_point != (uint32_t)-1) { + zend_jit_trace_info *t = ((zend_jit_ctx*)ctx)->trace; + + ZEND_ASSERT(exit_point < t->exit_count); + return (const void*)((char*)ctx->code_buffer + ctx->code_size - (t->exit_count - exit_point) * 4); + } + } + + return NULL; +} + +static bool zend_jit_set_veneer(ir_ctx *ctx, const void *addr, const void *veneer) +{ + int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]); + int64_t offset; + + for (i = 0; i < count; i++) { + if (zend_jit_stub_handlers[i] == addr) { + const void **ptr = (const void**)&zend_jit_stub_handlers[count + i]; + *ptr = veneer; + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) { + const char *name = ir_disasm_find_symbol((uint64_t)(uintptr_t)addr, &offset); + + if (name && !offset) { + if (strstr(name, "@veneer") == NULL) { + char *new_name; + + zend_spprintf(&new_name, 0, "%s@veneer", name); + ir_disasm_add_symbol(new_name, (uint64_t)(uintptr_t)veneer, 4); + efree(new_name); + } else { + ir_disasm_add_symbol(name, (uint64_t)(uintptr_t)veneer, 4); + } + } + } + return 1; + } + } + + return 0; +} + +static void zend_jit_commit_veneers(void) +{ + int i, count = sizeof(zend_jit_stubs) / sizeof(zend_jit_stubs[0]); + + for (i = 0; i < count; i++) { + if (zend_jit_stub_handlers[count + i]) { + zend_jit_stub_handlers[i] = zend_jit_stub_handlers[count + i]; + zend_jit_stub_handlers[count + i] = NULL; + } + } +} +#endif + +static bool zend_jit_prefer_const_addr_load(zend_jit_ctx *jit, uintptr_t addr) +{ +#if defined(IR_TARGET_X86) + return 0; /* always use immediate value */ +#elif defined(IR_TARGET_X64) + return addr > 0xffffffff; /* prefer loading long constant from memory */ +#elif defined(IR_TARGET_AARCH64) + return addr > 0xffff; +#else +# error "Unknown IR target" +#endif +} + +static const char* zend_reg_name(int8_t reg) +{ + return ir_reg_name(reg, ir_reg_is_int(reg) ? IR_LONG : IR_DOUBLE); +} + +/* IR helpers */ + +#ifdef ZTS +static ir_ref jit_TLS(zend_jit_ctx *jit) +{ + ZEND_ASSERT(jit->ctx.control); + if (jit->tls) { + /* Emit "TLS" once per basic block */ + ir_insn *insn; + ir_ref ref = jit->ctx.control; + + while (1) { + if (ref == jit->tls) { + return jit->tls; + } + insn = &jit->ctx.ir_base[ref]; + if (insn->op >= IR_START || insn->op == IR_CALL) { + break; + } + ref = insn->op1; + } + } + jit->tls = ir_TLS( + tsrm_ls_cache_tcb_offset ? tsrm_ls_cache_tcb_offset : tsrm_tls_index, + tsrm_ls_cache_tcb_offset ?
IR_NULL : tsrm_tls_offset); + return jit->tls; +} +#endif + +static ir_ref jit_CONST_ADDR(zend_jit_ctx *jit, uintptr_t addr) +{ + ir_ref ref; + zval *zv; + + if (addr == 0) { + return IR_NULL; + } + zv = zend_hash_index_lookup(&jit->addr_hash, addr); + if (Z_TYPE_P(zv) == IS_LONG) { + ref = Z_LVAL_P(zv); + ZEND_ASSERT(jit->ctx.ir_base[ref].opt == IR_OPT(IR_ADDR, IR_ADDR)); + } else { + ref = ir_unique_const_addr(&jit->ctx, addr); + ZVAL_LONG(zv, ref); + } + return ref; +} + +static ir_ref jit_CONST_FUNC(zend_jit_ctx *jit, uintptr_t addr, uint16_t flags) +{ + ir_ref ref; + ir_insn *insn; + zval *zv; + + ZEND_ASSERT(addr != 0); + zv = zend_hash_index_lookup(&jit->addr_hash, addr); + if (Z_TYPE_P(zv) == IS_LONG) { + ref = Z_LVAL_P(zv); + ZEND_ASSERT(jit->ctx.ir_base[ref].opt == IR_OPT(IR_FUNC_ADDR, IR_ADDR) && jit->ctx.ir_base[ref].const_flags == flags); + } else { + ref = ir_unique_const_addr(&jit->ctx, addr); + insn = &jit->ctx.ir_base[ref]; + insn->optx = IR_OPT(IR_FUNC_ADDR, IR_ADDR); + insn->const_flags = flags; + ZVAL_LONG(zv, ref); + } + return ref; +} + +static ir_ref jit_ADD_OFFSET(zend_jit_ctx *jit, ir_ref addr, uintptr_t offset) +{ + if (offset) { + addr = ir_ADD_A(addr, ir_CONST_ADDR(offset)); + } + return addr; +} + +static ir_ref jit_EG_exception(zend_jit_ctx *jit) +{ +#ifdef ZTS + return jit_EG(exception); +#else + ir_ref ref = jit->eg_exception_addr; + + if (UNEXPECTED(!ref)) { + ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)&EG(exception)); + jit->eg_exception_addr = ref; + } + return ref; +#endif +} + +static ir_ref jit_STUB_ADDR(zend_jit_ctx *jit, jit_stub_id id) +{ + ir_ref ref = jit->stub_addr[id]; + + if (UNEXPECTED(!ref)) { + ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)zend_jit_stub_handlers[id]); + jit->stub_addr[id] = ref; + } + return ref; +} + +static ir_ref jit_STUB_FUNC_ADDR(zend_jit_ctx *jit, jit_stub_id id, uint16_t flags) +{ + ir_ref ref = jit->stub_addr[id]; + ir_insn *insn; + + if (UNEXPECTED(!ref)) { + ref = ir_unique_const_addr(&jit->ctx, (uintptr_t)zend_jit_stub_handlers[id]); + insn = &jit->ctx.ir_base[ref]; + insn->optx = IR_OPT(IR_FUNC_ADDR, IR_ADDR); + insn->const_flags = flags; + jit->stub_addr[id] = ref; + } + return ref; +} + +static void jit_SNAPSHOT(zend_jit_ctx *jit, ir_ref addr) +{ + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && JIT_G(current_frame)) { + const void *ptr = (const void*)jit->ctx.ir_base[addr].val.addr; + const zend_op_array *op_array = &JIT_G(current_frame)->func->op_array; + uint32_t stack_size = op_array->last_var + op_array->T; + + if (ptr == zend_jit_stub_handlers[jit_stub_exception_handler] + || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_undef] + || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_free_op1_op2] + || ptr == zend_jit_stub_handlers[jit_stub_exception_handler_free_op2] + || ptr == zend_jit_stub_handlers[jit_stub_interrupt_handler] + || ptr == zend_jit_stub_handlers[jit_stub_leave_function_handler] + || ptr == zend_jit_stub_handlers[jit_stub_negative_shift] + || ptr == zend_jit_stub_handlers[jit_stub_mod_by_zero] + || ptr == zend_jit_stub_handlers[jit_stub_invalid_this] + || ptr == zend_jit_stub_handlers[jit_stub_undefined_function] + || ptr == zend_jit_stub_handlers[jit_stub_throw_cannot_pass_by_ref] + || ptr == zend_jit_stub_handlers[jit_stub_icall_throw] + || ptr == zend_jit_stub_handlers[jit_stub_leave_throw] + || ptr == zend_jit_stub_handlers[jit_stub_trace_halt] + || ptr == zend_jit_stub_handlers[jit_stub_trace_escape]) { + /* This is a GUARD that triggers an exit through stub
code (without deoptimization) */ + return; + } + + /* Check if we need snapshot entries for polymorphic method call */ + zend_jit_trace_info *t = jit->trace; + uint32_t exit_point = 0, n = 0; + + if (addr < 0) { + if (t->exit_count > 0 + && jit->ctx.ir_base[addr].val.u64 == (uintptr_t)zend_jit_trace_get_exit_addr(t->exit_count - 1)) { + exit_point = t->exit_count - 1; + if (t->exit_info[exit_point].flags & ZEND_JIT_EXIT_METHOD_CALL) { + n = 2; + } + } + } + + if (stack_size || n) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t snapshot_size, i; + + snapshot_size = stack_size; + while (snapshot_size > 0) { + ir_ref ref = STACK_REF(stack, snapshot_size - 1); + + if (!ref || ref == IR_NULL || (STACK_FLAGS(stack, snapshot_size - 1) & (/*ZREG_LOAD|*/ZREG_STORE))) { + snapshot_size--; + } else { + break; + } + } + if (snapshot_size || n) { + ir_ref snapshot; + + snapshot = ir_SNAPSHOT(snapshot_size + n); + for (i = 0; i < snapshot_size; i++) { + ir_ref ref = STACK_REF(stack, i); + + if (!ref || ref == IR_NULL || (STACK_FLAGS(stack, i) & (/*ZREG_LOAD|*/ZREG_STORE))) { + ref = IR_UNUSED; + } + ir_SNAPSHOT_SET_OP(snapshot, i + 1, ref); + } + if (n) { + ir_SNAPSHOT_SET_OP(snapshot, snapshot_size + 1, t->exit_info[exit_point].poly_func_ref); + ir_SNAPSHOT_SET_OP(snapshot, snapshot_size + 2, t->exit_info[exit_point].poly_this_ref); + } + } + } + } +} + +static int32_t _add_trace_const(zend_jit_trace_info *t, int64_t val) +{ + int32_t i; + + for (i = 0; i < t->consts_count; i++) { + if (t->constants[i].i == val) { + return i; + } + } + ZEND_ASSERT(i < 0x7fffffff); + t->consts_count = i + 1; + t->constants = erealloc(t->constants, (i + 1) * sizeof(zend_jit_exit_const)); + t->constants[i].i = val; + return i; +} + +uint32_t zend_jit_duplicate_exit_point(ir_ctx *ctx, zend_jit_trace_info *t, uint32_t exit_point, ir_ref snapshot_ref) +{ + uint32_t stack_size, stack_offset; + uint32_t new_exit_point = t->exit_count; + + if (new_exit_point >= ZEND_JIT_TRACE_MAX_EXITS) { + ctx->status = -ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS; + return exit_point; + } + + t->exit_count++; + memcpy(&t->exit_info[new_exit_point], &t->exit_info[exit_point], sizeof(zend_jit_trace_exit_info)); + stack_size = t->exit_info[new_exit_point].stack_size; + if (stack_size != 0) { + stack_offset = t->stack_map_size; + t->stack_map_size += stack_size; + // TODO: reduce number of reallocations ??? 
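+ /* Each duplicated exit appends a private copy of its stack-map slice, so the erealloc() below runs once per duplication; the memcpy() then clones the source exit's slice into the newly grown tail. */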
+ t->stack_map = erealloc(t->stack_map, t->stack_map_size * sizeof(zend_jit_trace_stack)); + memcpy(t->stack_map + stack_offset, t->stack_map + t->exit_info[new_exit_point].stack_offset, stack_size * sizeof(zend_jit_trace_stack)); + t->exit_info[new_exit_point].stack_offset = stack_offset; + } + t->exit_info[new_exit_point].flags &= ~ZEND_JIT_EXIT_FIXED; + + return new_exit_point; +} + +void *zend_jit_snapshot_handler(ir_ctx *ctx, ir_ref snapshot_ref, ir_insn *snapshot, void *addr) +{ + zend_jit_trace_info *t = ((zend_jit_ctx*)ctx)->trace; + uint32_t exit_point, exit_flags; + ir_ref n = snapshot->inputs_count; + ir_ref i; + + exit_point = zend_jit_exit_point_by_addr(addr); + ZEND_ASSERT(exit_point < t->exit_count); + exit_flags = t->exit_info[exit_point].flags; + + if (exit_flags & ZEND_JIT_EXIT_METHOD_CALL) { + int8_t *reg_ops = ctx->regs[snapshot_ref]; + + ZEND_ASSERT(reg_ops[n - 1] != -1 && reg_ops[n] != -1); + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->exit_info[exit_point].poly_func_reg != reg_ops[n - 1] + || t->exit_info[exit_point].poly_this_reg != reg_ops[n])) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->exit_info[exit_point].poly_func_reg = reg_ops[n - 1]; + t->exit_info[exit_point].poly_this_reg = reg_ops[n]; + n -= 2; + } + + for (i = 2; i <= n; i++) { + ir_ref ref = ir_insn_op(snapshot, i); + + if (ref) { + int8_t *reg_ops = ctx->regs[snapshot_ref]; + int8_t reg = reg_ops[i]; + ir_ref var = i - 2; + + ZEND_ASSERT(var < t->exit_info[exit_point].stack_size); + if (t->stack_map[t->exit_info[exit_point].stack_offset + var].flags == ZREG_ZVAL_COPY) { + ZEND_ASSERT(reg != ZREG_NONE); + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != IR_REG_NUM(reg)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = IR_REG_NUM(reg); + } else if (t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != ZREG_CONST) { + ZEND_ASSERT(t->stack_map[t->exit_info[exit_point].stack_offset + var].type == IS_LONG || + t->stack_map[t->exit_info[exit_point].stack_offset + var].type == IS_DOUBLE); + + if (ref > 0) { + if (reg != ZREG_NONE) { + if (reg & IR_REG_SPILL_LOAD) { + ZEND_ASSERT(!(reg & IR_REG_SPILL_SPECIAL)); + /* spill slot on a CPU stack */ + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].ref != ref + || t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || !(t->stack_map[t->exit_info[exit_point].stack_offset + var].flags & ZREG_SPILL_SLOT))) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].ref = ref; + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = ZREG_NONE; + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags |= ZREG_SPILL_SLOT; + } else if (reg & IR_REG_SPILL_SPECIAL) { + /* spill slot on a VM stack */ + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != 
ZREG_TYPE_ONLY)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = ZREG_NONE; + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_TYPE_ONLY; + } else { + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != IR_REG_NUM(reg)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].reg = IR_REG_NUM(reg); + } + } else { + if ((exit_flags & ZEND_JIT_EXIT_FIXED) + && (t->stack_map[t->exit_info[exit_point].stack_offset + var].reg != ZREG_NONE + || t->stack_map[t->exit_info[exit_point].stack_offset + var].flags != ZREG_TYPE_ONLY)) { + exit_point = zend_jit_duplicate_exit_point(ctx, t, exit_point, snapshot_ref); + addr = (void*)zend_jit_trace_get_exit_addr(exit_point); + exit_flags &= ~ZEND_JIT_EXIT_FIXED; + } + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_TYPE_ONLY; + } + } else if (!(exit_flags & ZEND_JIT_EXIT_FIXED)) { + int32_t idx = _add_trace_const(t, ctx->ir_base[ref].val.i64); + t->stack_map[t->exit_info[exit_point].stack_offset + var].flags = ZREG_CONST; + t->stack_map[t->exit_info[exit_point].stack_offset + var].ref = idx; + } + } + } + } + t->exit_info[exit_point].flags |= ZEND_JIT_EXIT_FIXED; + return addr; +} + +static void jit_SIDE_EXIT(zend_jit_ctx *jit, ir_ref addr) +{ + jit_SNAPSHOT(jit, addr); + ir_IJMP(addr); +} + +/* PHP JIT helpers */ + +static ir_ref jit_EMALLOC(zend_jit_ctx *jit, size_t size, const zend_op_array *op_array, const zend_op *opline) +{ +#if ZEND_DEBUG + return ir_CALL_5(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), + ir_CONST_ADDR(size), + op_array->filename ? ir_CONST_ADDR(op_array->filename->val) : IR_NULL, + ir_CONST_U32(opline ? opline->lineno : 0), + IR_NULL, + ir_CONST_U32(0)); +#elif defined(HAVE_BUILTIN_CONSTANT_P) + if (size > 24 && size <= 32) { + return ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(_emalloc_32)); + } else { + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), ir_CONST_ADDR(size)); + } +#else + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_emalloc), ir_CONST_ADDR(size)); +#endif +} + +static ir_ref jit_EFREE(zend_jit_ctx *jit, ir_ref ptr, size_t size, const zend_op_array *op_array, const zend_op *opline) +{ +#if ZEND_DEBUG + return ir_CALL_5(IR_ADDR, ir_CONST_FC_FUNC(_efree), + ptr, + op_array && op_array->filename ? ir_CONST_ADDR(op_array->filename->val) : IR_NULL, + ir_CONST_U32(opline ? 
opline->lineno : 0), + IR_NULL, + ir_CONST_U32(0)); +#elif defined(HAVE_BUILTIN_CONSTANT_P) + if (size > 24 && size <= 32) { + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree_32), ptr); + } else { + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree), ptr); + } +#else + return ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(_efree), ptr); +#endif +} + +static ir_ref jit_FP(zend_jit_ctx *jit) +{ + ZEND_ASSERT(jit->ctx.control); + if (jit->fp == IR_UNUSED) { + /* Emit "RLOAD FP" once per basic block */ + jit->fp = ir_RLOAD_A(ZREG_FP); + } else { + ir_insn *insn; + ir_ref ref = jit->ctx.control; + + while (1) { + if (ref == jit->fp) { + break; + } + insn = &jit->ctx.ir_base[ref]; + if (insn->op >= IR_START || insn->op == IR_CALL) { + jit->fp = ir_RLOAD_A(ZREG_FP); + break; + } + ref = insn->op1; + } + } + return jit->fp; +} + +static void jit_STORE_FP(zend_jit_ctx *jit, ir_ref ref) +{ + ir_RSTORE(ZREG_FP, ref); + jit->fp = IR_UNUSED; +} + +static ir_ref jit_IP(zend_jit_ctx *jit) +{ + return ir_RLOAD_A(ZREG_IP); +} + +static void jit_STORE_IP(zend_jit_ctx *jit, ir_ref ref) +{ + ir_RSTORE(ZREG_IP, ref); +} + +static ir_ref jit_IP32(zend_jit_ctx *jit) +{ + return ir_RLOAD_U32(ZREG_IP); +} + +static void jit_LOAD_IP(zend_jit_ctx *jit, ir_ref ref) +{ + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ref); + } else { + ir_STORE(jit_EX(opline), ref); + } +} + +static void jit_LOAD_IP_ADDR(zend_jit_ctx *jit, const zend_op *target) +{ + jit_LOAD_IP(jit, ir_CONST_ADDR(target)); +} + +static void zend_jit_track_last_valid_opline(zend_jit_ctx *jit) +{ + jit->use_last_valid_opline = 0; + jit->track_last_valid_opline = 1; +} + +static void zend_jit_use_last_valid_opline(zend_jit_ctx *jit) +{ + if (jit->track_last_valid_opline) { + jit->use_last_valid_opline = 1; + jit->track_last_valid_opline = 0; + } +} + +static bool zend_jit_trace_uses_initial_ip(zend_jit_ctx *jit) +{ + return jit->use_last_valid_opline; +} + +static void zend_jit_set_last_valid_opline(zend_jit_ctx *jit, const zend_op *opline) +{ + if (!jit->reuse_ip) { + jit->track_last_valid_opline = 1; + jit->last_valid_opline = opline; + } +} + +static void zend_jit_reset_last_valid_opline(zend_jit_ctx *jit) +{ + jit->track_last_valid_opline = 0; + jit->last_valid_opline = NULL; +} + +static void zend_jit_start_reuse_ip(zend_jit_ctx *jit) +{ + zend_jit_reset_last_valid_opline(jit); + jit->reuse_ip = 1; +} + +static int zend_jit_reuse_ip(zend_jit_ctx *jit) +{ + if (!jit->reuse_ip) { + zend_jit_start_reuse_ip(jit); + // RX = EX(call); + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(call))); + } + return 1; +} + +static void zend_jit_stop_reuse_ip(zend_jit_ctx *jit) +{ + jit->reuse_ip = 0; +} + +static int zend_jit_save_call_chain(zend_jit_ctx *jit, uint32_t call_level) +{ + ir_ref rx, call; + + if (call_level == 1) { + // JIT: call = NULL; + call = IR_NULL; + } else { + // JIT: call = EX(call); + call = ir_LOAD_A(jit_EX(call)); + } + + rx = jit_IP(jit); + + // JIT: RX->prev_execute_data = call; + ir_STORE(jit_CALL(rx, prev_execute_data), call); + + // JIT: EX(call) = RX; + ir_STORE(jit_EX(call), rx); + + jit->delayed_call_level = 0; + delayed_call_chain = 0; + + return 1; +} + +static int zend_jit_set_ip(zend_jit_ctx *jit, const zend_op *target) +{ + ir_ref ref; + ir_ref addr = IR_UNUSED; + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (jit->last_valid_opline) { + zend_jit_use_last_valid_opline(jit); + if (jit->last_valid_opline != target) { + if (GCC_GLOBAL_REGS) { + ref = jit_IP(jit); + } else
{ + addr = jit_EX(opline); + ref = ir_LOAD_A(addr); + } + if (target > jit->last_valid_opline) { + ref = ir_ADD_OFFSET(ref, (uintptr_t)target - (uintptr_t)jit->last_valid_opline); + } else { + ref = ir_SUB_A(ref, ir_CONST_ADDR((uintptr_t)jit->last_valid_opline - (uintptr_t)target)); + } + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ref); + } else { + ir_STORE(addr, ref); + } + } + } else { + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_CONST_ADDR(target)); + } else { + ir_STORE(jit_EX(opline), ir_CONST_ADDR(target)); + } + } + jit->reuse_ip = 0; + zend_jit_set_last_valid_opline(jit, target); + return 1; +} + +static int zend_jit_set_ip_ex(zend_jit_ctx *jit, const zend_op *target, bool set_ip_reg) +{ + if (!GCC_GLOBAL_REGS && set_ip_reg && !jit->last_valid_opline) { + /* Optimization to avoid duplicate constant load */ + ir_STORE(jit_EX(opline), ir_HARD_COPY_A(ir_CONST_ADDR(target))); + return 1; + } + return zend_jit_set_ip(jit, target); +} + +static void jit_SET_EX_OPLINE(zend_jit_ctx *jit, const zend_op *target) +{ + if (jit->last_valid_opline == target) { + zend_jit_use_last_valid_opline(jit); + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + } else { + ir_STORE(jit_EX(opline), ir_CONST_ADDR(target)); + if (!GCC_GLOBAL_REGS) { + zend_jit_reset_last_valid_opline(jit); + } + } +} + +static ir_ref jit_ZVAL_ADDR(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_ADD_OFFSET(reg, Z_OFFSET(addr)); + } else if (Z_MODE(addr) == IS_REF_ZVAL) { + return Z_IR_REF(addr); + } else { + ZEND_ASSERT(Z_MODE(addr) == IS_CONST_ZVAL); + return ir_CONST_ADDR(Z_ZV(addr)); + } +} + +static ir_ref jit_Z_TYPE_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zval, u1.v.type))); +} + +static ir_ref jit_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U8(Z_TYPE_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U8(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.v.type))); + } else { + return jit_Z_TYPE_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static ir_ref jit_Z_TYPE_FLAGS_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zval, u1.v.type_flags))); +} + +static ir_ref jit_Z_TYPE_FLAGS(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U8(Z_TYPE_FLAGS_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U8(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.v.type_flags))); + } else { + return jit_Z_TYPE_FLAGS_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static ir_ref jit_Z_TYPE_INFO_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zval, u1.type_info))); +} + +static ir_ref jit_Z_TYPE_INFO(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return 
ir_CONST_U32(Z_TYPE_INFO_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + return ir_LOAD_U32(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.type_info))); + } else { + return jit_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_TYPE_INFO_ref(zend_jit_ctx *jit, ir_ref ref, ir_ref type_info) +{ + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zval, u1.type_info)), type_info); +} + +static void jit_set_Z_TYPE_INFO_ex(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref type_info) +{ + if (Z_MODE(addr) == IS_MEM_ZVAL) { + ir_ref reg; + + ZEND_ASSERT(Z_MODE(addr) == IS_MEM_ZVAL); + if (Z_REG(addr) == ZREG_FP) { + reg = jit_FP(jit); + } else if (Z_REG(addr) == ZREG_RX) { + reg = jit_IP(jit); + } else { + ZEND_UNREACHABLE(); + } + ir_STORE(ir_ADD_OFFSET(reg, Z_OFFSET(addr) + offsetof(zval, u1.type_info)), type_info); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, addr), type_info); + } +} + +static void jit_set_Z_TYPE_INFO(zend_jit_ctx *jit, zend_jit_addr addr, uint32_t type_info) +{ + if (type_info < IS_STRING + && Z_MODE(addr) == IS_MEM_ZVAL + && Z_REG(addr) == ZREG_FP + && JIT_G(current_frame) + && STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(addr))) == type_info) { + /* type is already set */ + return; + } + jit_set_Z_TYPE_INFO_ex(jit, addr, ir_CONST_U32(type_info)); +} + +static ir_ref jit_if_Z_TYPE_ref(zend_jit_ctx *jit, ir_ref ref, ir_ref type) +{ + return ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), type)); +} + +static ir_ref jit_if_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type) +{ + ZEND_ASSERT(type != IS_UNDEF); + return ir_IF(ir_EQ(jit_Z_TYPE(jit, addr), ir_CONST_U8(type))); +} + +static ir_ref jit_if_not_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ref = ir_NE(ref, ir_CONST_U8(type)); + } + return ir_IF(ref); +} + +static void jit_guard_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type, const void *exit_addr) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ir_GUARD(ir_EQ(ref, ir_CONST_U8(type)), ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } +} + +static void jit_guard_not_Z_TYPE(zend_jit_ctx *jit, zend_jit_addr addr, uint8_t type, const void *exit_addr) +{ + ir_ref ref = jit_Z_TYPE(jit, addr); + + if (type != IS_UNDEF) { + ref = ir_NE(ref, ir_CONST_U8(type)); + } + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); +} + +static ir_ref jit_if_REFCOUNTED(zend_jit_ctx *jit, zend_jit_addr addr) +{ + return ir_IF(jit_Z_TYPE_FLAGS(jit, addr)); +} + +static ir_ref jit_if_COLLECTABLE_ref(zend_jit_ctx *jit, ir_ref addr_ref) +{ + return ir_IF(ir_AND_U8(jit_Z_TYPE_FLAGS_ref(jit, addr_ref), ir_CONST_U8(IS_TYPE_COLLECTABLE))); +} + +static ir_ref jit_Z_LVAL_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_L(ref); +} + +static ir_ref jit_Z_DVAL_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_D(ref); +} + +static bool zend_jit_spilling_may_cause_conflict(zend_jit_ctx *jit, int var, ir_ref val) +{ + if (jit->ctx.ir_base[val].op == IR_RLOAD) { + /* Deoptimization */ + return 0; + } +// if (jit->ctx.ir_base[val].op == IR_LOAD +// && jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op == IR_ADD +// && 
jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op1].op == IR_RLOAD +// && jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op1].op2 == ZREG_FP +// && IR_IS_CONST_REF(jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op2) +// && jit->ctx.ir_base[jit->ctx.ir_base[jit->ctx.ir_base[val].op2].op2].val.addr == (uintptr_t)EX_NUM_TO_VAR(jit->ssa->vars[var].var)) { +// /* LOAD from the same location (the LOAD is pinned) */ +// // TODO: should be anti-dependent with the following stores ??? +// return 0; +// } + if (jit->ssa->vars[var].var < jit->current_op_array->last_var) { + /* IS_CV */ + return 0; + } + return 1; +} + +static void zend_jit_def_reg(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref val) +{ + int var; + + ZEND_ASSERT(Z_MODE(addr) == IS_REG); + var = Z_SSA_VAR(addr); + if (var == jit->delay_var) { + ir_refs_add(jit->delay_refs, val); + return; + } + ZEND_ASSERT(jit->ra && jit->ra[var].ref == IR_NULL); + + /* Negative "var" has special meaning for IR */ + if (val > 0 && !zend_jit_spilling_may_cause_conflict(jit, var, val)) { + val = ir_bind(&jit->ctx, -EX_NUM_TO_VAR(jit->ssa->vars[var].var), val); + } + jit->ra[var].ref = val; + + if (jit->ra[var].flags & ZREG_FORWARD) { + zend_ssa_phi *phi = jit->ssa->vars[var].phi_use_chain; + zend_basic_block *bb; + int n, j, *p; + ir_ref *q; + + jit->ra[var].flags &= ~ZREG_FORWARD; + while (phi != NULL) { + zend_ssa_phi *dst_phi = phi; + int src_var = var; + + if (dst_phi->pi >= 0) { + jit->ra[src_var].ref = val; + src_var = dst_phi->ssa_var; + if (!(jit->ra[src_var].flags & ZREG_FORWARD)) { + phi = zend_ssa_next_use_phi(jit->ssa, var, phi); + continue; + } + dst_phi = jit->ssa->vars[src_var].phi_use_chain; + ZEND_ASSERT(dst_phi != NULL && "reg forwarding"); + ZEND_ASSERT(!zend_ssa_next_use_phi(jit->ssa, src_var, dst_phi) && "reg forwarding"); + jit->ra[src_var].flags &= ~ZREG_FORWARD; + } + + if (jit->ra[dst_phi->ssa_var].ref > 0) { + ir_insn *phi_insn = &jit->ctx.ir_base[jit->ra[dst_phi->ssa_var].ref]; + ZEND_ASSERT(phi_insn->op == IR_PHI); +// ZEND_ASSERT(ir_operands_count(ctx, phi_insn) == n + 1); + bb = &jit->ssa->cfg.blocks[dst_phi->block]; + n = bb->predecessors_count; + for (j = 0, p = &dst_phi->sources[0], q = phi_insn->ops + 2; j < n; j++, p++, q++) { + if (*p == src_var) { + *q = val; + } + } + } + + phi = zend_ssa_next_use_phi(jit->ssa, var, phi); + } + } +} + +static ir_ref zend_jit_use_reg(zend_jit_ctx *jit, zend_jit_addr addr) +{ + int var = Z_SSA_VAR(addr); + + ZEND_ASSERT(Z_MODE(addr) == IS_REG); + ZEND_ASSERT(jit->ra && jit->ra[var].ref); + if (jit->ra[var].ref == IR_NULL) { + zend_jit_addr mem_addr; + ir_ref ref; + + ZEND_ASSERT(jit->ra[var].flags & ZREG_LOAD); + mem_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(jit->ssa->vars[var].var)); + if ((jit->ssa->var_info[var].type & MAY_BE_ANY) == MAY_BE_LONG) { + ref = jit_Z_LVAL_ref(jit, jit_ZVAL_ADDR(jit, mem_addr)); + } else if ((jit->ssa->var_info[var].type & MAY_BE_ANY) == MAY_BE_DOUBLE) { + ref = jit_Z_DVAL_ref(jit, jit_ZVAL_ADDR(jit, mem_addr)); + } else { + ZEND_UNREACHABLE(); + } + zend_jit_def_reg(jit, addr, ref); + return ref; + } + return jit->ra[Z_SSA_VAR(addr)].ref; +} + +static void zend_jit_gen_pi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int src_var = phi->sources[0]; + int dst_var = phi->ssa_var; + + ZEND_ASSERT(phi->pi >= 0); + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(jit->ra[src_var].ref); + + if (jit->ra[src_var].ref == IR_NULL) { + /* Not defined yet */ + if (jit->ssa->vars[dst_var].use_chain < 0 + && 
jit->ssa->vars[dst_var].phi_use_chain) { + zend_ssa_phi *phi = jit->ssa->vars[dst_var].phi_use_chain; + if (!zend_ssa_next_use_phi(jit->ssa, dst_var, phi)) { + /* This is a Pi forwarded to Phi */ + jit->ra[src_var].flags |= ZREG_FORWARD; + return; + } + } + ZEND_ASSERT(0 && "Not defined Pi source"); + } + /* Reuse register */ + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), + zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var))); +} + +static void zend_jit_gen_phi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int dst_var = phi->ssa_var; + zend_basic_block *bb = &jit->ssa->cfg.blocks[phi->block]; + int n = bb->predecessors_count; + int i; + ir_type type = (jit->ssa->var_info[phi->ssa_var].type & MAY_BE_LONG) ? IR_LONG : IR_DOUBLE; + ir_ref merge = jit->bb_start_ref[phi->block]; + ir_ref ref; + ir_ref old_insns_count = jit->ctx.insns_count; + ir_ref same_src_ref = IR_UNUSED; + bool phi_inputs_are_the_same = 1; + + ZEND_ASSERT(phi->pi < 0); + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(merge); + ZEND_ASSERT(jit->ctx.ir_base[merge].op == IR_MERGE || jit->ctx.ir_base[merge].op == IR_LOOP_BEGIN); + ZEND_ASSERT(n == jit->ctx.ir_base[merge].inputs_count); + + ref = ir_emit_N(&jit->ctx, IR_OPT(IR_PHI, type), n + 1); + ir_set_op(&jit->ctx, ref, 1, merge); + + for (i = 0; i < n; i++) { + int src_var = phi->sources[i]; + + ZEND_ASSERT(jit->ra[src_var].ref); + if (jit->ra[src_var].ref == IR_NULL) { + jit->ra[src_var].flags |= ZREG_FORWARD; + phi_inputs_are_the_same = 0; + } else { + ir_ref src_ref = zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var)); + if (i == 0) { + same_src_ref = src_ref; + } else if (same_src_ref != src_ref) { + phi_inputs_are_the_same = 0; + } + ir_set_op(&jit->ctx, ref, i + 2, src_ref); + } + } + if (phi_inputs_are_the_same) { + ref = same_src_ref; + jit->ctx.insns_count = old_insns_count; + } + + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), ref); +} + +static ir_ref jit_Z_LVAL(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_LONG(Z_LVAL_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_REG) { + return zend_jit_use_reg(jit, addr); + } else { + return jit_Z_LVAL_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_LVAL(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref lval) +{ + if (Z_MODE(addr) == IS_REG) { + zend_jit_def_reg(jit, addr, lval); + } else { + ir_STORE(jit_ZVAL_ADDR(jit, addr), lval); + } +} + +#if SIZEOF_ZEND_LONG == 4 +static ir_ref jit_Z_W2(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_U32((Z_ZV(addr))->value.ww.w2); + } else { + return ir_LOAD_L(ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, addr), offsetof(zval, value.ww.w2))); + } +} + +static void jit_set_Z_W2(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref lval) +{ + ir_STORE(ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, addr), offsetof(zval, value.ww.w2)), lval); +} +#endif + +static ir_ref jit_Z_DVAL(zend_jit_ctx *jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_DOUBLE(Z_DVAL_P(Z_ZV(addr))); + } else if (Z_MODE(addr) == IS_REG) { + return zend_jit_use_reg(jit, addr); + } else { + return jit_Z_DVAL_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_DVAL(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref dval) +{ + if (Z_MODE(addr) == IS_REG) { + zend_jit_def_reg(jit, addr, dval); + } else { + ir_STORE(jit_ZVAL_ADDR(jit, addr), dval); + } +} + +static ir_ref jit_Z_PTR_ref(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_A(ref); +} + +static ir_ref jit_Z_PTR(zend_jit_ctx 
*jit, zend_jit_addr addr) +{ + if (Z_MODE(addr) == IS_CONST_ZVAL) { + return ir_CONST_ADDR(Z_PTR_P(Z_ZV(addr))); + } else { + return jit_Z_PTR_ref(jit, jit_ZVAL_ADDR(jit, addr)); + } +} + +static void jit_set_Z_PTR(zend_jit_ctx *jit, zend_jit_addr addr, ir_ref ptr) +{ + ir_STORE(jit_ZVAL_ADDR(jit, addr), ptr); +} + +static ir_ref jit_GC_REFCOUNT(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_LOAD_U32(ref); +} + +static void jit_set_GC_REFCOUNT(zend_jit_ctx *jit, ir_ref ref, uint32_t refcount) +{ + ir_STORE(ref, ir_CONST_U32(refcount)); +} + +static void jit_GC_ADDREF(zend_jit_ctx *jit, ir_ref ref) +{ + ir_STORE(ref, ir_ADD_U32(ir_LOAD_U32(ref), ir_CONST_U32(1))); +} + +static void jit_GC_ADDREF2(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref counter = ir_LOAD_U32(ref); + ir_STORE(ref, ir_ADD_U32(counter, ir_CONST_U32(2))); +} + +static ir_ref jit_GC_DELREF(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref counter = ir_LOAD_U32(ref); + counter = ir_SUB_U32(counter, ir_CONST_U32(1)); + ir_STORE(ref, counter); + return counter; +} + +static ir_ref jit_if_GC_MAY_NOT_LEAK(zend_jit_ctx *jit, ir_ref ref) +{ + return ir_IF( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_refcounted, gc.u.type_info))), + ir_CONST_U32(GC_INFO_MASK | (GC_NOT_COLLECTABLE << GC_FLAGS_SHIFT)))); +} + +static void jit_ZVAL_COPY_CONST(zend_jit_ctx *jit, zend_jit_addr dst, uint32_t dst_info, uint32_t dst_def_info, zval *zv, bool addref) +{ + ir_ref ref = IR_UNUSED; + + if (Z_TYPE_P(zv) > IS_TRUE) { + if (Z_TYPE_P(zv) == IS_DOUBLE) { + jit_set_Z_DVAL(jit, dst, ir_CONST_DOUBLE(Z_DVAL_P(zv))); + } else if (Z_TYPE_P(zv) == IS_LONG && dst_def_info == MAY_BE_DOUBLE) { + jit_set_Z_DVAL(jit, dst, ir_CONST_DOUBLE((double)Z_LVAL_P(zv))); + } else if (Z_TYPE_P(zv) == IS_LONG) { + jit_set_Z_LVAL(jit, dst, ir_CONST_LONG(Z_LVAL_P(zv))); + } else { + ref = ir_CONST_ADDR(Z_PTR_P(zv)); + jit_set_Z_PTR(jit, dst, ref); + if (addref && Z_REFCOUNTED_P(zv)) { + jit_GC_ADDREF(jit, ref); + } + } + } + if (Z_MODE(dst) != IS_REG) { + if (dst_def_info == MAY_BE_DOUBLE) { + if ((dst_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } else if (((dst_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) != (1<<Z_TYPE_P(zv))) || (dst_info & (MAY_BE_STRING|MAY_BE_ARRAY))) { + jit_set_Z_TYPE_INFO(jit, dst, Z_TYPE_INFO_P(zv)); + } + } +} + +static int zend_jit_check_timeout(zend_jit_ctx *jit, const zend_op *opline, const void *exit_addr) +{ + ir_ref ref = ir_LOAD_U8(jit_EG(vm_interrupt)); + + if (exit_addr) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else if (!opline || jit->last_valid_opline == opline) { + ir_GUARD_NOT(ref, jit_STUB_ADDR(jit, jit_stub_interrupt_handler)); + } else { + ir_ref if_timeout = ir_IF(ref); + + ir_IF_TRUE_cold(if_timeout); + jit_LOAD_IP_ADDR(jit, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_interrupt_handler)); + ir_IF_FALSE(if_timeout); + } + return 1; +} + +/* stubs */ + +static int zend_jit_exception_handler_stub(zend_jit_ctx *jit) +{ + const void *handler; + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + handler = zend_get_opcode_handler_func(EG(exception_op)); + + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + handler = EG(exception_op)->handler; + + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ir_ref ref, if_negative; + + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(handler), jit_FP(jit)); + if_negative = ir_IF(ir_LT(ref, ir_CONST_I32(0))); + ir_IF_TRUE(if_negative); + ir_MERGE_WITH_EMPTY_FALSE(if_negative); + ref = ir_PHI_2(IR_I32, ref, ir_CONST_I32(1)); + ir_RETURN(ref); + } + } + return 1; +} + +static int zend_jit_exception_handler_undef_stub(zend_jit_ctx *jit) +{ + ir_ref ref, result_type, if_result_used; + + ref = jit_EG(opline_before_exception); + result_type = ir_LOAD_U8(ir_ADD_OFFSET(ir_LOAD_A(ref), offsetof(zend_op,
result_type))); + + if_result_used = ir_IF(ir_AND_U8(result_type, ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_result_used); + + ref = ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(ref), offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ir_STORE(ir_ADD_OFFSET(ir_ADD_A(jit_FP(jit), ref), offsetof(zval, u1.type_info)), ir_CONST_U32(IS_UNDEF)); + ir_MERGE_WITH_EMPTY_FALSE(if_result_used); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_exception_handler_free_op1_op2_stub(zend_jit_ctx *jit) +{ + ir_ref ref, if_dtor; + zend_jit_addr var_addr; + + ref = ir_LOAD_A(jit_EG(opline_before_exception)); + if_dtor = ir_IF(ir_AND_U8(ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zend_op, op1_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_dtor); + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op1.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_dtor); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + + return 1; +} + +static int zend_jit_exception_handler_free_op2_stub(zend_jit_ctx *jit) +{ + ir_ref ref, if_dtor; + zend_jit_addr var_addr; + + ref = ir_LOAD_A(jit_EG(opline_before_exception)); + if_dtor = ir_IF(ir_AND_U8(ir_LOAD_U8(ir_ADD_OFFSET(ref, offsetof(zend_op, op2_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_dtor); + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_dtor); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + + return 1; +} + +static int zend_jit_interrupt_handler_stub(zend_jit_ctx *jit) +{ + ir_ref if_timeout, if_exception; + + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ir_STORE(jit_EG(vm_interrupt), ir_CONST_U8(0)); + if_timeout = ir_IF(ir_EQ(ir_LOAD_U8(jit_EG(timed_out)), ir_CONST_U8(0))); + ir_IF_FALSE(if_timeout); + ir_CALL(IR_VOID, ir_CONST_FUNC(zend_timeout)); + ir_MERGE_WITH_EMPTY_TRUE(if_timeout); + + if (zend_interrupt_function) { + ir_CALL_1(IR_VOID, ir_CONST_FUNC(zend_interrupt_function), jit_FP(jit)); + if_exception = ir_IF(ir_LOAD_A(jit_EG(exception))); + ir_IF_TRUE(if_exception); + ir_CALL(IR_VOID, ir_CONST_FUNC(zend_jit_exception_in_interrupt_handler_helper)); + ir_MERGE_WITH_EMPTY_FALSE(if_exception); + + jit_STORE_FP(jit, ir_LOAD_A(jit_EG(current_execute_data))); + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + } + + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); + } + return 1; +} + +static int zend_jit_leave_function_handler_stub(zend_jit_ctx *jit) +{ + ir_ref call_info = ir_LOAD_U32(jit_EX(This.u1.type_info)); + ir_ref if_top = ir_IF(ir_AND_U32(call_info, ir_CONST_U32(ZEND_CALL_TOP))); + + ir_IF_FALSE(if_top); + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info); + jit_STORE_IP(jit, + ir_LOAD_A(jit_EX(opline))); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else if (GCC_GLOBAL_REGS) { + ir_TAILCALL_1(IR_VOID, 
ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info); + } else { + ir_TAILCALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_nested_func_helper), call_info, jit_FP(jit)); + } + + ir_IF_TRUE(if_top); + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else if (GCC_GLOBAL_REGS) { + ir_TAILCALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info); + } else { + ir_TAILCALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_top_func_helper), call_info, jit_FP(jit)); + } + + return 1; +} + +static int zend_jit_negative_shift_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + ir_CONST_ADDR(zend_ce_arithmetic_error), + ir_CONST_ADDR("Bit shift by negative number")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op1_op2)); + return 1; +} + +static int zend_jit_mod_by_zero_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + ir_CONST_ADDR(zend_ce_division_by_zero_error), + ir_CONST_ADDR("Modulo by zero")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op1_op2)); + return 1; +} + +static int zend_jit_invalid_this_stub(zend_jit_ctx *jit) +{ + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Using $this when not in object context")); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + return 1; +} + +static int zend_jit_undefined_function_stub(zend_jit_ctx *jit) +{ + // JIT: load EX(opline) + ir_ref ref = ir_LOAD_A(jit_FP(jit)); + ir_ref arg3 = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(zend_op, op2.constant))); + + if (sizeof(void*) == 8) { + arg3 = ir_LOAD_A(ir_ADD_A(ref, ir_SEXT_A(arg3))); + } else { + arg3 = ir_LOAD_A(arg3); + } + arg3 = ir_ADD_OFFSET(arg3, offsetof(zend_string, val)); + + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Call to undefined function %s()"), + arg3); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_throw_cannot_pass_by_ref_stub(zend_jit_ctx *jit) +{ + ir_ref opline, ref, rx, if_eq, if_tmp; + + // JIT: opline = EX(opline) + opline = ir_LOAD_A(jit_FP(jit)); + + // JIT: ZVAL_UNDEF(ZEND_CALL_VAR(RX, opline->result.var)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + rx = jit_IP(jit); + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(rx, ref), ir_CONST_U32(IS_UNDEF)); + + // last EX(call) frame may be delayed + // JIT: if (EX(call) == RX) + ref = ir_LOAD_A(jit_EX(call)); + if_eq = ir_IF(ir_EQ(rx, ref)); + ir_IF_FALSE(if_eq); + + // JIT: RX->prev_execute_data = EX(call) + ir_STORE(jit_CALL(rx, prev_execute_data), ref); + + // JIT: EX(call) = RX + ir_STORE(jit_EX(call), rx); + ir_MERGE_WITH_EMPTY_TRUE(if_eq); + + // JIT: IP = opline + jit_STORE_IP(jit, opline); + + // JIT: zend_cannot_pass_by_reference(opline->op2.num) + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_cannot_pass_by_reference), + ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.num)))); + + // JIT: if (IP->op1_type == IS_TMP_VAR) + ref = ir_LOAD_U8(ir_ADD_OFFSET(jit_IP(jit), offsetof(zend_op, op1_type))); + if_tmp = ir_IF(ir_EQ(ref, ir_CONST_U8(IS_TMP_VAR))); + ir_IF_TRUE(if_tmp); + + // JIT: zval_ptr_dtor(EX_VAR(IP->op1.var)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_IP(jit), offsetof(zend_op, op1.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref =
ir_ADD_A(jit_FP(jit), ref); + jit_ZVAL_PTR_DTOR(jit, + ZEND_ADDR_REF_ZVAL(ref), + MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF, 0, NULL); + ir_MERGE_WITH_EMPTY_FALSE(if_tmp); + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_icall_throw_stub(zend_jit_ctx *jit) +{ + ir_ref ip, if_set; + + // JIT: zend_rethrow_exception(zend_execute_data *execute_data) + // JIT: if (EX(opline)->opcode != ZEND_HANDLE_EXCEPTION) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + ip = jit_IP(jit); + if_set = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(ip, offsetof(zend_op, opcode))), + ir_CONST_U8(ZEND_HANDLE_EXCEPTION))); + ir_IF_FALSE(if_set); + + // JIT: EG(opline_before_exception) = opline; + ir_STORE(jit_EG(opline_before_exception), ip); + ir_MERGE_WITH_EMPTY_TRUE(if_set); + + // JIT: opline = EG(exception_op); + jit_STORE_IP(jit, jit_EG(exception_op)); + + if (GCC_GLOBAL_REGS) { + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + return 1; +} + +static int zend_jit_leave_throw_stub(zend_jit_ctx *jit) +{ + ir_ref ip, if_set; + + // JIT: if (opline->opcode != ZEND_HANDLE_EXCEPTION) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + ip = jit_IP(jit); + if_set = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(ip, offsetof(zend_op, opcode))), + ir_CONST_U8(ZEND_HANDLE_EXCEPTION))); + ir_IF_FALSE(if_set); + + // JIT: EG(opline_before_exception) = opline; + ir_STORE(jit_EG(opline_before_exception), ip); + ir_MERGE_WITH_EMPTY_TRUE(if_set); + + // JIT: opline = EG(exception_op); + jit_LOAD_IP(jit, jit_EG(exception_op)); + + if (GCC_GLOBAL_REGS) { + ir_STORE(jit_EX(opline), jit_IP(jit)); + + // JIT: HANDLE_EXCEPTION() + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } else { + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + + return 1; +} + +static int zend_jit_hybrid_runtime_jit_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID) { + return 0; + } + + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_runtime_jit)); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + return 1; +} + +static int zend_jit_hybrid_profile_jit_stub(zend_jit_ctx *jit) +{ + ir_ref addr, func, run_time_cache, jit_extension; + + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID) { + return 0; + } + + addr = ir_CONST_ADDR(&zend_jit_profile_counter); + ir_STORE(addr, ir_ADD_L(ir_LOAD_L(addr), ir_CONST_LONG(1))); + + func = ir_LOAD_A(jit_EX(func)); + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + + if (zend_jit_profile_counter_rid) { + addr = ir_ADD_OFFSET(run_time_cache, zend_jit_profile_counter_rid * sizeof(void*)); + } else { + addr = run_time_cache; + } + ir_STORE(addr, ir_ADD_L(ir_LOAD_L(addr), ir_CONST_LONG(1))); + + addr = ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_extension, orig_handler)); + ir_IJMP(ir_LOAD_A(addr)); + + return 1; +} + +static int _zend_jit_hybrid_hot_counter_stub(zend_jit_ctx *jit, uint32_t cost) +{ + ir_ref func, jit_extension, addr, ref, if_overflow; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + addr = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_hot_extension, counter))); + ref = ir_SUB_I16(ir_LOAD_I16(addr), ir_CONST_I16(cost)); + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_LE(ref, ir_CONST_I16(0))); + + ir_IF_TRUE_cold(if_overflow); + ir_STORE(addr,
ir_CONST_I16(ZEND_JIT_COUNTER_INIT)); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_hot_func), + jit_FP(jit), + jit_IP(jit)); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + + ir_IF_FALSE(if_overflow); + ref = ir_SUB_A(jit_IP(jit), + ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, opcodes)))); + ref = ir_DIV_A(ref, ir_CONST_ADDR(sizeof(zend_op) / sizeof(void*))); + + addr = ir_ADD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_hot_extension, orig_handlers)), + ref); + ir_IJMP(ir_LOAD_A(addr)); + + return 1; +} + +static int zend_jit_hybrid_func_hot_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_func)) { + return 0; + } + + return _zend_jit_hybrid_hot_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_func) - 1) / JIT_G(hot_func))); +} + +static int zend_jit_hybrid_loop_hot_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_loop)) { + return 0; + } + + return _zend_jit_hybrid_hot_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_loop) - 1) / JIT_G(hot_loop))); +} + +static ir_ref _zend_jit_orig_opline_handler(zend_jit_ctx *jit, ir_ref offset) +{ + ir_ref addr; + + if (GCC_GLOBAL_REGS) { + addr = ir_ADD_A(offset, jit_IP(jit)); + } else { + addr = ir_ADD_A(offset, ir_LOAD_A(jit_EX(opline))); + } + + return ir_LOAD_A(addr); +} + +static ir_ref zend_jit_orig_opline_handler(zend_jit_ctx *jit) +{ + ir_ref func, jit_extension, offset; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + offset = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_trace_extension, offset))); + return _zend_jit_orig_opline_handler(jit, offset); +} + +static int _zend_jit_hybrid_trace_counter_stub(zend_jit_ctx *jit, uint32_t cost) +{ + ir_ref func, jit_extension, offset, addr, ref, if_overflow, ret, if_halt; + + func = ir_LOAD_A(jit_EX(func)); + jit_extension = ir_LOAD_A(ir_ADD_OFFSET(func, offsetof(zend_op_array, reserved[zend_func_info_rid]))); + offset = ir_LOAD_A(ir_ADD_OFFSET(jit_extension, offsetof(zend_jit_op_array_trace_extension, offset))); + addr = ir_LOAD_A(ir_ADD_OFFSET(ir_ADD_A(offset, jit_IP(jit)), offsetof(zend_op_trace_info, counter))); + ref = ir_SUB_I16(ir_LOAD_I16(addr), ir_CONST_I16(cost)); + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_LE(ref, ir_CONST_I16(0))); + + ir_IF_TRUE_cold(if_overflow); + ir_STORE(addr, ir_CONST_I16(ZEND_JIT_COUNTER_INIT)); + ret = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_trace_hot_root), + jit_FP(jit), + jit_IP(jit)); + if_halt = ir_IF(ir_LT(ret, ir_CONST_I32(0))); + ir_IF_FALSE(if_halt); + + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + ir_IJMP(ir_LOAD_A(jit_IP(jit))); + + ir_IF_FALSE(if_overflow); + ir_IJMP(_zend_jit_orig_opline_handler(jit, offset)); + + ir_IF_TRUE(if_halt); + ir_IJMP(ir_CONST_FC_FUNC(zend_jit_halt_op->handler)); + + return 1; +} + +static int zend_jit_hybrid_func_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_func)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_func) - 1) / JIT_G(hot_func))); +} + +static int zend_jit_hybrid_ret_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_return)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + 
JIT_G(hot_return) - 1) / JIT_G(hot_return))); +} + +static int zend_jit_hybrid_loop_trace_counter_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind != ZEND_VM_KIND_HYBRID || !JIT_G(hot_loop)) { + return 0; + } + + return _zend_jit_hybrid_trace_counter_stub(jit, + ((ZEND_JIT_COUNTER_INIT + JIT_G(hot_loop) - 1) / JIT_G(hot_loop))); +} + +static int zend_jit_trace_halt_stub(zend_jit_ctx *jit) +{ + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + ir_TAILCALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_halt_op->handler)); + } else if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, IR_NULL); + ir_RETURN(IR_VOID); + } else { + ir_RETURN(ir_CONST_I32(-1)); // ZEND_VM_RETURN + } + return 1; +} + +static int zend_jit_trace_escape_stub(zend_jit_ctx *jit) +{ + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + return 1; +} + +static int zend_jit_trace_exit_stub(zend_jit_ctx *jit) +{ + ir_ref ref, ret, if_zero, addr; + + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + + ret = ir_EXITCALL(ir_CONST_FC_FUNC(zend_jit_trace_exit)); + + if_zero = ir_IF(ir_EQ(ret, ir_CONST_I32(0))); + + ir_IF_TRUE(if_zero); + + if (GCC_GLOBAL_REGS) { + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + ir_IF_FALSE(if_zero); + + ir_GUARD(ir_GE(ret, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + + ref = jit_EG(current_execute_data); + jit_STORE_FP(jit, ir_LOAD_A(ref)); + + if (GCC_GLOBAL_REGS) { + ref = ir_LOAD_A(jit_EX(opline)); + jit_STORE_IP(jit, ref); + } + + // check for interrupt (try to avoid this ???) 
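At this point in zend_jit_trace_exit_stub() only the ret > 0 path remains to be emitted: the stub reloads FP/IP, re-checks for a pending interrupt, and dispatches to the original (uncounted) opline handler. A minimal sketch of the contract implied by the guards above; the enum and its names are illustrative, not part of the patch:

/* Return-value protocol of zend_jit_trace_exit() as consumed by the stub. */
enum trace_exit_action {
    TRACE_EXIT_HALT    = -1, /* ret < 0:  bail out through the trace_halt stub  */
    TRACE_EXIT_VM      =  0, /* ret == 0: re-enter the VM at EX(opline) via IP  */
    TRACE_EXIT_RESTART =  1  /* ret > 0:  resume in the interpreter through the
                                original opline handler, after a timeout check  */
};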
+ if (!zend_jit_check_timeout(jit, NULL, NULL)) { + return 0; + } + + addr = zend_jit_orig_opline_handler(jit); + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, addr); + } else { +#if defined(IR_TARGET_X86) + addr = ir_CAST_FC_FUNC(addr); +#endif + ref = ir_CALL_1(IR_I32, addr, jit_FP(jit)); + ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + ir_RETURN(ir_CONST_I32(1)); // ZEND_VM_ENTER + } + + return 1; +} + +static int zend_jit_undefined_offset_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + ir_ref if_const, end1, ref1; + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_NULL)); + + if_const = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, op2_type))), ir_CONST_U8(IS_CONST))); + + ir_IF_TRUE(if_const); +#if ZEND_USE_ABS_CONST_ADDR + ref1 = ir_LOAD_A(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.zv))); +#else + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.constant))); + if (sizeof(void*) == 8) { + ref = ir_SEXT_A(ref); + } + ref1 = ir_ADD_A(ref, opline); +#endif + + end1 = ir_END(); + + ir_IF_FALSE(if_const); + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref, ref1); + + ref = jit_Z_LVAL_ref(jit, ref); + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_error), + ir_CONST_U8(E_WARNING), + ir_CONST_ADDR("Undefined array key " ZEND_LONG_FMT), + ref); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_undefined_key_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + ir_ref if_const, end1, ref1; + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_NULL)); + + if_const = ir_IF(ir_EQ(ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, op2_type))), ir_CONST_U8(IS_CONST))); + + ir_IF_TRUE(if_const); +#if ZEND_USE_ABS_CONST_ADDR + ref1 = ir_LOAD_A(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.zv))); +#else + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.constant))); + if (sizeof(void*) == 8) { + ref = ir_SEXT_A(ref); + } + ref1 = ir_ADD_A(ref, opline); +#endif + + end1 = ir_END(); + + ir_IF_FALSE(if_const); + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, op2.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + ref = ir_ADD_A(jit_FP(jit), ref); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref, ref1); + + ref = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_string, val)); + ir_CALL_3(IR_VOID, ir_CONST_FUNC(zend_error), + ir_CONST_U8(E_WARNING), + ir_CONST_ADDR("Undefined array key \"%s\""), + ref); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_cannot_add_element_stub(zend_jit_ctx *jit) +{ + ir_ref opline = ir_LOAD_A(jit_EX(opline)); + ir_ref ref, if_result_used; + + if_result_used = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(opline, offsetof(zend_op, result_type))), + ir_CONST_U8(IS_TMP_VAR|IS_VAR))); + ir_IF_TRUE(if_result_used); + + ref = ir_LOAD_U32(ir_ADD_OFFSET(opline, offsetof(zend_op, result.var))); + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + jit_set_Z_TYPE_INFO_ref(jit, ir_ADD_A(jit_FP(jit), ref), ir_CONST_U32(IS_UNDEF)); 
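The two undefined-key stubs above decode the op2 literal in both constant-addressing modes: with ZEND_USE_ABS_CONST_ADDR the opline carries an absolute zval pointer, otherwise a byte offset relative to the opline itself (hence the SEXT + ADD on 64-bit). A compact sketch of the same decode in plain C, assuming the usual Zend headers; the helper name is hypothetical:

static zval *op2_literal(const zend_op *opline)
{
#if ZEND_USE_ABS_CONST_ADDR
	return opline->op2.zv;                                    /* absolute zval pointer */
#else
	/* sign-extended byte offset, relative to the current opline */
	return (zval*)((char*)opline + (int32_t)opline->op2.constant);
#endif
}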
+ ir_MERGE_WITH_EMPTY_FALSE(if_result_used); + + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_throw_error), + IR_NULL, + ir_CONST_ADDR("Cannot add element to the array as the next element is already occupied")); + ir_RETURN(IR_VOID); + + return 1; +} + +static int zend_jit_assign_const_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CONST, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_tmp_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_TMP_VAR, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_var_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_VAR, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_assign_cv_noref_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN/*|MAY_BE_UNDEF*/; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CV, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static int zend_jit_new_array_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + ir_ref ref = ir_CALL(IR_ADDR, ir_CONST_FC_FUNC(_zend_new_array_0)); + + jit_set_Z_PTR(jit, var_addr, ref); + jit_set_Z_TYPE_INFO(jit, var_addr, IS_ARRAY_EX); + ir_RETURN(ref); + return 1; +} + +static int zend_jit_assign_cv_stub(zend_jit_ctx *jit) +{ + ir_ref var = ir_PARAM(IR_ADDR, "var", 1); + ir_ref val = ir_PARAM(IR_ADDR, "val", 2); + + zend_jit_addr var_addr = ZEND_ADDR_REF_ZVAL(var); + zend_jit_addr val_addr = ZEND_ADDR_REF_ZVAL(val); + uint32_t val_info = MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN|MAY_BE_REF/*|MAY_BE_UNDEF*/; + + if (!zend_jit_assign_to_variable( + jit, NULL, + var_addr, var_addr, -1, -1, + IS_CV, val_addr, val_info, + 0, 0, 0)) { + return 0; + } + ir_RETURN(IR_VOID); + return 1; +} + +static void zend_jit_init_ctx(zend_jit_ctx *jit, uint32_t flags) +{ +#if defined (__CET__) && (__CET__ & 1) != 0 + flags |= IR_GEN_ENDBR; +#endif + flags |= IR_OPT_FOLDING | IR_OPT_CFG | IR_OPT_CODEGEN | IR_HAS_CALLS; + + ir_init(&jit->ctx, flags, 256, 1024); + jit->ctx.ret_type = -1; + +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + if (JIT_G(opt_flags) & allowed_opt_flags & ZEND_JIT_CPU_AVX) { + jit->ctx.mflags |= IR_X86_AVX; + } 
+#elif defined(IR_TARGET_AARCH64) + jit->ctx.get_veneer = zend_jit_get_veneer; + jit->ctx.set_veneer = zend_jit_set_veneer; +#endif + + jit->ctx.fixed_regset = (1<<ZREG_FP) | (1<<ZREG_IP); + jit->ctx.flags |= IR_NO_STACK_COMBINE; + if (zend_jit_vm_kind == ZEND_VM_KIND_CALL) { + jit->ctx.flags |= IR_FUNCTION; + /* Stack must be 16 byte aligned */ + /* TODO: select stack size ??? */ +#if defined(IR_TARGET_AARCH64) + jit->ctx.fixed_stack_frame_size = sizeof(void*) * 16; /* 10 saved registers and 6 spill slots (8 bytes) */ +#elif defined(_WIN64) + jit->ctx.fixed_stack_frame_size = sizeof(void*) * 11; /* 8 saved registers and 3 spill slots (8 bytes) */ +#elif defined(IR_TARGET_X86_64) + jit->ctx.fixed_stack_frame_size = sizeof(void*) * 9; /* 6 saved registers and 3 spill slots (8 bytes) */ +#else /* IR_TARGET_x86 */ + jit->ctx.fixed_stack_frame_size = sizeof(void*) * 11; /* 4 saved registers and 7 spill slots (4 bytes) */ +#endif + if (GCC_GLOBAL_REGS) { + jit->ctx.fixed_save_regset = IR_REGSET_PRESERVED & ~((1<<ZREG_FP) | (1<<ZREG_IP)); + } else { + jit->ctx.fixed_save_regset = IR_REGSET_PRESERVED; +//#ifdef _WIN64 +// jit->ctx.fixed_save_regset &= 0xffff; // TODO: don't save FP registers ??? +//#endif + } + jit->ctx.fixed_call_stack_size = 16; + } else { +#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE + jit->ctx.fixed_stack_red_zone = ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE; + if (jit->ctx.fixed_stack_red_zone > 16) { + jit->ctx.fixed_stack_frame_size = jit->ctx.fixed_stack_red_zone - 16; + jit->ctx.fixed_call_stack_size = 16; + } + jit->ctx.flags |= IR_MERGE_EMPTY_ENTRIES; +#else + jit->ctx.fixed_stack_red_zone = 0; + jit->ctx.fixed_stack_frame_size = 32; /* 4 spill slots (8 bytes) or 8 spill slots (4 bytes) */ + jit->ctx.fixed_call_stack_size = 16; +#endif +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + jit->ctx.fixed_regset |= (1<<IR_REG_FRAME_POINTER); +#endif + } + + jit->ctx.snapshot_create = (ir_snapshot_create_t)jit_SNAPSHOT; + + jit->op_array = NULL; + jit->current_op_array = NULL; + jit->ssa = NULL; + jit->name = NULL; + jit->last_valid_opline = NULL; + jit->use_last_valid_opline = 0; + jit->track_last_valid_opline = 0; + jit->reuse_ip = 0; + jit->delayed_call_level = 0; + delayed_call_chain = 0; + jit->b = -1; +#ifdef ZTS + jit->tls = IR_UNUSED; +#endif + jit->fp = IR_UNUSED; + jit->trace_loop_ref = IR_UNUSED; + jit->return_inputs = IR_UNUSED; + jit->bb_start_ref = NULL; + jit->bb_predecessors = NULL; + jit->bb_edges = NULL; + jit->trace = NULL; + jit->ra = NULL; + jit->delay_var = -1; + jit->delay_refs = NULL; + jit->eg_exception_addr = 0; + zend_hash_init(&jit->addr_hash, 64, NULL, NULL, 0); + memset(jit->stub_addr, 0, sizeof(jit->stub_addr)); + + ir_START(); +} + +static int zend_jit_free_ctx(zend_jit_ctx *jit) +{ + if (jit->name) { + zend_string_release(jit->name); + } + zend_hash_destroy(&jit->addr_hash); + ir_free(&jit->ctx); + return 1; +} + +static void *zend_jit_ir_compile(ir_ctx *ctx, size_t *size, const char *name) +{ + void *entry; + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_SRC) { + if (name) fprintf(stderr, "%s: ; after folding\n", name); + ir_save(ctx, stderr); + } + +#if ZEND_DEBUG + ir_check(ctx); +#endif + + ir_build_def_use_lists(ctx); + +#if ZEND_DEBUG + ir_check(ctx); +#endif + +#if 1 + ir_sccp(ctx); +#endif + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCCP) { + if (name) fprintf(stderr, "%s: ; after SCCP\n", name); + ir_save(ctx, stderr); + } + + ir_build_cfg(ctx); + ir_build_dominators_tree(ctx); + ir_find_loops(ctx); + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCCP) { + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + } + + ir_gcm(ctx); +
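A note on the hybrid-VM stack setup in zend_jit_init_ctx() above: the red zone reserved inside the interpreter's execute_ex() frame is split into a fixed stub frame plus a 16-byte area for outgoing calls. A runnable sketch of that arithmetic, assuming the 48-byte red zone this patch configures on x86-64 (values are illustrative):

#include <assert.h>

static void hybrid_stack_layout(void)
{
	int red_zone   = 48;                    /* ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE */
	int call_area  = 16;                    /* fixed_call_stack_size            */
	int stub_frame = red_zone - call_area;  /* fixed_stack_frame_size           */

	assert(red_zone > 16);    /* otherwise the frame split is skipped */
	assert(stub_frame == 32); /* 4 spill slots of 8 bytes             */
}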
ir_schedule(ctx); + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE) { + if (name) fprintf(stderr, "%s: ; after schedule\n", name); + ir_save(ctx, stderr); + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + } + + ir_match(ctx); + ctx->flags &= ~IR_USE_FRAME_POINTER; /* don't use FRAME_POINTER even with ALLOCA, TODO: cleanup this ??? */ + ir_assign_virtual_registers(ctx); + ir_compute_live_ranges(ctx); + ir_coalesce(ctx); + ir_reg_alloc(ctx); + + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_AFTER_REGS) { + if (name) fprintf(stderr, "%s: ; after register allocation\n", name); + ir_save(ctx, stderr); + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_REGS) { + ir_dump_live_ranges(ctx, stderr); + } + } + + ir_schedule_blocks(ctx); + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CODEGEN)) { + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CODEGEN) { + if (name) fprintf(stderr, "%s: ; codegen\n", name); + ir_dump_codegen(ctx, stderr); + } else { + if (name) fprintf(stderr, "%s: ; final\n", name); + ir_save(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_CFG) { + ir_dump_cfg(ctx, stderr); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_IR_REGS) { + ir_dump_live_ranges(ctx, stderr); + } + } + +#if ZEND_DEBUG + ir_check(ctx); +#endif + + ctx->code_buffer = *dasm_ptr; + ctx->code_buffer_size = (char*)dasm_end - (char*)*dasm_ptr; + + entry = ir_emit_code(ctx, size); + if (entry) { + *dasm_ptr = (char*)entry + ZEND_MM_ALIGNED_SIZE_EX(*size, 16); + } + +#if defined(IR_TARGET_AARCH64) + if (ctx->veneers_size) { + zend_jit_commit_veneers(); + *size -= ctx->veneers_size; + } +#endif + + return entry; +} + +static int zend_jit_setup_stubs(void) +{ + zend_jit_ctx jit; + void *entry; + size_t size; + uint32_t i; + + for (i = 0; i < sizeof(zend_jit_stubs)/sizeof(zend_jit_stubs[0]); i++) { + zend_jit_init_ctx(&jit, zend_jit_stubs[i].flags); + + if (!zend_jit_stubs[i].stub(&jit)) { + zend_jit_free_ctx(&jit); + zend_jit_stub_handlers[i] = NULL; + continue; + } + + entry = zend_jit_ir_compile(&jit.ctx, &size, zend_jit_stubs[i].name); + if (!entry) { + zend_jit_free_ctx(&jit); + return 0; + } + + zend_jit_stub_handlers[i] = entry; + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { +#ifdef HAVE_CAPSTONE + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { + ir_disasm_add_symbol(zend_jit_stubs[i].name, (uintptr_t)entry, size); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_STUBS) { + ir_disasm(zend_jit_stubs[i].name, + entry, size, (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_ADDR) != 0, &jit.ctx, stderr); + } +#endif +#ifndef _WIN32 + if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { +// ir_mem_unprotect(entry, size); + ir_gdb_register(zend_jit_stubs[i].name, entry, size, 0, 0); +// ir_mem_protect(entry, size); + } + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { + ir_perf_map_register(zend_jit_stubs[i].name, entry, size); + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_register(zend_jit_stubs[i].name, entry, size); + } + } +#endif + } + zend_jit_free_ctx(&jit); + } + return 1; +} + +#define REGISTER_HELPER(n) \ + ir_disasm_add_symbol(#n, (uint64_t)(uintptr_t)n, sizeof(void*)); +#define REGISTER_DATA(n) \ + ir_disasm_add_symbol(#n, (uint64_t)(uintptr_t)&n, sizeof(n)); + +static void zend_jit_setup_disasm(void) +{ +#ifdef HAVE_CAPSTONE + ir_disasm_init(); + + if (zend_vm_kind() == 
ZEND_VM_KIND_HYBRID) { + zend_op opline; + + memset(&opline, 0, sizeof(opline)); + + opline.opcode = ZEND_DO_UCALL; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_UCALL_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_UCALL; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_UCALL_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL_BY_NAME; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_BY_NAME_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL_BY_NAME; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_BY_NAME_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL; + opline.result_type = IS_UNUSED; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_SPEC_RETVAL_UNUSED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_DO_FCALL; + opline.result_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_DO_FCALL_SPEC_RETVAL_USED_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_CONST; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_CONST_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_TMP_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_TMP_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_VAR; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_VAR_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + opline.opcode = ZEND_RETURN; + opline.op1_type = IS_CV; + zend_vm_set_opcode_handler(&opline); + ir_disasm_add_symbol("ZEND_RETURN_SPEC_CV_LABEL", (uint64_t)(uintptr_t)opline.handler, sizeof(void*)); + + ir_disasm_add_symbol("ZEND_HYBRID_HALT_LABEL", (uint64_t)(uintptr_t)zend_jit_halt_op->handler, sizeof(void*)); + } + + REGISTER_DATA(zend_jit_profile_counter); + + REGISTER_HELPER(zend_runtime_jit); + REGISTER_HELPER(zend_jit_hot_func); + REGISTER_HELPER(zend_jit_trace_hot_root); + REGISTER_HELPER(zend_jit_trace_exit); + + REGISTER_HELPER(zend_jit_array_free); + REGISTER_HELPER(zend_jit_undefined_op_helper); + REGISTER_HELPER(zend_jit_pre_inc_typed_ref); + REGISTER_HELPER(zend_jit_pre_dec_typed_ref); + REGISTER_HELPER(zend_jit_post_inc_typed_ref); + REGISTER_HELPER(zend_jit_post_dec_typed_ref); + REGISTER_HELPER(zend_jit_pre_inc); + REGISTER_HELPER(zend_jit_pre_dec); + REGISTER_HELPER(zend_jit_add_arrays_helper); + REGISTER_HELPER(zend_jit_fast_assign_concat_helper); + REGISTER_HELPER(zend_jit_fast_concat_helper); + REGISTER_HELPER(zend_jit_fast_concat_tmp_helper); + REGISTER_HELPER(zend_jit_assign_op_to_typed_ref_tmp); + REGISTER_HELPER(zend_jit_assign_op_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_const_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_tmp_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_var_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_cv_to_typed_ref); + REGISTER_HELPER(zend_jit_assign_const_to_typed_ref2); + 
REGISTER_HELPER(zend_jit_assign_tmp_to_typed_ref2); + REGISTER_HELPER(zend_jit_assign_var_to_typed_ref2); + REGISTER_HELPER(zend_jit_assign_cv_to_typed_ref2); + REGISTER_HELPER(zend_jit_check_constant); + REGISTER_HELPER(zend_jit_get_constant); + REGISTER_HELPER(zend_jit_int_extend_stack_helper); + REGISTER_HELPER(zend_jit_extend_stack_helper); + REGISTER_HELPER(zend_jit_init_func_run_time_cache_helper); + REGISTER_HELPER(zend_jit_find_func_helper); + REGISTER_HELPER(zend_jit_find_ns_func_helper); + REGISTER_HELPER(zend_jit_unref_helper); + REGISTER_HELPER(zend_jit_invalid_method_call); + REGISTER_HELPER(zend_jit_invalid_method_call_tmp); + REGISTER_HELPER(zend_jit_find_method_helper); + REGISTER_HELPER(zend_jit_find_method_tmp_helper); + REGISTER_HELPER(zend_jit_push_static_metod_call_frame); + REGISTER_HELPER(zend_jit_push_static_metod_call_frame_tmp); + REGISTER_HELPER(zend_jit_free_trampoline_helper); + REGISTER_HELPER(zend_jit_verify_return_slow); + REGISTER_HELPER(zend_jit_deprecated_helper); + REGISTER_HELPER(zend_jit_copy_extra_args_helper); + REGISTER_HELPER(zend_jit_vm_stack_free_args_helper); + REGISTER_HELPER(zend_free_extra_named_params); + REGISTER_HELPER(zend_jit_free_call_frame); + REGISTER_HELPER(zend_jit_exception_in_interrupt_handler_helper); + REGISTER_HELPER(zend_jit_verify_arg_slow); + REGISTER_HELPER(zend_missing_arg_error); + REGISTER_HELPER(zend_jit_only_vars_by_reference); + REGISTER_HELPER(zend_jit_leave_func_helper); + REGISTER_HELPER(zend_jit_leave_nested_func_helper); + REGISTER_HELPER(zend_jit_leave_top_func_helper); + REGISTER_HELPER(zend_jit_fetch_global_helper); + REGISTER_HELPER(zend_jit_hash_index_lookup_rw_no_packed); + REGISTER_HELPER(zend_jit_hash_index_lookup_rw); + REGISTER_HELPER(zend_jit_hash_lookup_rw); + REGISTER_HELPER(zend_jit_symtable_find); + REGISTER_HELPER(zend_jit_symtable_lookup_w); + REGISTER_HELPER(zend_jit_symtable_lookup_rw); + REGISTER_HELPER(zend_jit_fetch_dim_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_is_helper); + REGISTER_HELPER(zend_jit_fetch_dim_isset_helper); + REGISTER_HELPER(zend_jit_fetch_dim_rw_helper); + REGISTER_HELPER(zend_jit_fetch_dim_w_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_offset_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_str_is_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_r_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_is_helper); + REGISTER_HELPER(zend_jit_invalid_array_access); + REGISTER_HELPER(zend_jit_zval_array_dup); + REGISTER_HELPER(zend_jit_prepare_assign_dim_ref); + REGISTER_HELPER(zend_jit_fetch_dim_obj_w_helper); + REGISTER_HELPER(zend_jit_fetch_dim_obj_rw_helper); + REGISTER_HELPER(zend_jit_isset_dim_helper); + REGISTER_HELPER(zend_jit_assign_dim_helper); + REGISTER_HELPER(zend_jit_assign_dim_op_helper); + REGISTER_HELPER(zend_jit_fetch_obj_w_slow); + REGISTER_HELPER(zend_jit_fetch_obj_r_slow); + REGISTER_HELPER(zend_jit_fetch_obj_is_slow); + REGISTER_HELPER(zend_jit_fetch_obj_r_dynamic); + REGISTER_HELPER(zend_jit_fetch_obj_is_dynamic); + REGISTER_HELPER(zend_jit_check_array_promotion); + REGISTER_HELPER(zend_jit_create_typed_ref); + REGISTER_HELPER(zend_jit_invalid_property_write); + REGISTER_HELPER(zend_jit_invalid_property_read); + REGISTER_HELPER(zend_jit_extract_helper); + REGISTER_HELPER(zend_jit_invalid_property_assign); + REGISTER_HELPER(zend_jit_assign_to_typed_prop); + REGISTER_HELPER(zend_jit_assign_obj_helper); + REGISTER_HELPER(zend_jit_invalid_property_assign_op); + 
REGISTER_HELPER(zend_jit_assign_op_to_typed_prop); + REGISTER_HELPER(zend_jit_assign_obj_op_helper); + REGISTER_HELPER(zend_jit_invalid_property_incdec); + REGISTER_HELPER(zend_jit_inc_typed_prop); + REGISTER_HELPER(zend_jit_dec_typed_prop); + REGISTER_HELPER(zend_jit_pre_inc_typed_prop); + REGISTER_HELPER(zend_jit_post_inc_typed_prop); + REGISTER_HELPER(zend_jit_pre_dec_typed_prop); + REGISTER_HELPER(zend_jit_post_dec_typed_prop); + REGISTER_HELPER(zend_jit_pre_inc_obj_helper); + REGISTER_HELPER(zend_jit_post_inc_obj_helper); + REGISTER_HELPER(zend_jit_pre_dec_obj_helper); + REGISTER_HELPER(zend_jit_post_dec_obj_helper); + REGISTER_HELPER(zend_jit_rope_end); + +#ifndef ZTS + REGISTER_DATA(EG(current_execute_data)); + REGISTER_DATA(EG(exception)); + REGISTER_DATA(EG(opline_before_exception)); + REGISTER_DATA(EG(vm_interrupt)); + REGISTER_DATA(EG(timed_out)); + REGISTER_DATA(EG(uninitialized_zval)); + REGISTER_DATA(EG(zend_constants)); + REGISTER_DATA(EG(jit_trace_num)); + REGISTER_DATA(EG(vm_stack_top)); + REGISTER_DATA(EG(vm_stack_end)); + REGISTER_DATA(EG(exception_op)); + REGISTER_DATA(EG(symbol_table)); + + REGISTER_DATA(CG(map_ptr_base)); +#endif +#endif +} + +static int zend_jit_calc_trace_prologue_size(void) +{ + zend_jit_ctx jit_ctx; + zend_jit_ctx *jit = &jit_ctx; + void *entry; + size_t size; + + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 0 : IR_START_BR_TARGET); + + if (!GCC_GLOBAL_REGS) { + ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1); + jit_STORE_FP(jit, ref); + jit->ctx.flags |= IR_FASTCALL_FUNC; + } + + ir_UNREACHABLE(); + + entry = zend_jit_ir_compile(&jit->ctx, &size, "JIT$trace_prologue"); + zend_jit_free_ctx(jit); + + if (!entry) { + return 0; + } + + zend_jit_trace_prologue_size = size; + return 1; +} + +#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64) +static uintptr_t zend_jit_hybrid_vm_sp_adj = 0; + +typedef struct _Unwind_Context _Unwind_Context; +typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); +extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); +extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); + +typedef struct _zend_jit_unwind_arg { + int cnt; + uintptr_t cfa[3]; +} zend_jit_unwind_arg; + +static int zend_jit_unwind_cb(_Unwind_Context *ctx, void *a) +{ + zend_jit_unwind_arg *arg = (zend_jit_unwind_arg*)a; + arg->cfa[arg->cnt] = _Unwind_GetCFA(ctx); + arg->cnt++; + if (arg->cnt == 3) { + return 5; // _URC_END_OF_STACK + } + return 0; // _URC_NO_REASON; +} + +static void ZEND_FASTCALL zend_jit_touch_vm_stack_data(void *vm_stack_data) +{ + zend_jit_unwind_arg arg; + + memset(&arg, 0, sizeof(arg)); + _Unwind_Backtrace(zend_jit_unwind_cb, &arg); + if (arg.cnt == 3) { + zend_jit_hybrid_vm_sp_adj = arg.cfa[2] - arg.cfa[1]; + } +} + +extern void (ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data); + +static zend_never_inline void zend_jit_set_sp_adj_vm(void) +{ + void (ZEND_FASTCALL *orig_zend_touch_vm_stack_data)(void *); + + orig_zend_touch_vm_stack_data = zend_touch_vm_stack_data; + zend_touch_vm_stack_data = zend_jit_touch_vm_stack_data; + execute_ex(NULL); // set sp_adj[SP_ADJ_VM] + zend_touch_vm_stack_data = orig_zend_touch_vm_stack_data; +} +#endif + +static int zend_jit_setup(void) +{ +#if defined(IR_TARGET_X86) + if (!zend_cpu_supports_sse2()) { + zend_error(E_CORE_ERROR, "CPU doesn't support SSE2"); + return FAILURE; + } +#endif +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) + allowed_opt_flags = 0; + if (zend_cpu_supports_avx()) { + allowed_opt_flags |= ZEND_JIT_CPU_AVX; + } +#endif +#ifdef ZTS +#if 
defined(IR_TARGET_AARCH64) + tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset(); + ZEND_ASSERT(tsrm_ls_cache_tcb_offset != 0); +# elif defined(_WIN64) + tsrm_tls_index = _tls_index * sizeof(void*); + + /* To find the offset of "_tsrm_ls_cache" in the TLS segment we perform a linear scan of local TLS memory */ + /* There is probably a better solution */ + do { + void ***tls_mem = ((void****)__readgsqword(0x58))[_tls_index]; + void *val = _tsrm_ls_cache; + size_t offset = 0; + size_t size = (char*)&_tls_end - (char*)&_tls_start; + + while (offset < size) { + if (*tls_mem == val) { + tsrm_tls_offset = offset; + break; + } + tls_mem++; + offset += sizeof(void*); + } + if (offset >= size) { + // TODO: error message ??? + return FAILURE; + } + } while(0); +# elif ZEND_WIN32 + tsrm_tls_index = _tls_index * sizeof(void*); + + /* To find the offset of "_tsrm_ls_cache" in the TLS segment we perform a linear scan of local TLS memory */ + /* There is probably a better solution */ + do { + void ***tls_mem = ((void****)__readfsdword(0x2c))[_tls_index]; + void *val = _tsrm_ls_cache; + size_t offset = 0; + size_t size = (char*)&_tls_end - (char*)&_tls_start; + + while (offset < size) { + if (*tls_mem == val) { + tsrm_tls_offset = offset; + break; + } + tls_mem++; + offset += sizeof(void*); + } + if (offset >= size) { + // TODO: error message ??? + return FAILURE; + } + } while(0); +# elif defined(__APPLE__) && defined(__x86_64__) + tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset(); + if (tsrm_ls_cache_tcb_offset == 0) { + size_t *ti; + __asm__( + "leaq __tsrm_ls_cache(%%rip),%0" + : "=r" (ti)); + tsrm_tls_offset = ti[2]; + tsrm_tls_index = ti[1] * 8; + } +# elif defined(__GNUC__) && defined(__x86_64__) + tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset(); + if (tsrm_ls_cache_tcb_offset == 0) { +#if defined(__has_attribute) && __has_attribute(tls_model) && !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__MUSL__) + size_t ret; + + asm ("movq _tsrm_ls_cache@gottpoff(%%rip),%0" + : "=r" (ret)); + tsrm_ls_cache_tcb_offset = ret; +#else + size_t *ti; + + __asm__( + "leaq _tsrm_ls_cache@tlsgd(%%rip), %0\n" + : "=a" (ti)); + tsrm_tls_offset = ti[1]; + tsrm_tls_index = ti[0] * 16; +#endif + } +# elif defined(__GNUC__) && defined(__i386__) + tsrm_ls_cache_tcb_offset = tsrm_get_ls_cache_tcb_offset(); + if (tsrm_ls_cache_tcb_offset == 0) { +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__MUSL__) + size_t ret; + + asm ("leal _tsrm_ls_cache@ntpoff,%0\n" + : "=a" (ret)); + tsrm_ls_cache_tcb_offset = ret; +#else + size_t *ti, _ebx, _ecx, _edx; + + __asm__( + "call 1f\n" + ".subsection 1\n" + "1:\tmovl (%%esp), %%ebx\n\t" + "ret\n" + ".previous\n\t" + "addl $_GLOBAL_OFFSET_TABLE_, %%ebx\n\t" + "leal _tsrm_ls_cache@tlsldm(%%ebx), %0\n\t" + "call ___tls_get_addr@plt\n\t" + "leal _tsrm_ls_cache@tlsldm(%%ebx), %0\n" + : "=a" (ti), "=&b" (_ebx), "=&c" (_ecx), "=&d" (_edx)); + tsrm_tls_offset = ti[1]; + tsrm_tls_index = ti[0] * 8; +#endif + } +# endif +#endif + +#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64) + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + zend_jit_set_sp_adj_vm(); // set zend_jit_hybrid_vm_sp_adj + } +#endif + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { + zend_jit_setup_disasm(); + } + +#ifndef _WIN32 + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_open(); + } + +#endif + zend_long debug = JIT_G(debug); + if (!(debug & ZEND_JIT_DEBUG_ASM_STUBS)) { + JIT_G(debug) &=
~(ZEND_JIT_DEBUG_IR_SRC|ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CFG|ZEND_JIT_DEBUG_IR_REGS| + ZEND_JIT_DEBUG_IR_CODEGEN| + ZEND_JIT_DEBUG_IR_AFTER_SCCP|ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE|ZEND_JIT_DEBUG_IR_AFTER_REGS); + } + + if (!zend_jit_calc_trace_prologue_size()) { + JIT_G(debug) = debug; + return FAILURE; + } + if (!zend_jit_setup_stubs()) { + JIT_G(debug) = debug; + return FAILURE; + } + JIT_G(debug) = debug; + + return SUCCESS; +} + +static void zend_jit_shutdown_ir(void) +{ +#ifndef _WIN32 + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_close(); + } + if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { + ir_gdb_unregister_all(); + } +#endif +#ifdef HAVE_CAPSTONE + if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_ASM_STUBS)) { + ir_disasm_free(); + } +#endif +} + +/* PHP control flow reconstruction helpers */ +static ir_ref jit_IF_ex(zend_jit_ctx *jit, ir_ref condition, ir_ref true_block) +{ + ir_ref ref = ir_IF(condition); + /* op3 is used as a temporary storage for PHP BB number to reconstruct PHP control flow. + * + * It's used in jit_IF_TRUE_FALSE_ex() to select IF_TRUE or IF_FALSE instructions + * to start target block + */ + ir_set_op(&jit->ctx, ref, 3, true_block); + return ref; +} + +static void jit_IF_TRUE_FALSE_ex(zend_jit_ctx *jit, ir_ref if_ref, ir_ref true_block) +{ + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + ZEND_ASSERT(if_ref); + ZEND_ASSERT(jit->ctx.ir_base[if_ref].op == IR_IF); + ZEND_ASSERT(jit->ctx.ir_base[if_ref].op3); + if (jit->ctx.ir_base[if_ref].op3 == true_block) { + ir_IF_TRUE(if_ref); + } else { + ir_IF_FALSE(if_ref); + } +} + +static void _zend_jit_add_predecessor_ref(zend_jit_ctx *jit, int b, int pred, ir_ref ref) +{ + int i, *p; + zend_basic_block *bb; + ir_ref *r, header; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + bb = &jit->ssa->cfg.blocks[b]; + p = &jit->ssa->cfg.predecessors[bb->predecessor_offset]; + r = &jit->bb_edges[jit->bb_predecessors[b]]; + for (i = 0; i < bb->predecessors_count; i++, p++, r++) { + if (*p == pred) { + ZEND_ASSERT(*r == IR_UNUSED || *r == ref); + header = jit->bb_start_ref[b]; + if (header) { + /* this is back edge */ + ZEND_ASSERT(jit->ctx.ir_base[header].op == IR_LOOP_BEGIN); + if (jit->ctx.ir_base[ref].op == IR_END) { + jit->ctx.ir_base[ref].op = IR_LOOP_END; + } else if (jit->ctx.ir_base[ref].op == IR_IF) { + jit_IF_TRUE_FALSE_ex(jit, ref, b); + ref = ir_LOOP_END(); + } else if (jit->ctx.ir_base[ref].op == IR_UNREACHABLE) { + ir_BEGIN(ref); + ref = ir_LOOP_END(); + } else { + ZEND_UNREACHABLE(); + } + ir_MERGE_SET_OP(header, i + 1, ref); + } + *r = ref; + return; + } + } + ZEND_UNREACHABLE(); +} + +static void _zend_jit_merge_smart_branch_inputs(zend_jit_ctx *jit, + uint32_t true_label, + uint32_t false_label, + ir_ref true_inputs, + ir_ref false_inputs) +{ + ir_ref true_path = IR_UNUSED, false_path = IR_UNUSED; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + if (true_inputs) { + ZEND_ASSERT(jit->ctx.ir_base[true_inputs].op == IR_END); + if (!jit->ctx.ir_base[true_inputs].op2) { + true_path = true_inputs; + } else { + ir_MERGE_list(true_inputs); + true_path = ir_END(); + } + } + if (false_inputs) { + ZEND_ASSERT(jit->ctx.ir_base[false_inputs].op == IR_END); + if (!jit->ctx.ir_base[false_inputs].op2) { + false_path = false_inputs; + } else { + ir_MERGE_list(false_inputs); + false_path = ir_END(); + } + } + + if (true_label == false_label && true_path && false_path) { + ir_MERGE_2(true_path, false_path); + _zend_jit_add_predecessor_ref(jit, true_label, jit->b, 
ir_END()); + } else { + if (true_path) { + _zend_jit_add_predecessor_ref(jit, true_label, jit->b, true_path); + } + if (false_path) { + _zend_jit_add_predecessor_ref(jit, false_label, jit->b, false_path); + } + } + + jit->b = -1; +} + +static void _zend_jit_fix_merges(zend_jit_ctx *jit) +{ + int i, count; + ir_ref j, k, n, *p, *q, *r; + ir_ref ref; + ir_insn *insn, *phi; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + count = jit->ssa->cfg.blocks_count; + for (i = 0, p = jit->bb_start_ref; i < count; i++, p++) { + ref = *p; + if (ref) { + insn = &jit->ctx.ir_base[ref]; + if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { + n = insn->inputs_count; + /* Remove IS_UNUSED inputs */ + for (j = k = 0, q = r = insn->ops + 1; j < n; j++, q++) { + if (*q) { + if (q != r) { + *r = *q; + phi = insn + 1 + (n >> 2); + while (phi->op == IR_PI) { + phi++; + } + while (phi->op == IR_PHI) { + ir_insn_set_op(phi, k + 2, ir_insn_op(phi, j + 2)); + phi += 1 + ((n + 1) >> 2); + } + } + k++; + r++; + } + } + if (k != n) { + ir_ref n2, k2; + + if (k <= 1) { + insn->op = IR_BEGIN; + insn->inputs_count = 0; + } else { + insn->inputs_count = k; + } + n2 = 1 + (n >> 2); + k2 = 1 + (k >> 2); + while (k2 != n2) { + (insn+k2)->optx = IR_NOP; + k2++; + } + phi = insn + 1 + (n >> 2); + while (phi->op == IR_PI) { + phi++; + } + while (phi->op == IR_PHI) { + if (k <= 1) { + phi->op = IR_COPY; + phi->op1 = phi->op2; + phi->op2 = 1; + } + n2 = 1 + ((n + 1) >> 2); + k2 = 1 + ((k + 1) >> 2); + while (k2 != n2) { + (insn+k2)->optx = IR_NOP; + k2++; + } + phi += 1 + ((n + 1) >> 2); + } + } + } + } + } +} + +static void zend_jit_case_start(zend_jit_ctx *jit, int switch_b, int case_b, ir_ref switch_ref) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[switch_b]; + const zend_op *opline = &jit->op_array->opcodes[bb->start + bb->len - 1]; + + if (opline->opcode == ZEND_SWITCH_LONG + || opline->opcode == ZEND_SWITCH_STRING + || opline->opcode == ZEND_MATCH) { + HashTable *jumptable = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + const zend_op *default_opline = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + int default_b = jit->ssa->cfg.map[default_opline - jit->op_array->opcodes]; + zval *zv; + ir_ref list = IR_UNUSED, idx; + bool first = 1; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + const zend_op *target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + int b = jit->ssa->cfg.map[target - jit->op_array->opcodes]; + + if (b == case_b) { + if (!first) { + ir_END_list(list); + } + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(switch_ref, idx); + first = 0; + } + } ZEND_HASH_FOREACH_END(); + if (default_b == case_b) { + if (!first) { + ir_END_list(list); + } + if (jit->ctx.ir_base[switch_ref].op3) { + /* op3 may contain a list of additional "default" path inputs for MATCH */ + ir_ref ref = jit->ctx.ir_base[switch_ref].op3; + jit->ctx.ir_base[switch_ref].op3 = IS_UNDEF; + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_END); + ir_ref end = ref; + while (jit->ctx.ir_base[end].op2) { + ZEND_ASSERT(jit->ctx.ir_base[end].op == IR_END); + end = jit->ctx.ir_base[end].op2; + } + jit->ctx.ir_base[end].op2 = list; + list = ref; + } + ir_CASE_DEFAULT(switch_ref); + } + if (list) { + ir_END_list(list); + ir_MERGE_list(list); + } + } else { + ZEND_UNREACHABLE(); + } +} + +static int zend_jit_bb_start(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb; + int i, n, *p, pred; + ir_ref ref, bb_start; + + 
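The block-start logic that follows walks a flattened predecessor-edge table: for basic block b, the IR refs of the incoming control edges are stored contiguously starting at jit->bb_predecessors[b]. A minimal sketch of the indexing, with simplified hypothetical types (the real arrays are ir_ref fields of zend_jit_ctx):

typedef struct {
	int *bb_predecessors; /* per-block start offset into bb_edges[]   */
	int *bb_edges;        /* one IR ref per (block, predecessor) pair */
} edge_table;

/* IR ref of the i-th incoming edge of block b */
static int incoming_edge(const edge_table *t, int b, int i)
{
	return t->bb_edges[t->bb_predecessors[b] + i];
}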
ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + ZEND_ASSERT(b < jit->ssa->cfg.blocks_count); + bb = &jit->ssa->cfg.blocks[b]; + ZEND_ASSERT((bb->flags & ZEND_BB_REACHABLE) != 0); + n = bb->predecessors_count; + + if (n == 0) { + /* pass */ + ZEND_ASSERT(jit->ctx.control); +#if ZEND_DEBUG + ref = jit->ctx.control; + ir_insn *insn = &jit->ctx.ir_base[ref]; + while (insn->op >= IR_CALL && insn->op <= IR_TRAP) { + ref = insn->op1; + insn = &jit->ctx.ir_base[ref]; + } + ZEND_ASSERT(insn->op == IR_START); + ZEND_ASSERT(ref == 1); +#endif + bb_start = 1; + if (jit->ssa->cfg.flags & ZEND_FUNC_RECURSIVE_DIRECTLY) { + /* prevent END/BEGIN merging */ + jit->ctx.control = ir_emit1(&jit->ctx, IR_BEGIN, ir_END()); + bb_start = jit->ctx.control; + } + } else if (n == 1) { + ZEND_ASSERT(!jit->ctx.control); + pred = jit->ssa->cfg.predecessors[bb->predecessor_offset]; + ref = jit->bb_edges[jit->bb_predecessors[b]]; + if (ref == IR_UNUSED) { + if (!jit->ctx.control) { + ir_BEGIN(IR_UNUSED); /* unreachable block */ + } + } else { + ir_op op = jit->ctx.ir_base[ref].op; + + if (op == IR_IF) { + if (!jit->ctx.control) { + jit_IF_TRUE_FALSE_ex(jit, ref, b); + } else { + ir_ref entry_path = ir_END(); + jit_IF_TRUE_FALSE_ex(jit, ref, b); + ir_MERGE_WITH(entry_path); + } + } else if (op == IR_SWITCH) { + zend_jit_case_start(jit, pred, b, ref); + } else { + if (!jit->ctx.control) { + ZEND_ASSERT(op == IR_END || op == IR_UNREACHABLE || op == IR_RETURN); + if ((jit->ssa->cfg.blocks[b].flags & ZEND_BB_RECV_ENTRY) + && (jit->ssa->cfg.flags & ZEND_FUNC_RECURSIVE_DIRECTLY)) { + /* prevent END/BEGIN merging */ + jit->ctx.control = ir_emit1(&jit->ctx, IR_BEGIN, ref); + } else { + ir_BEGIN(ref); + } + } else { + ir_MERGE_WITH(ref); + } + } + } + bb_start = jit->ctx.control; + } else { + int forward_edges_count = 0; + int back_edges_count = 0; + ir_ref *pred_refs; + ir_ref entry_path = IR_UNUSED; + ALLOCA_FLAG(use_heap); + + ZEND_ASSERT(!jit->ctx.control); + if (jit->ctx.control) { + entry_path = ir_END(); + } + pred_refs = (ir_ref *)do_alloca(sizeof(ir_ref) * n, use_heap); + for (i = 0, p = jit->ssa->cfg.predecessors + bb->predecessor_offset; i < n; p++, i++) { + pred = *p; + if (jit->bb_start_ref[pred]) { + /* forward edge */ + forward_edges_count++; + ref = jit->bb_edges[jit->bb_predecessors[b] + i]; + if (ref == IR_UNUSED) { + /* dead edge */ + pred_refs[i] = IR_UNUSED; + } else { + ir_op op = jit->ctx.ir_base[ref].op; + + if (op == IR_IF) { + jit_IF_TRUE_FALSE_ex(jit, ref, b); + pred_refs[i] = ir_END(); + } else if (op == IR_SWITCH) { + zend_jit_case_start(jit, pred, b, ref); + pred_refs[i] = ir_END(); + } else { + ZEND_ASSERT(op == IR_END || op == IR_UNREACHABLE || op == IR_RETURN); + pred_refs[i] = ref; + } + } + } else { + /* backward edge */ + back_edges_count++; + pred_refs[i] = IR_UNUSED; + } + } + + if (bb->flags & ZEND_BB_LOOP_HEADER) { + ZEND_ASSERT(back_edges_count != 0); + ZEND_ASSERT(forward_edges_count != 0); + ir_MERGE_N(n, pred_refs); + jit->ctx.ir_base[jit->ctx.control].op = IR_LOOP_BEGIN; + bb_start = jit->ctx.control; + if (entry_path) { + ir_MERGE_WITH(entry_path); + } + } else { +// ZEND_ASSERT(back_edges_count != 0); + /* edges from exceptional blocks may be counted as back edges */ + ir_MERGE_N(n, pred_refs); + bb_start = jit->ctx.control; + if (entry_path) { + ir_MERGE_WITH(entry_path); + } + } + free_alloca(pred_refs, use_heap); + } + jit->b = b; + jit->bb_start_ref[b] = bb_start; + + if ((bb->flags & ZEND_BB_ENTRY) || (bb->idom >= 0 && jit->bb_start_ref[bb->idom] < jit->ctx.fold_cse_limit)) 
{ + jit->ctx.fold_cse_limit = bb_start; + } + + return 1; +} + +static int zend_jit_bb_end(zend_jit_ctx *jit, int b) +{ + int succ; + zend_basic_block *bb; + + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + if (jit->b != b) { + return 1; + } + + bb = &jit->ssa->cfg.blocks[b]; + ZEND_ASSERT(bb->successors_count != 0); + if (bb->successors_count == 1) { + succ = bb->successors[0]; + } else { + const zend_op *opline = &jit->op_array->opcodes[bb->start + bb->len - 1]; + + /* Use only the following successor of SWITCH and FE_RESET_R */ + ZEND_ASSERT(opline->opcode == ZEND_SWITCH_LONG + || opline->opcode == ZEND_SWITCH_STRING + || opline->opcode == ZEND_MATCH + || opline->opcode == ZEND_FE_RESET_R); + succ = b + 1; + } + _zend_jit_add_predecessor_ref(jit, succ, b, ir_END()); + jit->b = -1; + return 1; +} + +static int jit_CMP_IP(zend_jit_ctx *jit, ir_op op, const zend_op *next_opline) +{ + ir_ref ref; + +#if 1 + if (GCC_GLOBAL_REGS) { + ref = jit_IP32(jit); + } else { + ref = ir_LOAD_U32(jit_EX(opline)); + } + ref = ir_CMP_OP(op, ref, ir_CONST_U32((uint32_t)(uintptr_t)next_opline)); +#else + if (GCC_GLOBAL_REGS) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(opline)); + } + ref = ir_CMP_OP(op, ref, ir_CONST_ADDR(next_opline)); +#endif + return ref; +} + +static int zend_jit_cond_jmp(zend_jit_ctx *jit, const zend_op *next_opline, int target_block) +{ + ir_ref ref; + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + + ZEND_ASSERT(bb->successors_count == 2); + if (bb->successors[0] == bb->successors[1]) { + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + jit->b = -1; + zend_jit_set_last_valid_opline(jit, next_opline); + return 1; + } + + ref = jit_IF_ex(jit, jit_CMP_IP(jit, IR_NE, next_opline), target_block); + + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + + jit->b = -1; + zend_jit_set_last_valid_opline(jit, next_opline); + + return 1; +} + +static int zend_jit_set_cond(zend_jit_ctx *jit, const zend_op *next_opline, uint32_t var) +{ + ir_ref ref; + + ref = ir_ADD_U32(ir_ZEXT_U32(jit_CMP_IP(jit, IR_EQ, next_opline)), ir_CONST_U32(IS_FALSE)); + + // EX_VAR(var) = ... + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), var + offsetof(zval, u1.type_info)), ref); + + zend_jit_reset_last_valid_opline(jit); + return zend_jit_set_ip(jit, next_opline - 1); +} + +/* PHP JIT handlers */ +static void zend_jit_check_exception(zend_jit_ctx *jit) +{ + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler)); +} + +static void zend_jit_check_exception_undef_result(zend_jit_ctx *jit, const zend_op *opline) +{ + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, + (opline->result_type & (IS_TMP_VAR|IS_VAR)) ? 
jit_stub_exception_handler_undef : jit_stub_exception_handler)); +} + +static void zend_jit_type_check_undef(zend_jit_ctx *jit, + ir_ref type, + uint32_t var, + const zend_op *opline, + bool check_exception, + bool in_cold_path) +{ + ir_ref if_def = ir_IF(type); + + if (!in_cold_path) { + ir_IF_FALSE_cold(if_def); + } else { + ir_IF_FALSE(if_def); + } + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(var)); + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_MERGE_WITH_EMPTY_TRUE(if_def); +} + +static ir_ref zend_jit_zval_check_undef(zend_jit_ctx *jit, + ir_ref ref, + uint32_t var, + const zend_op *opline, + bool check_exception) +{ + ir_ref if_def, ref2; + + if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_FALSE_cold(if_def); + + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(var)); + + if (check_exception) { + zend_jit_check_exception(jit); + } + + ref2 = jit_EG(uninitialized_zval); + + ir_MERGE_WITH_EMPTY_TRUE(if_def); + + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static void zend_jit_recv_entry(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[b]; + int pred; + ir_ref ref; + + ZEND_ASSERT(bb->predecessors_count > 0); + + pred = jit->bb_predecessors[b]; + ref = jit->bb_edges[pred]; + + ZEND_ASSERT(ref); + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_END); + + /* Insert a MERGE block with additional ENTRY input between predecessor and this one */ + ir_ENTRY(ref, bb->start); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + + ir_MERGE_WITH(ref); + jit->bb_edges[pred] = ir_END(); +} + +static void zend_jit_osr_entry(zend_jit_ctx *jit, int b) +{ + zend_basic_block *bb = &jit->ssa->cfg.blocks[b]; + ir_ref ref = ir_END(); + + /* Insert a MERGE block with additional ENTRY input between predecessor and this one */ + ir_ENTRY(ref, bb->start); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + + ir_MERGE_WITH(ref); +} + +static ir_ref zend_jit_continue_entry(zend_jit_ctx *jit, ir_ref src, unsigned int label) +{ + ir_ENTRY(src, label); + if (!GCC_GLOBAL_REGS) { + /* 2 is hardcoded reference to IR_PARAM */ + ZEND_ASSERT(jit->ctx.ir_base[2].op == IR_PARAM); + ZEND_ASSERT(jit->ctx.ir_base[2].op3 == 1); + jit_STORE_FP(jit, 2); + } + return ir_END(); +} + +static int zend_jit_handler(zend_jit_ctx *jit, const zend_op *opline, int may_throw) +{ + ir_ref ref; + const void *handler; + + zend_jit_set_ip(jit, opline); + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + handler = zend_get_opcode_handler_func(opline); + } else { + handler = opline->handler; + } + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ref = ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(handler), ref); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + /* Skip the following OP_DATA */ + switch (opline->opcode) { + case ZEND_ASSIGN_DIM: + case ZEND_ASSIGN_OBJ: + case ZEND_ASSIGN_STATIC_PROP: + case ZEND_ASSIGN_DIM_OP: + case ZEND_ASSIGN_OBJ_OP: + case ZEND_ASSIGN_STATIC_PROP_OP: + case ZEND_ASSIGN_STATIC_PROP_REF: + case ZEND_ASSIGN_OBJ_REF: + zend_jit_set_last_valid_opline(jit, opline 
+ 2); + break; + default: + zend_jit_set_last_valid_opline(jit, opline + 1); + break; + } + return 1; +} + +static int zend_jit_tail_handler(zend_jit_ctx *jit, const zend_op *opline) +{ + const void *handler; + ir_ref ref; + zend_basic_block *bb; + + zend_jit_set_ip(jit, opline); + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + if (opline->opcode == ZEND_DO_UCALL || + opline->opcode == ZEND_DO_FCALL_BY_NAME || + opline->opcode == ZEND_DO_FCALL || + opline->opcode == ZEND_RETURN) { + + /* Use inlined HYBRID VM handler */ + handler = opline->handler; + ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + handler = zend_get_opcode_handler_func(opline); + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + ref = ir_LOAD_A(jit_IP(jit)); + ir_TAILCALL(IR_VOID, ref); + } + } else { + handler = opline->handler; + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ir_TAILCALL_1(IR_I32, ir_CONST_FC_FUNC(handler), ref); + } + } + if (jit->b >= 0) { + bb = &jit->ssa->cfg.blocks[jit->b]; + if (bb->successors_count > 0 + && (opline->opcode == ZEND_DO_FCALL + || opline->opcode == ZEND_DO_UCALL + || opline->opcode == ZEND_DO_FCALL_BY_NAME + || opline->opcode == ZEND_INCLUDE_OR_EVAL + || opline->opcode == ZEND_GENERATOR_CREATE + || opline->opcode == ZEND_YIELD + || opline->opcode == ZEND_YIELD_FROM + || opline->opcode == ZEND_FAST_CALL)) { + /* Add a fake control edge from UNREACHABLE to the following ENTRY */ + int succ; + + if (bb->successors_count == 1) { + succ = bb->successors[0]; + ZEND_ASSERT(jit->ssa->cfg.blocks[succ].flags & ZEND_BB_ENTRY); + } else { + /* Use only the following successor of FAST_CALL */ + ZEND_ASSERT(opline->opcode == ZEND_FAST_CALL); + succ = jit->b + 1; + /* we need an entry */ + jit->ssa->cfg.blocks[succ].flags |= ZEND_BB_ENTRY; + } + ref = jit->ctx.insns_count - 1; + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_UNREACHABLE); + ref = zend_jit_continue_entry(jit, ref, jit->ssa->cfg.blocks[succ].start); + _zend_jit_add_predecessor_ref(jit, succ, jit->b, ref); + } + jit->b = -1; + zend_jit_reset_last_valid_opline(jit); + } + return 1; +} + +static int zend_jit_call(zend_jit_ctx *jit, const zend_op *opline, unsigned int next_block) +{ + return zend_jit_tail_handler(jit, opline); +} + +static int zend_jit_spill_store(zend_jit_ctx *jit, zend_jit_addr src, zend_jit_addr dst, uint32_t info, bool set_type) +{ + ZEND_ASSERT(Z_MODE(src) == IS_REG); + ZEND_ASSERT(Z_MODE(dst) == IS_MEM_ZVAL); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_LVAL(jit, dst, zend_jit_use_reg(jit, src)); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_DVAL(jit, dst, zend_jit_use_reg(jit, src)); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_load_reg(zend_jit_ctx *jit, zend_jit_addr src, zend_jit_addr dst, uint32_t info) +{ + ZEND_ASSERT(Z_MODE(src) == IS_MEM_ZVAL); + ZEND_ASSERT(Z_MODE(dst) == IS_REG); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + zend_jit_def_reg(jit, dst, jit_Z_LVAL(jit, src)); + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + zend_jit_def_reg(jit, dst, 
jit_Z_DVAL(jit, src)); + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_var(zend_jit_ctx *jit, uint32_t info, int var, int ssa_var, bool set_type) +{ + zend_jit_addr src = ZEND_ADDR_REG(ssa_var); + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + return zend_jit_spill_store(jit, src, dst, info, set_type); +} + +static int zend_jit_store_ref(zend_jit_ctx *jit, uint32_t info, int var, int32_t src, bool set_type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static ir_ref zend_jit_deopt_rload(zend_jit_ctx *jit, ir_type type, int32_t reg) +{ + ir_ref ref = jit->ctx.control; + ir_insn *insn; + + while (1) { + insn = &jit->ctx.ir_base[ref]; + if (insn->op == IR_RLOAD && insn->op2 == reg) { + ZEND_ASSERT(insn->type == type); + return ref; + } else if (insn->op == IR_START) { + break; + } + ref = insn->op1; + } + return ir_RLOAD(type, reg); +} + +static int zend_jit_store_const_long(zend_jit_ctx *jit, int var, zend_long val) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_ref src = ir_CONST_LONG(val); + + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } + jit_set_Z_LVAL(jit, dst, src); + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + return 1; +} + +static int zend_jit_store_const_double(zend_jit_ctx *jit, int var, double val) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_ref src = ir_CONST_DOUBLE(val); + + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } + jit_set_Z_DVAL(jit, dst, src); + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + return 1; +} + +static int zend_jit_store_type(zend_jit_ctx *jit, int var, uint8_t type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + ZEND_ASSERT(type <= IS_DOUBLE); + jit_set_Z_TYPE_INFO(jit, dst, type); + return 1; +} + +static int zend_jit_store_reg(zend_jit_ctx *jit, uint32_t info, int var, int8_t reg, bool in_mem, bool set_type) +{ + zend_jit_addr src; + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + ir_type type; + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + type = IR_LONG; + src = zend_jit_deopt_rload(jit, type, reg); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else if (!in_mem) { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + type = IR_DOUBLE; + src = zend_jit_deopt_rload(jit, type, reg); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else if (!in_mem) { + 
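	/* Sketch (hypothetical macro names, editor's assumption): the store/load
	 * helpers in this area translate between CV numbers and frame byte offsets
	 * via EX_NUM_TO_VAR()/EX_VAR_TO_NUM(); a CV's zval sits at a fixed byte
	 * offset past the zend_execute_data header slots, i.e. roughly:
	 *
	 *   #define VAR_OFFSET_OF(n)  (((n) + ZEND_CALL_FRAME_SLOT) * sizeof(zval))
	 *   #define VAR_NUM_OF(off)   ((off) / sizeof(zval) - ZEND_CALL_FRAME_SLOT)
	 */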
jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_spill_slot(zend_jit_ctx *jit, uint32_t info, int var, int8_t reg, int32_t offset, bool set_type) +{ + zend_jit_addr src; + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + if ((info & MAY_BE_ANY) == MAY_BE_LONG) { + src = ir_LOAD_L(ir_ADD_OFFSET(ir_RLOAD_A(reg), offset)); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else { + jit_set_Z_LVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_LONG)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_LONG); + } + } + } else if ((info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + src = ir_LOAD_D(ir_ADD_OFFSET(ir_RLOAD_A(reg), offset)); + if (jit->ra && jit->ra[var].ref == IR_NULL) { + zend_jit_def_reg(jit, ZEND_ADDR_REG(var), src); + } else { + jit_set_Z_DVAL(jit, dst, src); + if (set_type && + (Z_REG(dst) != ZREG_FP || + !JIT_G(current_frame) || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(Z_OFFSET(dst))) != IS_DOUBLE)) { + jit_set_Z_TYPE_INFO(jit, dst, IS_DOUBLE); + } + } + } else { + ZEND_UNREACHABLE(); + } + return 1; +} + +static int zend_jit_store_var_type(zend_jit_ctx *jit, int var, uint32_t type) +{ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + jit_set_Z_TYPE_INFO(jit, dst, type); + return 1; +} + +static int zend_jit_zval_try_addref(zend_jit_ctx *jit, zend_jit_addr var_addr) +{ + ir_ref if_refcounted, end1; + + if_refcounted = jit_if_REFCOUNTED(jit, var_addr); + ir_IF_FALSE(if_refcounted); + end1 = ir_END(); + ir_IF_TRUE(if_refcounted); + jit_GC_ADDREF(jit, jit_Z_PTR(jit, var_addr)); + ir_MERGE_WITH(end1); + return 1; +} + +static int zend_jit_store_var_if_necessary(zend_jit_ctx *jit, int var, zend_jit_addr src, uint32_t info) +{ + if (Z_MODE(src) == IS_REG && Z_STORE(src)) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + return zend_jit_spill_store(jit, src, dst, info, 1); + } + return 1; +} + +static int zend_jit_store_var_if_necessary_ex(zend_jit_ctx *jit, int var, zend_jit_addr src, uint32_t info, zend_jit_addr old, uint32_t old_info) +{ + if (Z_MODE(src) == IS_REG && Z_STORE(src)) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + bool set_type = 1; + + if ((info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == + (old_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF))) { + if (Z_MODE(old) != IS_REG || Z_LOAD(old) || Z_STORE(old)) { + if (JIT_G(current_frame)) { + uint32_t mem_type = STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)); + + if (mem_type != IS_UNKNOWN + && (info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == (1 << mem_type)) { + set_type = 0; + } + } else { + set_type = 0; + } + } + } + return zend_jit_spill_store(jit, src, dst, info, set_type); + } + return 1; +} + +static int zend_jit_load_var(zend_jit_ctx *jit, uint32_t info, int var, int ssa_var) +{ + zend_jit_addr src = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + zend_jit_addr dst = ZEND_ADDR_REG(ssa_var); + + return zend_jit_load_reg(jit, src, dst, info); +} + +static int zend_jit_invalidate_var_if_necessary(zend_jit_ctx *jit, uint8_t op_type, zend_jit_addr addr, znode_op op) +{ + if ((op_type & 
(IS_TMP_VAR|IS_VAR)) && Z_MODE(addr) == IS_REG && !Z_LOAD(addr) && !Z_STORE(addr)) { + /* Invalidate operand type to prevent incorrect destruction by exception_handler_free_op1_op2() */ + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op.var); + jit_set_Z_TYPE_INFO(jit, dst, IS_UNDEF); + } + return 1; +} + +static int zend_jit_update_regs(zend_jit_ctx *jit, uint32_t var, zend_jit_addr src, zend_jit_addr dst, uint32_t info) +{ + if (!zend_jit_same_addr(src, dst)) { + if (Z_MODE(src) == IS_REG) { + if (Z_MODE(dst) == IS_REG) { + zend_jit_def_reg(jit, dst, zend_jit_use_reg(jit, src)); + if (!Z_LOAD(src) && !Z_STORE(src) && Z_STORE(dst)) { + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!zend_jit_spill_store(jit, dst, var_addr, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + } else if (Z_MODE(dst) == IS_MEM_ZVAL) { + if (!Z_LOAD(src) && !Z_STORE(src)) { + if (!zend_jit_spill_store(jit, src, dst, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + } else { + ZEND_UNREACHABLE(); + } + } else if (Z_MODE(src) == IS_MEM_ZVAL) { + if (Z_MODE(dst) == IS_REG) { + if (!zend_jit_load_reg(jit, src, dst, info)) { + return 0; + } + } else { + ZEND_UNREACHABLE(); + } + } else { + ZEND_UNREACHABLE(); + } + } else if (Z_MODE(dst) == IS_REG && Z_STORE(dst)) { + dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + if (!zend_jit_spill_store(jit, src, dst, info, + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + JIT_G(current_frame) == NULL || + STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var)) == IS_UNKNOWN || + (1 << STACK_MEM_TYPE(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(var))) != (info & MAY_BE_ANY) + )) { + return 0; + } + } + return 1; +} + +static int zend_jit_inc_dec(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op1_def_info, zend_jit_addr op1_def_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_overflow, int may_throw) +{ + ir_ref if_long = IR_UNUSED; + ir_ref op1_lval_ref = IR_UNUSED; + ir_ref ref; + ir_op op; + + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)-MAY_BE_LONG)) { + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long); + } + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + op1_lval_ref = jit_Z_LVAL(jit, op1_addr); + jit_set_Z_LVAL(jit, res_addr, op1_lval_ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + if (Z_MODE(op1_def_addr) == IS_MEM_ZVAL + && Z_MODE(op1_addr) == IS_REG + && !Z_LOAD(op1_addr) + && !Z_STORE(op1_addr)) { + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_LONG); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + op = may_overflow ? IR_ADD_OV : IR_ADD; + } else { + op = may_overflow ?
IR_SUB_OV : IR_SUB; + } + if (!op1_lval_ref) { + op1_lval_ref = jit_Z_LVAL(jit, op1_addr); + } + ref = ir_BINARY_OP_L(op, op1_lval_ref, ir_CONST_LONG(1)); + if (op1_def_info & MAY_BE_LONG) { + jit_set_Z_LVAL(jit, op1_def_addr, ref); + } + if (may_overflow && + (((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD)) || + ((opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD))))) { + int32_t exit_point; + const void *exit_addr; + zend_jit_trace_stack *stack; + uint32_t old_op1_info, old_res_info = 0; + + stack = JIT_G(current_frame)->stack; + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), IS_DOUBLE, 0); + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } + if (opline->result_type != IS_UNUSED) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + if (opline->opcode == ZEND_PRE_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else if (opline->opcode == ZEND_PRE_DEC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else if (opline->opcode == ZEND_POST_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_LONG(ZEND_LONG_MAX)); + } else if (opline->opcode == ZEND_POST_DEC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_LONG(ZEND_LONG_MIN)); + } + } + + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + ir_GUARD_NOT(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info); + if (opline->result_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + } else if (may_overflow) { + ir_ref if_overflow; + ir_ref merge_inputs = IR_UNUSED; + + if (((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD)) + || (opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD))) { + int32_t exit_point; + const void *exit_addr; + zend_jit_trace_stack *stack; + uint32_t old_res_info = 0; + + stack = JIT_G(current_frame)->stack; + if (opline->result_type != IS_UNUSED) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + if (opline->opcode == ZEND_PRE_INC) { + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_LONG, 0); + } + } + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + 
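/* Annotation (not in the original patch): the inc/dec result is guarded to be DOUBLE on this trace, so the no-overflow case is the cold one — publish the long result and leave the trace through the side exit; the overflow path continues below. */ +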
if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE_cold(if_overflow); + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + ir_IF_TRUE(if_overflow); + } else { + ir_GUARD(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + } + if (opline->result_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + } else { + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + ir_END_list(merge_inputs); + + /* overflow => cold path */ + ir_IF_TRUE_cold(if_overflow); + } + + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (Z_MODE(op1_def_addr) == IS_REG) { + jit_set_Z_DVAL(jit, op1_def_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, op1_def_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_DOUBLE); + } + } else { + if (Z_MODE(op1_def_addr) == IS_REG) { + jit_set_Z_DVAL(jit, op1_def_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, op1_def_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, op1_def_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_DOUBLE); + } + } + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } else { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + + if (merge_inputs) { + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } + } else { + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + ir_ref merge_inputs = ir_END(); + + /* !is_long => cold path */ + ir_IF_FALSE_cold(if_long); + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + jit_SET_EX_OPLINE(jit, opline); + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); 
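+ /* Annotation (not in the original patch): undefined CV on the cold path — emit the "Undefined variable" warning and continue treating the operand as NULL. */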
+ ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + + jit_set_Z_TYPE_INFO(jit, op1_def_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + + op1_info |= MAY_BE_NULL; + } + + ref = jit_ZVAL_ADDR(jit, op1_addr); + + if (op1_info & MAY_BE_REF) { + ir_ref if_ref, if_typed, func, ref2, arg2; + + if_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_TRUE(if_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + + if_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE(if_typed); + + if (RETURN_VALUE_USED(opline)) { + ZEND_ASSERT(Z_MODE(res_addr) != IS_REG); + arg2 = jit_ZVAL_ADDR(jit, res_addr); + } else { + arg2 = IR_NULL; + } + if (opline->opcode == ZEND_PRE_INC) { + func = ir_CONST_FC_FUNC(zend_jit_pre_inc_typed_ref); + } else if (opline->opcode == ZEND_PRE_DEC) { + func = ir_CONST_FC_FUNC(zend_jit_pre_dec_typed_ref); + } else if (opline->opcode == ZEND_POST_INC) { + func = ir_CONST_FC_FUNC(zend_jit_post_inc_typed_ref); + } else if (opline->opcode == ZEND_POST_DEC) { + func = ir_CONST_FC_FUNC(zend_jit_post_dec_typed_ref); + } else { + ZEND_UNREACHABLE(); + } + + ir_CALL_2(IR_VOID, func, ref2, arg2); + zend_jit_check_exception(jit); + ir_END_list(merge_inputs); + + ir_IF_FALSE(if_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } + + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + jit_ZVAL_COPY(jit, + res_addr, + res_use_info, + ZEND_ADDR_REF_ZVAL(ref), op1_info, 1); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + if (opline->opcode == ZEND_PRE_INC && opline->result_type != IS_UNUSED) { + ir_ref arg2 = jit_ZVAL_ADDR(jit, res_addr); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_inc), ref, arg2); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(increment_function), ref); + } + } else { + if (opline->opcode == ZEND_PRE_DEC && opline->result_type != IS_UNUSED) { + ir_ref arg2 = jit_ZVAL_ADDR(jit, res_addr); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_dec), ref, arg2); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(decrement_function), ref); + } + } + if (may_throw) { + zend_jit_check_exception(jit); + } + } else { + ref = jit_Z_DVAL(jit, op1_addr); + if (opline->opcode == ZEND_POST_INC || opline->opcode == ZEND_POST_DEC) { + jit_set_Z_DVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if (opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_POST_INC) { + op = IR_ADD; + } else { + op = IR_SUB; + } + ref = ir_BINARY_OP_D(op, ref, ir_CONST_DOUBLE(1.0)); + jit_set_Z_DVAL(jit, op1_def_addr, ref); + if ((opline->opcode == ZEND_PRE_INC || opline->opcode == ZEND_PRE_DEC) && + opline->result_type != IS_UNUSED) { + jit_set_Z_DVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } + if (!zend_jit_store_var_if_necessary_ex(jit, opline->op1.var, op1_def_addr, op1_def_info, op1_addr, op1_info)) { + return 0; + } + if (opline->result_type != IS_UNUSED) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } + return 1; +} + +static int zend_jit_math_long_long(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + 
zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_overflow) +{ + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_op op; + ir_ref op1, op2, ref, if_overflow = IR_UNUSED; + + if (opcode == ZEND_ADD) { + op = may_overflow ? IR_ADD_OV : IR_ADD; + } else if (opcode == ZEND_SUB) { + op = may_overflow ? IR_SUB_OV : IR_SUB; + } else if (opcode == ZEND_MUL) { + op = may_overflow ? IR_MUL_OV : IR_MUL; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_L(op, op1, op2); + + if (may_overflow) { + if (res_info & MAY_BE_GUARD) { + if ((res_info & MAY_BE_ANY) == MAY_BE_LONG) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t old_res_info; + int32_t exit_point; + const void *exit_addr; + + if (opline->opcode == ZEND_ADD + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } else if (opline->opcode == ZEND_SUB + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } else { + exit_point = zend_jit_trace_get_exit_point(opline, 0); + } + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD_NOT(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + may_overflow = 0; + } else if ((res_info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_OVERFLOW(ref), ir_CONST_ADDR(exit_addr)); + } else { + ZEND_UNREACHABLE(); + } + } else { + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + } + } + + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + jit_set_Z_LVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + } + + if (may_overflow) { + ir_ref fast_path = IR_UNUSED; + + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + fast_path = ir_END(); + ir_IF_TRUE_cold(if_overflow); + } + if (opcode == ZEND_ADD) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MAX + 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + 
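/* Annotation (not in the original patch): x + 1 can only overflow when x == ZEND_LONG_MAX, so the double result stored above is a compile-time constant; rejoin the fast path. */ +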
ir_MERGE_WITH(fast_path); + } + return 1; + } + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1) { + if (Z_MODE(res_addr) == IS_REG) { + jit_set_Z_DVAL(jit, res_addr, ir_CONST_DOUBLE((double)ZEND_LONG_MIN - 1.0)); + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + ir_MERGE_WITH(fast_path); + } + return 1; + } + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else { + ZEND_UNREACHABLE(); + } +#if 1 + /* reload */ + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); +#endif +#if 1 + /* disable CSE */ + ir_ref old_cse_limit = jit->ctx.fold_cse_limit; + jit->ctx.fold_cse_limit = 0x7fffffff; +#endif + op1 = ir_INT2D(op1); + op2 = ir_INT2D(op2); +#if 1 + jit->ctx.fold_cse_limit = old_cse_limit; +#endif + ref = ir_BINARY_OP_D(op, op1, op2); + jit_set_Z_DVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + if ((res_info & MAY_BE_ANY) != MAY_BE_DOUBLE) { + ir_MERGE_WITH(fast_path); + } + } + + return 1; +} + +static int zend_jit_math_long_double(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = jit_Z_DVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, ir_INT2D(op1), op2); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + return 1; +} + +static int zend_jit_math_double_long(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_DVAL(jit, op1_addr); + op2 = jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, op1, ir_INT2D(op2)); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + return 1; +} + +static int zend_jit_math_double_double(zend_jit_ctx *jit, + uint8_t opcode, + zend_jit_addr op1_addr, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint32_t res_use_info) +{ + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_op op; + ir_ref op1, op2, ref; + + if (opcode == ZEND_ADD) { + op = IR_ADD; + } else if (opcode == ZEND_SUB) { + op = IR_SUB; + } else if (opcode == ZEND_MUL) { + op = IR_MUL; + } else if (opcode == ZEND_DIV) { + op = IR_DIV; + } else { + 
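/* Annotation (not in the original patch): only ADD/SUB/MUL/DIV are dispatched through these typed math helpers. */ +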
ZEND_UNREACHABLE(); + } + op1 = jit_Z_DVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_DVAL(jit, op2_addr); + ref = ir_BINARY_OP_D(op, op1, op2); + jit_set_Z_DVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + } + return 1; +} + +static int zend_jit_math_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + uint32_t res_var, + zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_overflow, + int may_throw) +{ + ir_ref if_op1_long = IR_UNUSED; + ir_ref if_op1_double = IR_UNUSED; + ir_ref if_op2_double = IR_UNUSED; + ir_ref if_op1_long_op2_long = IR_UNUSED; + ir_ref if_op1_long_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_long = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_refs *end_inputs; + ir_refs *res_inputs; + + ir_refs_init(end_inputs, 6); + ir_refs_init(res_inputs, 6); + + if (Z_MODE(op1_addr) == IS_REG) { + if (!has_concrete_type(op2_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op1_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op1_addr); + } + } else if (Z_MODE(op2_addr) == IS_REG) { + if (!has_concrete_type(op1_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op2_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op2_addr); + } + } + + if (Z_MODE(res_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + + if ((res_info & MAY_BE_GUARD) && (res_info & MAY_BE_LONG) && (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)) { + if (op1_info & (MAY_BE_ANY-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_op1_long_op2_long); + } + if (!zend_jit_math_long_long(jit, opline, opcode, op1_addr, op2_addr, res_addr, res_info, res_use_info, may_overflow)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + ir_END_list(slow_inputs); + } + if (if_op1_long_op2_long) { + ir_IF_FALSE_cold(if_op1_long_op2_long); + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op1_info & (MAY_BE_ANY-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_long); + if (op2_info & MAY_BE_DOUBLE) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + if (!zend_jit_math_long_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else { + ir_END_list(slow_inputs); + } + ir_IF_TRUE(if_op1_long_op2_long); + } + if 
(!zend_jit_math_long_long(jit, opline, opcode, op1_addr, op2_addr, res_addr, res_info, res_use_info, may_overflow)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + } + + if (op1_info & MAY_BE_DOUBLE) { + if (op1_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + if (!zend_jit_math_double_long(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if (if_op1_long) { + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_DOUBLE) && + !(op1_info & MAY_BE_LONG) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (res_info & MAY_BE_DOUBLE)) { + if (op1_info & (MAY_BE_ANY-MAY_BE_DOUBLE)) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op2_info & MAY_BE_LONG)) { + if (op2_info & (MAY_BE_ANY-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + if (!zend_jit_math_double_long(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if ((op2_info & MAY_BE_DOUBLE) && + !(op2_info & MAY_BE_LONG) && + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (res_info & MAY_BE_DOUBLE)) { + if (op2_info & (MAY_BE_ANY-MAY_BE_DOUBLE)) { + if_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op2_double); + } + if (op1_info & MAY_BE_DOUBLE) { + if (!same_ops && (op1_info & (MAY_BE_ANY-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + if (!zend_jit_math_double_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + if (if_op1_double_op2_double) { + 
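/* Annotation (not in the original patch): op2 turned out not to be a double — fall through and try the double-op1 / long-op2 combination below. */ +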
ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op1_info & MAY_BE_LONG)) { + if (op1_info & (MAY_BE_ANY-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + if (!zend_jit_math_long_double(jit, opcode, op1_addr, op2_addr, res_addr, res_use_info)) { + return 0; + } + ir_refs_add(end_inputs, ir_END()); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))) { + ir_ref func, arg1, arg2, arg3; + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + } + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + if (Z_MODE(res_addr) == IS_REG) { + arg1 = jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var)); + } else { + arg1 = jit_ZVAL_ADDR(jit, res_addr); + } + arg2 = jit_ZVAL_ADDR(jit, op1_addr); + arg3 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if (opcode == ZEND_ADD) { + func = ir_CONST_FC_FUNC(add_function); + } else if (opcode == ZEND_SUB) { + func = ir_CONST_FC_FUNC(sub_function); + } else if (opcode == ZEND_MUL) { + func = ir_CONST_FC_FUNC(mul_function); + } else if (opcode == ZEND_DIV) { + func = ir_CONST_FC_FUNC(div_function); + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, func, arg1, arg2, arg3); + + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var); + if (!zend_jit_load_reg(jit, real_addr, res_addr, res_info)) { + return 0; + } + } + ir_refs_add(end_inputs, ir_END()); + } + + if (end_inputs->count) { + ir_MERGE_N(end_inputs->count, end_inputs->refs); + } + + if (Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(end_inputs->count == res_inputs->count); + jit->delay_var = -1; + jit->delay_refs = NULL; + if (res_inputs->count == 1) { + zend_jit_def_reg(jit, res_addr, res_inputs->refs[0]); + } else { + ir_ref phi = ir_PHI_N((res_info & MAY_BE_LONG) ? 
IR_LONG : IR_DOUBLE, res_inputs->count, res_inputs->refs); + zend_jit_def_reg(jit, res_addr, phi); + } + } + + return 1; +} + +static int zend_jit_math(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, zend_jit_addr op2_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_overflow, int may_throw) +{ + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + + if (!zend_jit_math_helper(jit, opline, opline->opcode, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, opline->result.var, res_addr, res_info, res_use_info, may_overflow, may_throw)) { + return 0; + } + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + return 1; +} + +static int zend_jit_add_arrays(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, zend_jit_addr op2_addr, zend_jit_addr res_addr) +{ + ir_ref ref; + ir_ref arg1 = jit_Z_PTR(jit, op1_addr); + ir_ref arg2 = jit_Z_PTR(jit, op2_addr); + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_add_arrays_helper), arg1, arg2); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_ARRAY_EX); + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + return 1; +} + +static int zend_jit_long_math_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t opcode, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + zend_ssa_range *op1_range, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + uint32_t res_var, + zend_jit_addr res_addr, + uint32_t res_info, + uint32_t res_use_info, + int may_throw) +{ + ir_ref ref = IR_UNUSED; + ir_ref if_long1 = IR_UNUSED; + ir_ref if_long2 = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + ir_refs *res_inputs; + + ir_refs_init(res_inputs, 2); + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if_long1 = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long1); + } + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + if_long2 = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_long2); + } + + if (opcode == ZEND_SL) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (UNEXPECTED((zend_ulong)op2_lval >= SIZEOF_ZEND_LONG * 8)) { + if (EXPECTED(op2_lval > 0)) { + ref = ir_CONST_LONG(0); + } else { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_negative_shift)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } + } else { + ref = ir_SHL_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ref = jit_Z_LVAL(jit, op2_addr); + if (!op2_range || + op2_range->min < 0 || + op2_range->max >= SIZEOF_ZEND_LONG * 8) { + + ir_ref if_wrong, cold_path, ref2, if_ok; + ir_ref op1_ref = jit_Z_LVAL(jit, op1_addr); + + if_wrong = ir_IF(ir_UGT(ref, ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1))); + ir_IF_TRUE_cold(if_wrong); + if_ok = ir_IF(ir_GE(ref, ir_CONST_LONG(0))); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + 
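/* Annotation (not in the original patch): negative shift count — invalidate any register-held operand copies and jump to the negative_shift stub, which raises the error. */ +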
zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_negative_shift)); + ir_IF_TRUE(if_ok); + ref2 = ir_CONST_LONG(0); + cold_path = ir_END(); + ir_IF_FALSE(if_wrong); + ref = ir_SHL_L(op1_ref, ref); + ir_MERGE_WITH(cold_path); + ref = ir_PHI_2(IR_LONG, ref, ref2); + } else { + ref = ir_SHL_L(jit_Z_LVAL(jit, op1_addr), ref); + } + } + } else if (opcode == ZEND_SR) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (UNEXPECTED((zend_ulong)op2_lval >= SIZEOF_ZEND_LONG * 8)) { + if (EXPECTED(op2_lval > 0)) { + ref = ir_SAR_L( + jit_Z_LVAL(jit, op1_addr), + ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1)); + } else { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_negative_shift)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } + } else { + ref = ir_SAR_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ref = jit_Z_LVAL(jit, op2_addr); + if (!op2_range || + op2_range->min < 0 || + op2_range->max >= SIZEOF_ZEND_LONG * 8) { + + ir_ref if_wrong, cold_path, ref2, if_ok; + + if_wrong = ir_IF(ir_UGT(ref, ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1))); + ir_IF_TRUE_cold(if_wrong); + if_ok = ir_IF(ir_GE(ref, ir_CONST_LONG(0))); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_negative_shift)); + ir_IF_TRUE(if_ok); + ref2 = ir_CONST_LONG((SIZEOF_ZEND_LONG * 8) - 1); + cold_path = ir_END(); + ir_IF_FALSE(if_wrong); + ir_MERGE_WITH(cold_path); + ref = ir_PHI_2(IR_LONG, ref, ref2); + } + ref = ir_SAR_L(jit_Z_LVAL(jit, op1_addr), ref); + } + } else if (opcode == ZEND_MOD) { + if (Z_MODE(op2_addr) == IS_CONST_ZVAL) { + zend_long op2_lval = Z_LVAL_P(Z_ZV(op2_addr)); + + if (op2_lval == 0) { + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(IR_FALSE, jit_STUB_ADDR(jit, jit_stub_mod_by_zero)); + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_def_reg(jit, res_addr, ir_CONST_LONG(0)); // dead code + } + } else if (zend_long_is_power_of_two(op2_lval) && op1_range && op1_range->min >= 0) { + ref = ir_AND_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval - 1)); + } else { + ref = ir_MOD_L(jit_Z_LVAL(jit, op1_addr), ir_CONST_LONG(op2_lval)); + } + } else { + ir_ref zero_path = 0; + ir_ref op1_ref = jit_Z_LVAL(jit, op1_addr); + + ref = jit_Z_LVAL(jit, op2_addr); + if ((op2_type & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE)) || !op2_range || (op2_range->min <= 0 && op2_range->max >= 0)) { + ir_ref if_ok = ir_IF(ref); + ir_IF_FALSE(if_ok); + jit_SET_EX_OPLINE(jit, opline); + zend_jit_invalidate_var_if_necessary(jit, op1_type, op1_addr, op1); + zend_jit_invalidate_var_if_necessary(jit, op2_type, op2_addr, op2); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_mod_by_zero)); + ir_IF_TRUE(if_ok); + } + + /* Prevent overflow error/crash if op1 == LONG_MIN and op2 == -1 */ + if (!op2_range || (op2_range->min <= -1 && op2_range->max >= -1)) { + ir_ref if_minus_one = ir_IF(ir_EQ(ref, ir_CONST_LONG(-1))); + 
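/* Annotation (not in the original patch): op2 == -1 bypasses IR_MOD (ZEND_LONG_MIN % -1 faults in the hardware division) and yields the defined result 0 via the PHI below. */ +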
ir_IF_TRUE_cold(if_minus_one); + zero_path = ir_END(); + ir_IF_FALSE(if_minus_one); + } + ref = ir_MOD_L(op1_ref, ref); + + if (zero_path) { + ir_MERGE_WITH(zero_path); + ref = ir_PHI_2(IR_LONG, ref, ir_CONST_LONG(0)); + } + } + } else { + ir_op op; + ir_ref op1, op2; + + if (opcode == ZEND_BW_OR) { + op = IR_OR; + } else if (opcode == ZEND_BW_AND) { + op = IR_AND; + } else if (opcode == ZEND_BW_XOR) { + op = IR_XOR; + } else { + ZEND_UNREACHABLE(); + } + op1 = jit_Z_LVAL(jit, op1_addr); + op2 = (same_ops) ? op1 : jit_Z_LVAL(jit, op2_addr); + ref = ir_BINARY_OP_L(op, op1, op2); + } + + if (ref) { + if (Z_MODE(res_addr) == IS_REG + && ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) + || (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)))) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + jit_set_Z_LVAL(jit, res_addr, ref); + if (Z_MODE(res_addr) != IS_REG) { + if (!zend_jit_same_addr(op1_addr, res_addr)) { + if ((res_use_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF|MAY_BE_GUARD)) != MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + } + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + ir_ref fast_path = ir_END(); + ir_ref func, arg1, arg2, arg3; + + if (if_long2 && if_long1) { + ir_ref ref; + ir_IF_FALSE_cold(if_long2); + ref = ir_END(); + ir_IF_FALSE_cold(if_long1); + ir_MERGE_2(ref, ir_END()); + } else if (if_long1) { + ir_IF_FALSE_cold(if_long1); + } else if (if_long2) { + ir_IF_FALSE_cold(if_long2); + } + + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + + if (op2_info & MAY_BE_UNDEF) { + ir_ref if_def; + + if_def = jit_if_not_Z_TYPE(jit, op2_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op2.var)))); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op2.var)); + + jit_set_Z_TYPE_INFO(jit, op2_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + if (Z_MODE(res_addr) == IS_REG) { + arg1 = jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var)); + } else { + arg1 = jit_ZVAL_ADDR(jit, res_addr); + } + arg2 = jit_ZVAL_ADDR(jit, op1_addr); + arg3 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if (opcode == ZEND_BW_OR) { + func = ir_CONST_FC_FUNC(bitwise_or_function); + } else if (opcode == ZEND_BW_AND) { + func = ir_CONST_FC_FUNC(bitwise_and_function); + } else if (opcode == ZEND_BW_XOR) { + func = ir_CONST_FC_FUNC(bitwise_xor_function); + } else if (opcode == ZEND_SL) { + func = ir_CONST_FC_FUNC(shift_left_function); + } else if (opcode == ZEND_SR) { + func = 
ir_CONST_FC_FUNC(shift_right_function); + } else if (opcode == ZEND_MOD) { + func = ir_CONST_FC_FUNC(mod_function); + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, func, arg1, arg2, arg3); + + if (op1_addr == res_addr && (op2_info & MAY_BE_RCN)) { + /* compound assignment may decrement "op2" refcount */ + op2_info |= MAY_BE_RC1; + } + + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + + if (Z_MODE(res_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, res_var); + if (!zend_jit_load_reg(jit, real_addr, res_addr, res_info)) { + return 0; + } + } + + ir_MERGE_2(fast_path, ir_END()); + + if (Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count == 2); + jit->delay_var = -1; + jit->delay_refs = NULL; + if (res_inputs->count == 1) { + zend_jit_def_reg(jit, res_addr, res_inputs->refs[0]); + } else { + ir_ref phi = ir_PHI_N(IR_LONG, res_inputs->count, res_inputs->refs); + zend_jit_def_reg(jit, res_addr, phi); + } + } + } + + return 1; +} + +static int zend_jit_long_math(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_ssa_range *op1_range, zend_jit_addr op1_addr, uint32_t op2_info, zend_ssa_range *op2_range, zend_jit_addr op2_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr, int may_throw) +{ + ZEND_ASSERT((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)); + + if (!zend_jit_long_math_helper(jit, opline, opline->opcode, + opline->op1_type, opline->op1, op1_addr, op1_info, op1_range, + opline->op2_type, opline->op2, op2_addr, op2_info, op2_range, + opline->result.var, res_addr, res_info, res_use_info, may_throw)) { + return 0; + } + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + return 1; +} + +static int zend_jit_concat_helper(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t op1_type, + znode_op op1, + zend_jit_addr op1_addr, + uint32_t op1_info, + uint8_t op2_type, + znode_op op2, + zend_jit_addr op2_addr, + uint32_t op2_info, + zend_jit_addr res_addr, + int may_throw) +{ + ir_ref if_op1_string = IR_UNUSED; + ir_ref if_op2_string = IR_UNUSED; + ir_ref fast_path = IR_UNUSED; + + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) { + if_op1_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_op1_string); + } + if (op2_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) { + if_op2_string = jit_if_Z_TYPE(jit, op2_addr, IS_STRING); + ir_IF_TRUE(if_op2_string); + } + if (zend_jit_same_addr(op1_addr, res_addr)) { + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op2_addr); + + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_assign_concat_helper), arg1, arg2); + /* concatenation with itself may reduce refcount */ + op2_info |= MAY_BE_RC1; + } else { + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op1_addr); + ir_ref arg3 = jit_ZVAL_ADDR(jit, op2_addr); + + if (op1_type == IS_CV || op1_type == 
IS_CONST) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_concat_helper), arg1, arg2, arg3); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fast_concat_tmp_helper), arg1, arg2, arg3); + } + } + /* concatenation with empty string may increase refcount */ + op2_info |= MAY_BE_RCN; + jit_FREE_OP(jit, op2_type, op2, op2_info, opline); + if (if_op1_string || if_op2_string) { + fast_path = ir_END(); + } + } + if ((op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING)) || + (op2_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF) - MAY_BE_STRING))) { + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + if (if_op1_string && if_op2_string) { + ir_IF_FALSE(if_op1_string); + ir_MERGE_WITH_EMPTY_FALSE(if_op2_string); + } else if (if_op1_string) { + ir_IF_FALSE_cold(if_op1_string); + } else if (if_op2_string) { + ir_IF_FALSE_cold(if_op2_string); + } + } + ir_ref arg1 = jit_ZVAL_ADDR(jit, res_addr); + ir_ref arg2 = jit_ZVAL_ADDR(jit, op1_addr); + ir_ref arg3 = jit_ZVAL_ADDR(jit, op2_addr); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(concat_function), arg1, arg2, arg3); + /* concatenation with empty string may increase refcount */ + op1_info |= MAY_BE_RCN; + op2_info |= MAY_BE_RCN; + jit_FREE_OP(jit, op1_type, op1, op1_info, NULL); + jit_FREE_OP(jit, op2_type, op2, op2_info, NULL); + if (may_throw) { + if (opline->opcode == ZEND_ASSIGN_DIM_OP && (opline->op2_type & (IS_VAR|IS_TMP_VAR))) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_exception_handler_free_op2)); + } else if (Z_MODE(res_addr) == IS_MEM_ZVAL && Z_REG(res_addr) == ZREG_RX) { + zend_jit_check_exception_undef_result(jit, opline); + } else { + zend_jit_check_exception(jit); + } + } + if ((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)) { + ir_MERGE_WITH(fast_path); + } + } + return 1; +} + +static int zend_jit_concat(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op2_info, zend_jit_addr res_addr, int may_throw) +{ + zend_jit_addr op1_addr, op2_addr; + + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + ZEND_ASSERT((op1_info & MAY_BE_STRING) && (op2_info & MAY_BE_STRING)); + + op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + return zend_jit_concat_helper(jit, opline, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, res_addr, may_throw); +} + +static int zend_jit_assign_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op1_def_info, zend_ssa_range *op1_range, uint32_t op2_info, zend_ssa_range *op2_range, int may_overflow, int may_throw) +{ + int result = 1; + zend_jit_addr op1_addr, op2_addr; + ir_ref slow_path = IR_UNUSED; + + + ZEND_ASSERT(opline->op1_type == IS_CV && opline->result_type == IS_UNUSED); + ZEND_ASSERT(!(op1_info & MAY_BE_UNDEF) && !(op2_info & MAY_BE_UNDEF)); + + op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + if (op1_info & MAY_BE_REF) { + ir_ref ref, ref2, arg2, op1_noref_path; + ir_ref if_op1_ref = IR_UNUSED; + ir_ref if_op1_typed = IR_UNUSED; + binary_op_type binary_op = get_binary_op(opline->extended_value); + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_op1_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_FALSE(if_op1_ref); + op1_noref_path = ir_END(); + ir_IF_TRUE(if_op1_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + + if_op1_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE_cold(if_op1_typed); + + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + jit_SET_EX_OPLINE(jit, opline); + if 
((opline->op2_type & (IS_TMP_VAR|IS_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref_tmp), + ref2, arg2, ir_CONST_FC_FUNC(binary_op)); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + ref2, arg2, ir_CONST_FC_FUNC(binary_op)); + } + zend_jit_check_exception(jit); + slow_path = ir_END(); + + ir_IF_FALSE(if_op1_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + + ir_MERGE_WITH(op1_noref_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + case ZEND_DIV: + result = zend_jit_math_helper(jit, opline, opline->extended_value, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, opline->op1.var, op1_addr, op1_def_info, op1_info, may_overflow, may_throw); + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + result = zend_jit_long_math_helper(jit, opline, opline->extended_value, + opline->op1_type, opline->op1, op1_addr, op1_info, op1_range, + opline->op2_type, opline->op2, op2_addr, op2_info, op2_range, + opline->op1.var, op1_addr, op1_def_info, op1_info, may_throw); + break; + case ZEND_CONCAT: + result = zend_jit_concat_helper(jit, opline, opline->op1_type, opline->op1, op1_addr, op1_info, opline->op2_type, opline->op2, op2_addr, op2_info, op1_addr, may_throw); + break; + default: + ZEND_UNREACHABLE(); + } + + if (op1_info & MAY_BE_REF) { + ir_MERGE_WITH(slow_path); + } + + return result; +} + +static ir_ref jit_ZVAL_DEREF_ref(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref if_ref, ref2; + + if_ref = ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_REFERENCE))); + ir_IF_TRUE(if_ref); + ref2 = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_reference, val)); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static zend_jit_addr jit_ZVAL_DEREF(zend_jit_ctx *jit, zend_jit_addr addr) +{ + ir_ref ref = jit_ZVAL_ADDR(jit, addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + return ZEND_ADDR_REF_ZVAL(ref); +} + +static ir_ref jit_ZVAL_INDIRECT_DEREF_ref(zend_jit_ctx *jit, ir_ref ref) +{ + ir_ref if_ref, ref2; + + if_ref = ir_IF(ir_EQ(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_INDIRECT))); + ir_IF_TRUE(if_ref); + ref2 = jit_Z_PTR_ref(jit, ref); + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + return ir_PHI_2(IR_ADDR, ref2, ref); +} + +static zend_jit_addr jit_ZVAL_INDIRECT_DEREF(zend_jit_ctx *jit, zend_jit_addr addr) +{ + ir_ref ref = jit_ZVAL_ADDR(jit, addr); + ref = jit_ZVAL_INDIRECT_DEREF_ref(jit, ref); + return ZEND_ADDR_REF_ZVAL(ref); +} + +static int zend_jit_simple_assign(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr var_addr, + uint32_t var_info, + uint32_t var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr res_addr, + bool check_exception) +{ + ir_ref end_inputs = IR_UNUSED; + + if (Z_MODE(val_addr) == IS_CONST_ZVAL) { + zval *zv = Z_ZV(val_addr); + + if (!res_addr) { + jit_ZVAL_COPY_CONST(jit, + var_addr, + var_info, var_def_info, + zv, 1); + } else { + jit_ZVAL_COPY_CONST(jit, + var_addr, + var_info, var_def_info, + zv, 1); + jit_ZVAL_COPY_CONST(jit, + res_addr, + -1, var_def_info, + zv, 1); + } + } else { + if (val_info & MAY_BE_UNDEF) { + ir_ref if_def, ret; + + if_def = jit_if_not_Z_TYPE(jit, val_addr, IS_UNDEF); + 
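/* Annotation (not in the original patch): the source CV may be undefined — on the cold path store NULL into the target (and result), warn, and optionally route through the exception stub. */ +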
ir_IF_FALSE_cold(if_def); + + jit_set_Z_TYPE_INFO(jit, var_addr, IS_NULL); + if (res_addr) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + jit_SET_EX_OPLINE(jit, opline); + + ZEND_ASSERT(Z_MODE(val_addr) == IS_MEM_ZVAL); + // zend_error(E_WARNING, "Undefined variable $%s", ZSTR_VAL(CV_DEF_OF(EX_VAR_TO_NUM(opline->op1.var)))); + ret = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(Z_OFFSET(val_addr))); + + if (check_exception) { + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + } + + ir_END_list(end_inputs); + ir_IF_TRUE(if_def); + } + if (val_info & MAY_BE_REF) { + if (val_type == IS_CV) { + ir_ref ref = jit_ZVAL_ADDR(jit, val_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + val_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref ref, type, if_ref, ref2, refcount, if_not_zero; + + ref = jit_ZVAL_ADDR(jit, val_addr); + type = jit_Z_TYPE_ref(jit, ref); + if_ref = ir_IF(ir_EQ(type, ir_CONST_U8(IS_REFERENCE))); + + ir_IF_TRUE_cold(if_ref); + ref = jit_Z_PTR_ref(jit, ref); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + if (!res_addr) { + jit_ZVAL_COPY(jit, + var_addr, + var_info, + ZEND_ADDR_REF_ZVAL(ref2), val_info, 1); + } else { + jit_ZVAL_COPY_2(jit, + res_addr, + var_addr, + var_info, + ZEND_ADDR_REF_ZVAL(ref2), val_info, 2); + } + + refcount = jit_GC_DELREF(jit, ref); + if_not_zero = ir_IF(refcount); + ir_IF_FALSE(if_not_zero); + // TODO: instead of dtor() call and ADDREF above, we may call efree() and move addref at "true" path ??? + // This is related to GH-10168 (keep this before GH-10168 is completely closed) + // jit_EFREE(jit, ref, sizeof(zend_reference), NULL, NULL); + jit_ZVAL_DTOR(jit, ref, val_info, opline); + ir_END_list(end_inputs); + ir_IF_TRUE(if_not_zero); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_ref); + } + } + + if (!res_addr) { + jit_ZVAL_COPY(jit, + var_addr, + var_info, + val_addr, val_info, val_type == IS_CV); + } else { + jit_ZVAL_COPY_2(jit, + res_addr, + var_addr, + var_info, + val_addr, val_info, val_type == IS_CV ? 
2 : 1); + } + } + + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_assign_to_variable_call(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr __var_use_addr, + zend_jit_addr var_addr, + uint32_t __var_info, + uint32_t __var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr __res_addr, + bool __check_exception) +{ + jit_stub_id func; + ir_ref undef_path = IR_UNUSED; + + if (val_info & MAY_BE_UNDEF) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_not_Z_TYPE(jit, val_addr, IS_UNDEF, exit_addr); + } else { + ir_ref if_def; + + ZEND_ASSERT(Z_MODE(val_addr) == IS_MEM_ZVAL && Z_REG(val_addr) == ZREG_FP); + if_def = ir_IF(jit_Z_TYPE(jit, val_addr)); + ir_IF_FALSE_cold(if_def); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(Z_OFFSET(val_addr))); + + ir_CALL_2(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_assign_const, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, var_addr), + jit_EG(uninitialized_zval)); + + undef_path = ir_END(); + ir_IF_TRUE(if_def); + } + } + + if (!(val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF))) { + func = jit_stub_assign_tmp; + } else if (val_type == IS_CONST) { + func = jit_stub_assign_const; + } else if (val_type == IS_TMP_VAR) { + func = jit_stub_assign_tmp; + } else if (val_type == IS_VAR) { + if (!(val_info & MAY_BE_REF)) { + func = jit_stub_assign_tmp; + } else { + func = jit_stub_assign_var; + } + } else if (val_type == IS_CV) { + if (!(val_info & MAY_BE_REF)) { + func = jit_stub_assign_cv_noref; + } else { + func = jit_stub_assign_cv; + } + } else { + ZEND_UNREACHABLE(); + } + + if (opline) { + jit_SET_EX_OPLINE(jit, opline); + } + + ir_CALL_2(IR_VOID, jit_STUB_FUNC_ADDR(jit, func, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, val_addr)); + + if (undef_path) { + ir_MERGE_WITH(undef_path); + } + + return 1; +} + +static int zend_jit_assign_to_variable(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr var_use_addr, + zend_jit_addr var_addr, + uint32_t var_info, + uint32_t var_def_info, + uint8_t val_type, + zend_jit_addr val_addr, + uint32_t val_info, + zend_jit_addr res_addr, + zend_jit_addr ref_addr, + bool check_exception) +{ + ir_ref if_refcounted = IR_UNUSED; + ir_ref simple_inputs = IR_UNUSED; + bool done = 0; + zend_jit_addr real_res_addr = 0; + ir_refs *end_inputs; + ir_refs *res_inputs; + + ir_refs_init(end_inputs, 6); + ir_refs_init(res_inputs, 6); + + if (Z_MODE(val_addr) == IS_REG && jit->ra[Z_SSA_VAR(val_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, val_addr); + } + + if (Z_MODE(var_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(var_addr); + jit->delay_refs = res_inputs; + if (Z_MODE(res_addr) == IS_REG) { + real_res_addr = res_addr; + res_addr = 0; + } + } else if (Z_MODE(res_addr) == IS_REG) { + jit->delay_var = Z_SSA_VAR(res_addr); + jit->delay_refs = res_inputs; + } + + if ((var_info & MAY_BE_REF) || ref_addr) { + ir_ref ref = 0, if_ref = 0, ref2, arg2, if_typed, non_ref_path; + uintptr_t func; + + if (!ref_addr) { + ref = jit_ZVAL_ADDR(jit, var_use_addr); + if_ref = jit_if_Z_TYPE_ref(jit, ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_TRUE(if_ref); + ref2 = 
jit_Z_PTR_ref(jit, ref); + } else { + ref2 = jit_ZVAL_ADDR(jit, ref_addr); + } + if_typed = jit_if_TYPED_REF(jit, ref2); + ir_IF_TRUE_cold(if_typed); + jit_SET_EX_OPLINE(jit, opline); + if (Z_MODE(val_addr) == IS_REG) { + ZEND_ASSERT(opline->opcode == ZEND_ASSIGN); + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, val_addr, real_addr, val_info, 1)) { + return 0; + } + arg2 = jit_ZVAL_ADDR(jit, real_addr); + } else { + arg2 = jit_ZVAL_ADDR(jit, val_addr); + } + if (!res_addr) { + if (val_type == IS_CONST) { + func = (uintptr_t)zend_jit_assign_const_to_typed_ref; + } else if (val_type == IS_TMP_VAR) { + func = (uintptr_t)zend_jit_assign_tmp_to_typed_ref; + } else if (val_type == IS_VAR) { + func = (uintptr_t)zend_jit_assign_var_to_typed_ref; + } else if (val_type == IS_CV) { + func = (uintptr_t)zend_jit_assign_cv_to_typed_ref; + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(func), ref2, arg2); + } else { + if (val_type == IS_CONST) { + func = (uintptr_t)zend_jit_assign_const_to_typed_ref2; + } else if (val_type == IS_TMP_VAR) { + func = (uintptr_t)zend_jit_assign_tmp_to_typed_ref2; + } else if (val_type == IS_VAR) { + func = (uintptr_t)zend_jit_assign_var_to_typed_ref2; + } else if (val_type == IS_CV) { + func = (uintptr_t)zend_jit_assign_cv_to_typed_ref2; + } else { + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(func), ref2, arg2, jit_ZVAL_ADDR(jit, res_addr)); + } + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + + if (!ref_addr) { + ir_IF_FALSE(if_ref); + non_ref_path = ir_END(); + ir_IF_FALSE(if_typed); + ref2 = ir_ADD_OFFSET(ref2, offsetof(zend_reference, val)); + ir_MERGE_WITH(non_ref_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + var_addr = var_use_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_IF_FALSE(if_typed); + } + } + + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + ir_ref ref, counter, if_not_zero; + + if (var_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if_refcounted = jit_if_REFCOUNTED(jit, var_use_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(simple_inputs); + ir_IF_TRUE_cold(if_refcounted); + } else if (RC_MAY_BE_1(var_info)) { + done = 1; + } + ref = jit_Z_PTR(jit, var_use_addr); + if (RC_MAY_BE_1(var_info)) { + if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, var_def_info, val_type, val_addr, val_info, res_addr, 0)) { + return 0; + } + counter = jit_GC_DELREF(jit, ref); + + if_not_zero = ir_IF(counter); + ir_IF_FALSE(if_not_zero); + jit_ZVAL_DTOR(jit, ref, var_info, opline); + if (check_exception) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + ir_IF_TRUE(if_not_zero); + if (RC_MAY_BE_N(var_info) && (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT)) != 0) { + ir_ref if_may_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref); + ir_IF_FALSE(if_may_leak); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref); + + if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count > 0); + ir_refs_add(res_inputs, res_inputs->refs[res_inputs->count - 1]); + } + if (check_exception && (val_info & MAY_BE_UNDEF)) { + zend_jit_check_exception(jit); + } + ir_refs_add(end_inputs, ir_END()); + ir_IF_TRUE(if_may_leak); + } + if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) { + ZEND_ASSERT(jit->delay_refs == res_inputs); + ZEND_ASSERT(res_inputs->count > 0); + 
ir_refs_add(res_inputs, res_inputs->refs[res_inputs->count - 1]);
+			}
+			if (check_exception && (val_info & MAY_BE_UNDEF)) {
+				zend_jit_check_exception(jit);
+			}
+			ir_refs_add(end_inputs, ir_END());
+		} else /* if (RC_MAY_BE_N(var_info)) */ {
+			jit_GC_DELREF(jit, ref);
+			if (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT)) {
+				ir_ref if_may_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref);
+				ir_IF_FALSE(if_may_leak);
+				ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref);
+				ir_END_list(simple_inputs);
+				ir_IF_TRUE(if_may_leak);
+			}
+			ir_END_list(simple_inputs);
+		}
+	}
+
+	if (simple_inputs) {
+		ir_MERGE_list(simple_inputs);
+	}
+
+	if (!done) {
+		if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, var_def_info, val_type, val_addr, val_info, res_addr, check_exception)) {
+			return 0;
+		}
+		if (end_inputs->count) {
+			ir_refs_add(end_inputs, ir_END());
+		}
+	}
+
+	if (end_inputs->count) {
+		ir_MERGE_N(end_inputs->count, end_inputs->refs);
+	}
+
+	if (Z_MODE(var_addr) == IS_REG || Z_MODE(res_addr) == IS_REG) {
+		ir_ref phi;
+
+		ZEND_ASSERT(jit->delay_refs == res_inputs);
+		ZEND_ASSERT(end_inputs->count == res_inputs->count || (end_inputs->count == 0 && res_inputs->count == 1));
+		jit->delay_var = -1;
+		jit->delay_refs = NULL;
+		if (res_inputs->count == 1) {
+			phi = res_inputs->refs[0];
+		} else {
+			phi = ir_PHI_N((var_def_info & MAY_BE_LONG) ? IR_LONG : IR_DOUBLE,
+				res_inputs->count, res_inputs->refs);
+		}
+		if (Z_MODE(var_addr) == IS_REG) {
+			if ((var_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || ref_addr) {
+				phi = ir_emit2(&jit->ctx, IR_OPT(IR_COPY, jit->ctx.ir_base[phi].type), phi, 1);
+			}
+			zend_jit_def_reg(jit, var_addr, phi);
+			if (real_res_addr) {
+				if (var_def_info & MAY_BE_LONG) {
+					jit_set_Z_LVAL(jit, real_res_addr, jit_Z_LVAL(jit, var_addr));
+				} else {
+					jit_set_Z_DVAL(jit, real_res_addr, jit_Z_DVAL(jit, var_addr));
+				}
+			}
+		} else {
+			zend_jit_def_reg(jit, res_addr, phi);
+		}
+	}
+
+	return 1;
+}
+
+static int zend_jit_qm_assign(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr op1_def_addr, uint32_t res_use_info, uint32_t res_info, zend_jit_addr res_addr)
+{
+	if (op1_addr != op1_def_addr) {
+		if (!zend_jit_update_regs(jit, opline->op1.var, op1_addr, op1_def_addr, op1_info)) {
+			return 0;
+		}
+		if (Z_MODE(op1_def_addr) == IS_REG && Z_MODE(op1_addr) != IS_REG) {
+			op1_addr = op1_def_addr;
+		}
+	}
+
+	if (!zend_jit_simple_assign(jit, opline, res_addr, res_use_info, res_info, opline->op1_type, op1_addr, op1_info, 0, 1)) {
+		return 0;
+	}
+	if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) {
+		return 0;
+	}
+	return 1;
+}
+
+static int zend_jit_assign(zend_jit_ctx *jit,
+                           const zend_op *opline,
+                           uint32_t op1_info,
+                           zend_jit_addr op1_use_addr,
+                           uint32_t op1_def_info,
+                           zend_jit_addr op1_addr,
+                           uint32_t op2_info,
+                           zend_jit_addr op2_addr,
+                           zend_jit_addr op2_def_addr,
+                           uint32_t res_info,
+                           zend_jit_addr res_addr,
+                           zend_jit_addr ref_addr,
+                           int may_throw)
+{
+	ZEND_ASSERT(opline->op1_type == IS_CV);
+
+	if (op2_addr != op2_def_addr) {
+		if (!zend_jit_update_regs(jit, opline->op2.var, op2_addr, op2_def_addr, op2_info)) {
+			return 0;
+		}
+		if (Z_MODE(op2_def_addr) == IS_REG && Z_MODE(op2_addr) != IS_REG) {
+			op2_addr = op2_def_addr;
+		}
+	}
+
+	if (Z_MODE(op1_addr) != IS_REG
+	 && Z_MODE(op1_use_addr) == IS_REG
+	 && !Z_LOAD(op1_use_addr)
+	 && !Z_STORE(op1_use_addr)) {
+		/* Force type update */
+		op1_info |= MAY_BE_UNDEF;
+	}
+	if
(!zend_jit_assign_to_variable(jit, opline, op1_use_addr, op1_addr, op1_info, op1_def_info, + opline->op2_type, op2_addr, op2_info, res_addr, ref_addr, may_throw)) { + return 0; + } + if (Z_MODE(op1_addr) == IS_REG) { + if (Z_STORE(op1_addr)) { + if (!zend_jit_store_var_if_necessary_ex(jit, opline->op1.var, op1_addr, op1_def_info, op1_use_addr, op1_info)) { + return 0; + } + } else if ((op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && Z_MODE(op1_use_addr) == IS_MEM_ZVAL + && Z_REG(op1_use_addr) == ZREG_FP + && EX_VAR_TO_NUM(Z_OFFSET(op1_use_addr)) < jit->current_op_array->last_var) { + /* We have to update type of CV because it may be captured by exception backtrace or released on RETURN */ + if ((op1_def_info & MAY_BE_ANY) == MAY_BE_LONG) { + jit_set_Z_TYPE_INFO(jit, op1_use_addr, IS_LONG); + } else if ((op1_def_info & MAY_BE_ANY) == MAY_BE_DOUBLE) { + jit_set_Z_TYPE_INFO(jit, op1_use_addr, IS_DOUBLE); + } else { + ZEND_UNREACHABLE(); + } + } + } + if (opline->result_type != IS_UNUSED) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } + + return 1; +} + +static ir_op zend_jit_cmp_op(const zend_op *opline) +{ + ir_op op; + + switch (opline->opcode) { + case ZEND_IS_EQUAL: + case ZEND_IS_IDENTICAL: + case ZEND_CASE: + case ZEND_CASE_STRICT: + op = IR_EQ; + break; + case ZEND_IS_NOT_EQUAL: + case ZEND_IS_NOT_IDENTICAL: + op = IR_NE; + break; + case ZEND_IS_SMALLER: + op = IR_LT; + break; + case ZEND_IS_SMALLER_OR_EQUAL: + op = IR_LE; + break; + default: + ZEND_UNREACHABLE(); + } + return op; +} + +static ir_ref zend_jit_cmp_long_long(zend_jit_ctx *jit, + const zend_op *opline, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + ir_ref ref; + bool result; + + if (zend_jit_is_constant_cmp_long_long(opline, op1_range, op1_addr, op2_range, op2_addr, &result)) { + if (!smart_branch_opcode || + smart_branch_opcode == ZEND_JMPZ_EX || + smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, result ? IS_TRUE : IS_FALSE); + } + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || + smart_branch_opcode == ZEND_JMPZ_EX) { + return jit_IF_ex(jit, IR_FALSE, result ? target_label : target_label2); + } else if (smart_branch_opcode == ZEND_JMPNZ || + smart_branch_opcode == ZEND_JMPNZ_EX) { + return jit_IF_ex(jit, IR_TRUE, result ? 
target_label : target_label2); + } else { + ZEND_UNREACHABLE(); + } + } + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } + } + + ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_LVAL(jit, op1_addr), jit_Z_LVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } +} + +static ir_ref zend_jit_cmp_long_double(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), ir_INT2D(jit_Z_LVAL(jit, op1_addr)), jit_Z_DVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + return ir_END(); +} + +static ir_ref zend_jit_cmp_double_long(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_DVAL(jit, op1_addr), ir_INT2D(jit_Z_LVAL(jit, op2_addr))); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
target_label2 : target_label); + } + return ir_END(); +} + +static ir_ref zend_jit_cmp_double_double(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr op1_addr, zend_jit_addr op2_addr, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ir_ref ref = ir_CMP_OP(zend_jit_cmp_op(opline), jit_Z_DVAL(jit, op1_addr), jit_Z_DVAL(jit, op2_addr)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else { + if (opline->opcode != ZEND_IS_NOT_IDENTICAL) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + if (opline->opcode != ZEND_IS_IDENTICAL + && opline->opcode != ZEND_IS_NOT_IDENTICAL + && opline->opcode != ZEND_CASE_STRICT) { + return ir_END(); + } else { + return IR_NULL; /* success */ + } +} + +static ir_ref zend_jit_cmp_slow(zend_jit_ctx *jit, ir_ref ref, const zend_op *opline, zend_jit_addr res_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + ref = ir_CMP_OP(zend_jit_cmp_op(opline), ref, ir_CONST_I32(0)); + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + return jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
target_label2 : target_label); + } + + return ir_END(); +} + +static int zend_jit_cmp(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + ir_ref ref = IR_UNUSED; + ir_ref if_op1_long = IR_UNUSED; + ir_ref if_op1_double = IR_UNUSED; + ir_ref if_op2_double = IR_UNUSED; + ir_ref if_op1_long_op2_long = IR_UNUSED; + ir_ref if_op1_long_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_double = IR_UNUSED; + ir_ref if_op1_double_op2_long = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + bool same_ops = zend_jit_same_addr(op1_addr, op2_addr); + bool has_slow = + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) && + ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))); + ir_refs *end_inputs; + + ir_refs_init(end_inputs, 8); + + if (Z_MODE(op1_addr) == IS_REG) { + if (!has_concrete_type(op2_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op1_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op1_addr); + } + } else if (Z_MODE(op2_addr) == IS_REG) { + if (!has_concrete_type(op1_info & MAY_BE_ANY) && jit->ra[Z_SSA_VAR(op2_addr)].ref == IR_NULL) { + /* Force load */ + zend_jit_use_reg(jit, op2_addr); + } + } + + if ((op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG)) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if_op1_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_op1_long); + } + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG))) { + if_op1_long_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_long); + if (op2_info & MAY_BE_DOUBLE) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + ref = zend_jit_cmp_long_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else { + ir_END_list(slow_inputs); + } + ir_IF_TRUE(if_op1_long_op2_long); + } + ref = zend_jit_cmp_long_long(jit, opline, op1_range, op1_addr, op2_range, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr, skip_comparison); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + + if (if_op1_long) { + ir_IF_FALSE_cold(if_op1_long); + } + if (op1_info & MAY_BE_DOUBLE) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if 
(if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + ref = zend_jit_cmp_double_long(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if (if_op1_long) { + ir_END_list(slow_inputs); + } + } else if ((op1_info & MAY_BE_DOUBLE) && + !(op1_info & MAY_BE_LONG) && + (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE)) { + if_op1_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op1_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double); + } + if (op2_info & MAY_BE_DOUBLE) { + if (!same_ops && (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op2_info & MAY_BE_LONG)) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_double_op2_long = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_double_op2_long); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_double_op2_long); + } + ref = zend_jit_cmp_double_long(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } else if ((op2_info & MAY_BE_DOUBLE) && + !(op2_info & MAY_BE_LONG) && + (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE)) { + if_op2_double = jit_if_Z_TYPE(jit, op2_addr, IS_DOUBLE); + ir_IF_FALSE_cold(if_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op2_double); + } + if (op1_info & MAY_BE_DOUBLE) { + if (!same_ops && (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_DOUBLE))) { + if_op1_double_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_DOUBLE); + ir_IF_TRUE(if_op1_double_op2_double); + } + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + if (if_op1_double_op2_double) { + ir_IF_FALSE_cold(if_op1_double_op2_double); + } + } + if (!same_ops && (op1_info & MAY_BE_LONG)) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_DOUBLE|MAY_BE_LONG))) { + if_op1_long_op2_double = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_op1_long_op2_double); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_op1_long_op2_double); + } + ref = zend_jit_cmp_long_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } else if (if_op1_double_op2_double) { + ir_END_list(slow_inputs); + } + } + + 
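+	/* Generic slow path: at least one operand may hold a type other than
+	 * IS_LONG/IS_DOUBLE.  Spill any register-allocated operands back to the
+	 * VM stack, report undefined CVs, and fall back to zend_compare(); the
+	 * int result is then mapped onto the boolean result or the smart branch
+	 * by zend_jit_cmp_slow(). */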
if (has_slow || + (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE))) || + (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_DOUBLE)))) { + ir_ref op1, op2, ref; + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + } + jit_SET_EX_OPLINE(jit, opline); + + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + + op1 = jit_ZVAL_ADDR(jit, op1_addr); + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + op1 = zend_jit_zval_check_undef(jit, op1, opline->op1.var, NULL, 0); + } + op2 = jit_ZVAL_ADDR(jit, op2_addr); + if (opline->op2_type == IS_CV && (op2_info & MAY_BE_UNDEF)) { + op2 = zend_jit_zval_check_undef(jit, op2, opline->op2.var, NULL, 0); + } + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_compare), op1, op2); + if (opline->opcode != ZEND_CASE) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + + ref = zend_jit_cmp_slow(jit, ref, opline, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + ir_refs_add(end_inputs, ref); + } + + if (end_inputs->count) { + uint32_t n = end_inputs->count; + + if (smart_branch_opcode && !exit_addr) { + zend_basic_block *bb; + ir_ref ref; + uint32_t label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + uint32_t label2 = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
+ target_label : target_label2; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + + if (UNEXPECTED(bb->successors[0] == bb->successors[1])) { + ir_ref merge_inputs = IR_UNUSED; + + while (n) { + n--; + ir_IF_TRUE(end_inputs->refs[n]); + ir_END_list(merge_inputs); + ir_IF_FALSE(end_inputs->refs[n]); + ir_END_list(merge_inputs); + } + ir_MERGE_list(merge_inputs); + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + } else if (n == 1) { + ref = end_inputs->refs[0]; + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + } else { + ir_ref true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + while (n) { + n--; + ir_IF_TRUE(end_inputs->refs[n]); + ir_END_list(true_inputs); + ir_IF_FALSE(end_inputs->refs[n]); + ir_END_list(false_inputs); + } + ir_MERGE_list(true_inputs); + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + ir_MERGE_list(false_inputs); + _zend_jit_add_predecessor_ref(jit, label2, jit->b, ir_END()); + } + jit->b = -1; + } else { + ir_MERGE_N(n, end_inputs->refs); + } + } + + return 1; +} + +static int zend_jit_identical(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_ssa_range *op1_range, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_ssa_range *op2_range, + zend_jit_addr op2_addr, + zend_jit_addr res_addr, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr, + bool skip_comparison) +{ + bool always_false = 0, always_true = 0; + ir_ref ref = IR_UNUSED; + + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + ir_ref op1 = jit_ZVAL_ADDR(jit, op1_addr); + op1 = zend_jit_zval_check_undef(jit, op1, opline->op1.var, NULL, 0); + op1_info |= MAY_BE_NULL; + op1_addr = ZEND_ADDR_REF_ZVAL(op1); + } + if (opline->op2_type == IS_CV && (op2_info & MAY_BE_UNDEF)) { + ir_ref op2 = jit_ZVAL_ADDR(jit, op2_addr); + op2 = zend_jit_zval_check_undef(jit, op2, opline->op2.var, NULL, 0); + op2_info |= MAY_BE_NULL; + op2_addr = ZEND_ADDR_REF_ZVAL(op2); + } + + if ((op1_info & op2_info & MAY_BE_ANY) == 0) { + always_false = 1; + } else if (has_concrete_type(op1_info) + && has_concrete_type(op2_info) + && concrete_type(op1_info) == concrete_type(op2_info) + && concrete_type(op1_info) <= IS_TRUE) { + always_true = 1; + } else if (Z_MODE(op1_addr) == IS_CONST_ZVAL && Z_MODE(op2_addr) == IS_CONST_ZVAL) { + if (zend_is_identical(Z_ZV(op1_addr), Z_ZV(op2_addr))) { + always_true = 1; + } else { + always_false = 1; + } + } + + if (always_true) { + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!smart_branch_opcode + || smart_branch_opcode == ZEND_JMPZ_EX + || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, opline->opcode != ZEND_IS_NOT_IDENTICAL ? IS_TRUE : IS_FALSE); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + uint32_t label; + + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? 
+ target_label : target_label2; + } else { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + } + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } else if (always_false) { + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!smart_branch_opcode + || smart_branch_opcode == ZEND_JMPZ_EX + || smart_branch_opcode == ZEND_JMPNZ_EX) { + jit_set_Z_TYPE_INFO(jit, res_addr, opline->opcode != ZEND_IS_NOT_IDENTICAL ? IS_FALSE : IS_TRUE); + } + if (may_throw) { + zend_jit_check_exception(jit); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + uint32_t label; + + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label2 : target_label; + } else { + label = (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? + target_label : target_label2; + } + _zend_jit_add_predecessor_ref(jit, label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + if ((opline->op1_type & (IS_CV|IS_VAR)) && (op1_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + if ((opline->op2_type & (IS_CV|IS_VAR)) && (op2_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op2_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op2_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if ((op1_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG && + (op2_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG) { + ref = zend_jit_cmp_long_long(jit, opline, op1_range, op1_addr, op2_range, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr, skip_comparison); + if (!ref) { + return 0; + } + } else if ((op1_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_DOUBLE && + (op2_info & (MAY_BE_REF|MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_DOUBLE) { + ref = zend_jit_cmp_double_double(jit, opline, op1_addr, op2_addr, res_addr, smart_branch_opcode, target_label, target_label2, exit_addr); + if (!ref) { + return 0; + } + } else { + if (opline->op1_type != IS_CONST) { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + } + if (opline->op2_type != IS_CONST) { + if (Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + } + } + + if (Z_MODE(op1_addr) == IS_CONST_ZVAL && Z_TYPE_P(Z_ZV(op1_addr)) <= IS_TRUE) { + zval *val = Z_ZV(op1_addr); + + ref = ir_EQ(jit_Z_TYPE(jit, op2_addr), ir_CONST_U8(Z_TYPE_P(val))); + } else if (Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_TYPE_P(Z_ZV(op2_addr)) <= IS_TRUE) { + zval *val = Z_ZV(op2_addr); + + ref = ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(Z_TYPE_P(val))); + } else { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + if (!zend_jit_spill_store(jit, op1_addr, real_addr, op1_info, 1)) { + return 0; + } + op1_addr = real_addr; + } + if 
(Z_MODE(op2_addr) == IS_REG) { + zend_jit_addr real_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + if (!zend_jit_spill_store(jit, op2_addr, real_addr, op2_info, 1)) { + return 0; + } + op2_addr = real_addr; + } + + ref = ir_CALL_2(IR_BOOL, ir_CONST_FC_FUNC(zend_is_identical), + jit_ZVAL_ADDR(jit, op1_addr), + jit_ZVAL_ADDR(jit, op2_addr)); + } + + if (!smart_branch_opcode || smart_branch_opcode == ZEND_JMPNZ_EX || smart_branch_opcode == ZEND_JMPZ_EX) { + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + } + if (opline->opcode != ZEND_CASE_STRICT) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + if (opline->opcode == ZEND_IS_NOT_IDENTICAL) { + /* swap labels */ + uint32_t tmp = target_label; + target_label = target_label2; + target_label2 = tmp; + } + ref = jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ || smart_branch_opcode == ZEND_JMPZ_EX) ? target_label2 : target_label); + } + } + + if (smart_branch_opcode && !exit_addr) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + + if (bb->successors_count == 2 && bb->successors[0] == bb->successors[1]) { + ir_IF_TRUE(ref); + ir_MERGE_WITH_EMPTY_FALSE(ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + } else { + ZEND_ASSERT(bb->successors_count == 2); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + } + jit->b = -1; + } + + return 1; +} + +static int zend_jit_bool_jmpznz(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr, uint32_t target_label, uint32_t target_label2, int may_throw, uint8_t branch_opcode, const void *exit_addr) +{ + uint32_t true_label = -1; + uint32_t false_label = -1; + bool set_bool = 0; + bool set_bool_not = 0; + bool always_true = 0, always_false = 0; + ir_ref ref, end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + ir_type type = IR_UNUSED; + + if (branch_opcode == ZEND_BOOL) { + set_bool = 1; + } else if (branch_opcode == ZEND_BOOL_NOT) { + set_bool = 1; + set_bool_not = 1; + } else if (branch_opcode == ZEND_JMPZ) { + true_label = target_label2; + false_label = target_label; + } else if (branch_opcode == ZEND_JMPNZ) { + true_label = target_label; + false_label = target_label2; + } else if (branch_opcode == ZEND_JMPZ_EX) { + set_bool = 1; + true_label = target_label2; + false_label = target_label; + } else if (branch_opcode == ZEND_JMPNZ_EX) { + set_bool = 1; + true_label = target_label; + false_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_REF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (Z_MODE(op1_addr) == 
IS_CONST_ZVAL) { + if (zend_is_true(Z_ZV(op1_addr))) { + always_true = 1; + } else { + always_false = 1; + } + } else if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE)) { + if (!(op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)-MAY_BE_TRUE))) { + always_true = 1; + } else if (!(op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE)))) { + if (opline->op1_type == IS_CV && (op1_info & MAY_BE_UNDEF)) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + zend_jit_zval_check_undef(jit, ref, opline->op1.var, opline, 0); + } + always_false = 1; + } + } + + if (always_true) { + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (may_throw) { + zend_jit_check_exception(jit); + } + if (true_label != (uint32_t)-1) { + ZEND_ASSERT(exit_addr == NULL); + _zend_jit_add_predecessor_ref(jit, true_label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } else if (always_false) { + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (may_throw) { + zend_jit_check_exception(jit); + } + if (false_label != (uint32_t)-1) { + ZEND_ASSERT(exit_addr == NULL); + _zend_jit_add_predecessor_ref(jit, false_label, jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE)) { + type = jit_Z_TYPE(jit, op1_addr); + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE)) { + ir_ref if_type = ir_IF(ir_LT(type, ir_CONST_U8(IS_TRUE))); + + ir_IF_TRUE_cold(if_type); + + if (op1_info & MAY_BE_UNDEF) { + zend_jit_type_check_undef(jit, + type, + opline->op1.var, + opline, 1, 0); + } + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + if (exit_addr) { + if (branch_opcode == ZEND_JMPNZ || branch_opcode == ZEND_JMPNZ_EX) { + ir_END_list(end_inputs); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (false_label != (uint32_t)-1) { + ir_END_list(false_inputs); + } else { + ir_END_list(end_inputs); + } + ir_IF_FALSE(if_type); + } + + if (op1_info & MAY_BE_TRUE) { + ir_ref if_type = IR_UNUSED; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE))) { + if_type = ir_IF(ir_EQ(type, ir_CONST_U8(IS_TRUE))); + + ir_IF_TRUE(if_type); + } + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? 
IS_FALSE : IS_TRUE); + } + if (exit_addr) { + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_END_list(end_inputs); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else if (true_label != (uint32_t)-1) { + ir_END_list(true_inputs); + } else { + ir_END_list(end_inputs); + } + if (if_type) { + ir_IF_FALSE(if_type); + } + } + } + + if (op1_info & MAY_BE_LONG) { + ir_ref if_long = IR_UNUSED; + ir_ref ref; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG))) { + if (!type) { + type = jit_Z_TYPE(jit, op1_addr); + } + if_long = ir_IF(ir_EQ(type, ir_CONST_U8(IS_LONG))); + ir_IF_TRUE(if_long); + } + ref = jit_Z_LVAL(jit, op1_addr); + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + ref = ir_NE(ref, ir_CONST_LONG(0)); + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + ir_END_list(end_inputs); + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ir_NE(ref, ir_CONST_LONG(0))), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + if (if_long) { + ir_IF_FALSE(if_long); + } + } + + if (op1_info & MAY_BE_DOUBLE) { + ir_ref if_double = IR_UNUSED; + ir_ref ref; + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))) { + if (!type) { + type = jit_Z_TYPE(jit, op1_addr); + } + if_double = ir_IF(ir_EQ(type, ir_CONST_U8(IS_DOUBLE))); + ir_IF_TRUE(if_double); + } + ref = ir_NE(jit_Z_DVAL(jit, op1_addr), ir_CONST_DOUBLE(0.0)); + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + ir_END_list(end_inputs); + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? 
IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + if (if_double) { + ir_IF_FALSE(if_double); + } + } + + if (op1_info & (MAY_BE_ANY - (MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))) { + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_is_true), jit_ZVAL_ADDR(jit, op1_addr)); + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, NULL); + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT) { + if (set_bool_not) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_SUB_U32(ir_CONST_U32(IS_TRUE), ir_ZEXT_U32(ref))); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (exit_addr) { + if (set_bool) { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + } + if (branch_opcode == ZEND_JMPZ || branch_opcode == ZEND_JMPZ_EX) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_FALSE : IS_TRUE); + } + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + if (set_bool) { + jit_set_Z_TYPE_INFO(jit, res_addr, set_bool_not ? IS_TRUE : IS_FALSE); + } + ir_END_list(false_inputs); + } + } + + if (branch_opcode == ZEND_BOOL || branch_opcode == ZEND_BOOL_NOT || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_defined(zend_jit_ctx *jit, const zend_op *opline, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + uint32_t defined_label = (uint32_t)-1; + uint32_t undefined_label = (uint32_t)-1; + zval *zv = RT_CONSTANT(opline, opline->op1); + zend_jit_addr res_addr; + ir_ref ref, ref2, if_set, if_zero, if_set2; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + defined_label = target_label2; + undefined_label = target_label; + } else if (smart_branch_opcode == ZEND_JMPNZ) { + defined_label = target_label; + undefined_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + } else { + res_addr = RES_ADDR(); + } + + // if (CACHED_PTR(opline->extended_value)) { + ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value)); + + if_set = ir_IF(ref); + + ir_IF_FALSE_cold(if_set); + if_zero = ir_END(); + + ir_IF_TRUE(if_set); + if_set2 = ir_IF(ir_AND_A(ref, ir_CONST_ADDR(CACHE_SPECIAL))); + ir_IF_FALSE(if_set2); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + + ir_IF_TRUE_cold(if_set2); + + ref2 = jit_EG(zend_constants); + ref = ir_SHR_A(ref, ir_CONST_ADDR(1)); + if (sizeof(void*) == 8) { + ref = ir_TRUNC_U32(ref); + } + ref2 = ir_EQ(ref, ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(ref2), 
offsetof(HashTable, nNumOfElements)))); + ref2 = ir_IF(ref2); + ir_IF_TRUE(ref2); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + + ir_IF_FALSE(ref2); + ir_MERGE_2(if_zero, ir_END()); + + jit_SET_EX_OPLINE(jit, opline); + ref2 = ir_NE(ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_check_constant), ir_CONST_ADDR(zv)), IR_NULL); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref2, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref2, ir_CONST_ADDR(exit_addr)); + } + ir_END_list(end_inputs); + } else if (smart_branch_opcode) { + ref2 = ir_IF(ref2); + ir_IF_TRUE(ref2); + ir_END_list(true_inputs); + ir_IF_FALSE(ref2); + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref2), ir_CONST_U32(IS_FALSE))); + ir_END_list(end_inputs); + } + + if (!smart_branch_opcode || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, defined_label, undefined_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_escape_if_undef(zend_jit_ctx *jit, int var, uint32_t flags, const zend_op *opline, int8_t reg) +{ + zend_jit_addr reg_addr = ZEND_ADDR_REF_ZVAL(zend_jit_deopt_rload(jit, IR_ADDR, reg)); + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, reg_addr)); + + ir_IF_FALSE_cold(if_def); + + if (flags & ZEND_JIT_EXIT_RESTORE_CALL) { + if (!zend_jit_save_call_chain(jit, -1)) { + return 0; + } + } + + if ((opline-1)->opcode != ZEND_FETCH_CONSTANT + && (opline-1)->opcode != ZEND_FETCH_LIST_R + && ((opline-1)->op1_type & (IS_VAR|IS_TMP_VAR)) + && !(flags & ZEND_JIT_EXIT_FREE_OP1)) { + zend_jit_addr val_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, (opline-1)->op1.var); + + zend_jit_zval_try_addref(jit, val_addr); + } + + jit_LOAD_IP_ADDR(jit, opline - 1); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_escape)); + + ir_IF_TRUE(if_def); + + return 1; +} + +static int zend_jit_restore_zval(zend_jit_ctx *jit, int var, int8_t reg) +{ + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + zend_jit_addr reg_addr = ZEND_ADDR_REF_ZVAL(zend_jit_deopt_rload(jit, IR_ADDR, reg)); + + // JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup) + jit_ZVAL_COPY(jit, var_addr, MAY_BE_ANY, reg_addr, MAY_BE_ANY, 1); + return 1; +} + +static zend_jit_addr zend_jit_guard_fetch_result_type(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr val_addr, + uint8_t type, + bool deref, + uint32_t flags, + bool op1_avoid_refcounting) +{ + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + int32_t exit_point; + const void *res_exit_addr = NULL; + ir_ref end1 = IR_UNUSED, ref1 = IR_UNUSED; + ir_ref ref = jit_ZVAL_ADDR(jit, val_addr); + uint32_t old_op1_info = 0; + uint32_t old_info; + ir_ref old_ref; + + + if (opline->op1_type & (IS_VAR|IS_TMP_VAR|IS_CV)) { + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + if (op1_avoid_refcounting + || ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && STACK_FLAGS(stack, EX_VAR_TO_NUM(opline->op1.var)) & (ZREG_ZVAL_ADDREF|ZREG_THIS))) { + SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + } + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + old_ref = STACK_REF(stack, 
EX_VAR_TO_NUM(opline->result.var));
+	CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var));
+	SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_UNKNOWN, 1);
+
+	if (deref) {
+		ir_ref if_type = jit_if_Z_TYPE(jit, val_addr, type);
+
+		ir_IF_TRUE(if_type);
+		end1 = ir_END();
+		ref1 = ref;
+		ir_IF_FALSE_cold(if_type);
+
+		SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ref, ZREG_ZVAL_COPY);
+		exit_point = zend_jit_trace_get_exit_point(opline+1, flags);
+		res_exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+		if (!res_exit_addr) {
+			return 0;
+		}
+
+		jit_guard_Z_TYPE(jit, val_addr, IS_REFERENCE, res_exit_addr);
+		ref = ir_ADD_OFFSET(jit_Z_PTR(jit, val_addr), offsetof(zend_reference, val));
+		val_addr = ZEND_ADDR_REF_ZVAL(ref);
+	}
+
+	SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ref, ZREG_ZVAL_COPY);
+	exit_point = zend_jit_trace_get_exit_point(opline+1, flags);
+	res_exit_addr = zend_jit_trace_get_exit_addr(exit_point);
+	if (!res_exit_addr) {
+		return 0;
+	}
+
+	jit_guard_Z_TYPE(jit, val_addr, type, res_exit_addr);
+
+	if (deref) {
+		ir_MERGE_WITH(end1);
+		ref = ir_PHI_2(IR_ADDR, ref, ref1);
+	}
+
+	val_addr = ZEND_ADDR_REF_ZVAL(ref);
+
+	SET_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var), old_ref);
+	SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_info);
+	if (opline->op1_type & (IS_VAR|IS_TMP_VAR|IS_CV)) {
+		SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info);
+	}
+
+	return val_addr;
+}
+
+static int zend_jit_fetch_constant(zend_jit_ctx *jit,
+                                   const zend_op *opline,
+                                   const zend_op_array *op_array,
+                                   zend_ssa *ssa,
+                                   const zend_ssa_op *ssa_op,
+                                   zend_jit_addr res_addr)
+{
+	zval *zv = RT_CONSTANT(opline, opline->op2) + 1;
+	uint32_t res_info = RES_INFO();
+	ir_ref ref, ref2, if_set, if_special, not_set_path, special_path, fast_path;
+
+	// JIT: c = CACHED_PTR(opline->extended_value);
+	ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value));
+
+	// JIT: if (c != NULL)
+	if_set = ir_IF(ref);
+
+	if (!zend_jit_is_persistent_constant(zv, opline->op1.num)) {
+		// JIT: if (!IS_SPECIAL_CACHE_VAL(c))
+		ir_IF_FALSE_cold(if_set);
+		not_set_path = ir_END();
+		ir_IF_TRUE(if_set);
+		if_special = ir_IF(ir_AND_A(ref, ir_CONST_ADDR(CACHE_SPECIAL)));
+		ir_IF_TRUE_cold(if_special);
+		special_path = ir_END();
+		ir_IF_FALSE(if_special);
+		fast_path = ir_END();
+		ir_MERGE_2(not_set_path, special_path);
+	} else {
+		ir_IF_TRUE(if_set);
+		fast_path = ir_END();
+		ir_IF_FALSE_cold(if_set);
+	}
+
+	// JIT: zend_jit_get_constant(RT_CONSTANT(opline, opline->op2) + 1, opline->op1.num);
+	jit_SET_EX_OPLINE(jit, opline);
+	ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_get_constant),
+		ir_CONST_ADDR(zv),
+		ir_CONST_U32(opline->op1.num));
+	ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler));
+
+	ir_MERGE_WITH(fast_path);
+	ref = ir_PHI_2(IR_ADDR, ref2, ref);
+
+	if ((res_info & MAY_BE_GUARD) && JIT_G(current_frame)) {
+		uint8_t type = concrete_type(res_info);
+		zend_jit_addr const_addr = ZEND_ADDR_REF_ZVAL(ref);
+
+		const_addr = zend_jit_guard_fetch_result_type(jit, opline, const_addr, type, 0, 0, 0);
+		if (!const_addr) {
+			return 0;
+		}
+
+		res_info &= ~MAY_BE_GUARD;
+		ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD;
+
+		// JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup)
+		jit_ZVAL_COPY(jit, res_addr, MAY_BE_ANY, const_addr, res_info, 1);
+		if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) {
+			return 0;
+		}
+	} else {
+		zend_jit_addr
const_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: ZVAL_COPY_OR_DUP(EX_VAR(opline->result.var), &c->value); (no dup) + jit_ZVAL_COPY(jit, res_addr, MAY_BE_ANY, const_addr, MAY_BE_ANY, 1); + } + + + return 1; +} + +static int zend_jit_type_check(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + uint32_t mask; + zend_jit_addr op1_addr = OP1_ADDR(); + zend_jit_addr res_addr = 0; + uint32_t true_label = -1, false_label = -1; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + // TODO: support for is_resource() ??? + ZEND_ASSERT(opline->extended_value != MAY_BE_RESOURCE); + + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + true_label = target_label2; + false_label = target_label; + } else if (smart_branch_opcode == ZEND_JMPNZ) { + true_label = target_label; + false_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + } else { + res_addr = RES_ADDR(); + } + + if (op1_info & MAY_BE_UNDEF) { + ir_ref if_def = IR_UNUSED; + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + } + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + zend_jit_check_exception_undef_result(jit, opline); + if (opline->extended_value & MAY_BE_NULL) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + if (if_def) { + ir_END_list(end_inputs); + } + } + } + + if (if_def) { + ir_IF_TRUE(if_def); + op1_info |= MAY_BE_NULL; + } + } + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + mask = opline->extended_value; + if (!(op1_info & MAY_BE_GUARD) && !(op1_info & (MAY_BE_ANY - mask))) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else if (!(op1_info & MAY_BE_GUARD) && !(op1_info & mask)) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + } else { + ir_ref ref; + bool invert = 0; + uint8_t type; + + switch (mask) { + case MAY_BE_NULL: type = IS_NULL; break; + case MAY_BE_FALSE: type = IS_FALSE; break; + case MAY_BE_TRUE: type = IS_TRUE; break; + case MAY_BE_LONG: type = IS_LONG; break; + case MAY_BE_DOUBLE: type = IS_DOUBLE; break; + case MAY_BE_STRING: type = IS_STRING; 
break; + case MAY_BE_ARRAY: type = IS_ARRAY; break; + case MAY_BE_OBJECT: type = IS_OBJECT; break; + case MAY_BE_ANY - MAY_BE_NULL: type = IS_NULL; invert = 1; break; + case MAY_BE_ANY - MAY_BE_FALSE: type = IS_FALSE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_TRUE: type = IS_TRUE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_LONG: type = IS_LONG; invert = 1; break; + case MAY_BE_ANY - MAY_BE_DOUBLE: type = IS_DOUBLE; invert = 1; break; + case MAY_BE_ANY - MAY_BE_STRING: type = IS_STRING; invert = 1; break; + case MAY_BE_ANY - MAY_BE_ARRAY: type = IS_ARRAY; invert = 1; break; + case MAY_BE_ANY - MAY_BE_OBJECT: type = IS_OBJECT; invert = 1; break; + case MAY_BE_ANY - MAY_BE_RESOURCE: type = IS_OBJECT; invert = 1; break; + default: + type = 0; + } + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + if (type == 0) { + ref = ir_AND_U32(ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE(jit, op1_addr)), ir_CONST_U32(mask)); + if (!smart_branch_opcode) { + ref = ir_NE(ref, ir_CONST_U32(0)); + } + } else if (invert) { + ref = ir_NE(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(type)); + } else { + ref = ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(type)); + } + + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + if (end_inputs) { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + ir_END_list(end_inputs); + } + } + } + + if (!smart_branch_opcode || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs); + } + + return 1; +} + +static int zend_jit_isset_isempty_cv(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + zend_jit_addr res_addr = RES_ADDR(); + uint32_t true_label = -1, false_label = -1; + ir_ref end_inputs = IR_UNUSED, true_inputs = IR_UNUSED, false_inputs = IR_UNUSED; + + // TODO: support for empty() ??? 
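+	/* Only the isset() flavour is implemented below; the empty() variant
+	 * (ZEND_ISEMPTY in extended_value) must not reach this helper. */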
+ ZEND_ASSERT(!(opline->extended_value & ZEND_ISEMPTY)); + + if (smart_branch_opcode && !exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + true_label = target_label2; + false_label = target_label; + } else if (smart_branch_opcode == ZEND_JMPNZ) { + true_label = target_label; + false_label = target_label2; + } else { + ZEND_UNREACHABLE(); + } + } else { + res_addr = RES_ADDR(); + } + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (!(op1_info & (MAY_BE_UNDEF|MAY_BE_NULL))) { + if (exit_addr) { + ZEND_ASSERT(smart_branch_opcode == ZEND_JMPZ); + } else if (smart_branch_opcode) { + ir_END_list(true_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else if (!(op1_info & (MAY_BE_ANY - MAY_BE_NULL))) { + if (exit_addr) { + ZEND_ASSERT(smart_branch_opcode == ZEND_JMPNZ); + } else if (smart_branch_opcode) { + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + } else { + ir_ref ref = ir_GT(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_NULL)); + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + ir_ref if_val = ir_IF(ref); + ir_IF_TRUE(if_val); + ir_END_list(true_inputs); + ir_IF_FALSE(if_val); + ir_END_list(false_inputs); + } else { + jit_set_Z_TYPE_INFO_ref(jit, jit_ZVAL_ADDR(jit, res_addr), + ir_ADD_U32(ir_ZEXT_U32(ref), ir_CONST_U32(IS_FALSE))); + ir_END_list(end_inputs); + } + } + + if (!smart_branch_opcode || exit_addr) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + } else { + _zend_jit_merge_smart_branch_inputs(jit, true_label, false_label, true_inputs, false_inputs); + } + + return 1; +} + +/* copy of hidden zend_closure */ +typedef struct _zend_closure { + zend_object std; + zend_function func; + zval this_ptr; + zend_class_entry *called_scope; + zif_handler orig_internal_handler; +} zend_closure; + +static int zend_jit_stack_check(zend_jit_ctx *jit, const zend_op *opline, uint32_t used_stack) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + // JIT: if (EG(vm_stack_end) - EG(vm_stack_top) < used_stack) + ir_GUARD( + ir_UGE( + ir_SUB_A(ir_LOAD_A(jit_EG(vm_stack_end)), ir_LOAD_A(jit_EG(vm_stack_top))), + ir_CONST_ADDR(used_stack)), + ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_free_trampoline(zend_jit_ctx *jit, int8_t func_reg) +{ + // JIT: if (UNEXPECTED(func->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE)) + ir_ref func = ir_RLOAD_A(func_reg); + ir_ref if_trampoline = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, common.fn_flags))), + ir_CONST_U32(ZEND_ACC_CALL_VIA_TRAMPOLINE))); + + ir_IF_TRUE(if_trampoline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_free_trampoline_helper), func); + ir_MERGE_WITH_EMPTY_FALSE(if_trampoline); + + return 1; +} + +static int zend_jit_push_call_frame(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_function *func, bool is_closure, bool delayed_fetch_this, int checked_stack, ir_ref func_ref, ir_ref this_ref) +{ + uint32_t used_stack; + ir_ref used_stack_ref = IR_UNUSED; + bool stack_check = 1; + ir_ref rx, ref, top, if_enough_stack,
cold_path = IR_UNUSED; + + ZEND_ASSERT(func_ref != IR_NULL); + if (func) { + used_stack = zend_vm_calc_used_stack(opline->extended_value, func); + if ((int)used_stack <= checked_stack) { + stack_check = 0; + } + used_stack_ref = ir_CONST_ADDR(used_stack); + } else { + ir_ref num_args_ref; + ir_ref if_internal_func = IR_UNUSED; + + used_stack = (ZEND_CALL_FRAME_SLOT + opline->extended_value + ZEND_OBSERVER_ENABLED) * sizeof(zval); + used_stack_ref = ir_CONST_ADDR(used_stack); + + if (!is_closure) { + used_stack_ref = ir_HARD_COPY_A(used_stack_ref); /* load constant once */ + + // JIT: if (EXPECTED(ZEND_USER_CODE(func->type))) { + ir_ref tmp = ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_function, type))); + if_internal_func = ir_IF(ir_AND_U8(tmp, ir_CONST_U8(1))); + ir_IF_FALSE(if_internal_func); + } + + // JIT: used_stack += (func->op_array.last_var + func->op_array.T - MIN(func->op_array.num_args, num_args)) * sizeof(zval); + num_args_ref = ir_CONST_U32(opline->extended_value); + if (!is_closure) { + ref = ir_SUB_U32( + ir_SUB_U32( + ir_MIN_U32( + num_args_ref, + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.num_args)))), + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.last_var)))), + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, op_array.T)))); + } else { + ref = ir_SUB_U32( + ir_SUB_U32( + ir_MIN_U32( + num_args_ref, + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.num_args)))), + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.last_var)))), + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.T)))); + } + ref = ir_MUL_U32(ref, ir_CONST_U32(sizeof(zval))); + if (sizeof(void*) == 8) { + ref = ir_SEXT_A(ref); + } + ref = ir_SUB_A(used_stack_ref, ref); + + if (is_closure) { + used_stack_ref = ref; + } else { + ir_MERGE_WITH_EMPTY_TRUE(if_internal_func); + used_stack_ref = ir_PHI_2(IR_ADDR, ref, used_stack_ref); + } + } + + zend_jit_start_reuse_ip(jit); + + // JIT: if (UNEXPECTED(used_stack > (size_t)(((char*)EG(vm_stack_end)) - (char*)call))) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EG(vm_stack_top))); + + if (stack_check) { + // JIT: Check Stack Overflow + ref = ir_UGE( + ir_SUB_A( + ir_LOAD_A(jit_EG(vm_stack_end)), + jit_IP(jit)), + used_stack_ref); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + if_enough_stack = ir_IF(ref); + ir_IF_FALSE_cold(if_enough_stack); + +#ifdef _WIN32 + if (0) { +#else + if (opline->opcode == ZEND_INIT_FCALL && func && func->type == ZEND_INTERNAL_FUNCTION) { +#endif + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_int_extend_stack_helper), used_stack_ref); + } else { + if (!is_closure) { + ref = func_ref; + } else { + ref = ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func)); + } + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_extend_stack_helper), + used_stack_ref, ref); + } + jit_STORE_IP(jit, ref); + + cold_path = ir_END(); + ir_IF_TRUE(if_enough_stack); + } + } + + ref = jit_EG(vm_stack_top); + rx = jit_IP(jit); +#if !OPTIMIZE_FOR_SIZE + /* JIT: EG(vm_stack_top) = (zval*)((char*)call + used_stack); + * This version is longer but faster + * mov EG(vm_stack_top), %CALL + * lea size(%call), %tmp + * mov
%tmp, EG(vm_stack_top) + */ + top = rx; +#else + /* JIT: EG(vm_stack_top) += used_stack; + * Use ir_emit() because ir_LOAD() makes load forwarding and doesn't allow load/store fusion + * mov EG(vm_stack_top), %CALL + * add $size, EG(vm_stack_top) + */ + top = jit->ctx.control = ir_emit2(&jit->ctx, IR_OPT(IR_LOAD, IR_ADDR), jit->ctx.control, ref); +#endif + ir_STORE(ref, ir_ADD_A(top, used_stack_ref)); + + // JIT: zend_vm_init_call_frame(call, call_info, func, num_args, called_scope, object); + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || opline->opcode != ZEND_INIT_METHOD_CALL) { + // JIT: ZEND_SET_CALL_INFO(call, 0, call_info); + ir_STORE(jit_CALL(rx, This.u1.type_info), ir_CONST_U32(IS_UNDEF | ZEND_CALL_NESTED_FUNCTION)); + } +#ifdef _WIN32 + if (0) { +#else + if (opline->opcode == ZEND_INIT_FCALL && func && func->type == ZEND_INTERNAL_FUNCTION) { +#endif + if (cold_path) { + ir_MERGE_WITH(cold_path); + rx = jit_IP(jit); + } + + // JIT: call->func = func; + ir_STORE(jit_CALL(rx, func), func_ref); + } else { + if (!is_closure) { + // JIT: call->func = func; + ir_STORE(jit_CALL(rx, func), func_ref); + } else { + // JIT: call->func = &closure->func; + ir_STORE(jit_CALL(rx, func), ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func))); + } + if (cold_path) { + ir_MERGE_WITH(cold_path); + rx = jit_IP(jit); + } + } + if (opline->opcode == ZEND_INIT_METHOD_CALL) { + // JIT: Z_PTR(call->This) = obj; + ZEND_ASSERT(this_ref != IR_NULL); + ir_STORE(jit_CALL(rx, This.value.ptr), this_ref); + if (opline->op1_type == IS_UNUSED || delayed_fetch_this) { + // JIT: call->call_info |= ZEND_CALL_HAS_THIS; + ref = jit_CALL(rx, This.u1.type_info); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + ir_STORE(ref, ir_CONST_U32( ZEND_CALL_HAS_THIS)); + } else { + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_HAS_THIS))); + } + } else { + if (opline->op1_type == IS_CV) { + // JIT: GC_ADDREF(obj); + jit_GC_ADDREF(jit, this_ref); + } + + // JIT: call->call_info |= ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS; + ref = jit_CALL(rx, This.u1.type_info); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + ir_STORE(ref, ir_CONST_U32( ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS)); + } else { + ir_STORE(ref, + ir_OR_U32(ir_LOAD_U32(ref), + ir_CONST_U32(ZEND_CALL_HAS_THIS | ZEND_CALL_RELEASE_THIS))); + } + } + } else if (!is_closure) { + // JIT: Z_CE(call->This) = called_scope; + ir_STORE(jit_CALL(rx, This), IR_NULL); + } else { + ir_ref object_or_called_scope, call_info, call_info2, object, if_cond; + + if (opline->op2_type == IS_CV) { + // JIT: GC_ADDREF(closure); + jit_GC_ADDREF(jit, func_ref); + } + + // JIT: RX(object_or_called_scope) = closure->called_scope; + object_or_called_scope = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, called_scope))); + + // JIT: call_info = ZEND_CALL_NESTED_FUNCTION | ZEND_CALL_DYNAMIC | ZEND_CALL_CLOSURE | + // (closure->func->common.fn_flags & ZEND_ACC_FAKE_CLOSURE); + call_info = ir_OR_U32( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.common.fn_flags))), + ir_CONST_U32(ZEND_ACC_FAKE_CLOSURE)), + ir_CONST_U32(ZEND_CALL_NESTED_FUNCTION | ZEND_CALL_DYNAMIC | ZEND_CALL_CLOSURE)); + // JIT: if (Z_TYPE(closure->this_ptr) != IS_UNDEF) { + if_cond = ir_IF(ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, this_ptr.u1.v.type)))); + ir_IF_TRUE(if_cond); + + // JIT: call_info |= ZEND_CALL_HAS_THIS; + call_info2 = ir_OR_U32(call_info, ir_CONST_U32(ZEND_CALL_HAS_THIS)); + + // JIT: object_or_called_scope = 
Z_OBJ(closure->this_ptr); + object = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, this_ptr.value.ptr))); + + ir_MERGE_WITH_EMPTY_FALSE(if_cond); + call_info = ir_PHI_2(IR_U32, call_info2, call_info); + object_or_called_scope = ir_PHI_2(IR_ADDR, object, object_or_called_scope); + + // JIT: ZEND_SET_CALL_INFO(call, 0, call_info); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), call_info)); + + // JIT: Z_PTR(call->This) = object_or_called_scope; + ir_STORE(jit_CALL(rx, This.value.ptr), object_or_called_scope); + + // JIT: if (closure->func.op_array.run_time_cache__ptr) + if_cond = ir_IF(ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func.op_array.run_time_cache__ptr)))); + ir_IF_FALSE(if_cond); + + // JIT: zend_jit_init_func_run_time_cache_helper(closure->func); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_init_func_run_time_cache_helper), + ir_ADD_OFFSET(func_ref, offsetof(zend_closure, func))); + + ir_MERGE_WITH_EMPTY_TRUE(if_cond); + } + + // JIT: ZEND_CALL_NUM_ARGS(call) = num_args; + ir_STORE(jit_CALL(rx, This.u2.num_args), ir_CONST_U32(opline->extended_value)); + + return 1; +} + +static int zend_jit_init_fcall_guard(zend_jit_ctx *jit, uint32_t level, const zend_function *func, const zend_op *to_opline) +{ + int32_t exit_point; + const void *exit_addr; + ir_ref call; + + if (func->type == ZEND_INTERNAL_FUNCTION) { +#ifdef ZEND_WIN32 + // TODO: ASLR may cause different addresses in different workers ??? + return 0; +#endif + } else if (func->type == ZEND_USER_FUNCTION) { + if (!zend_accel_in_shm(func->op_array.opcodes)) { + /* op_array and op_array->opcodes are not persistent. We can't link. */ + return 0; + } + } else { + ZEND_UNREACHABLE(); + return 0; + } + + exit_point = zend_jit_trace_get_exit_point(to_opline, ZEND_JIT_EXIT_POLYMORPHISM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + // call = EX(call); + call = ir_LOAD_A(jit_EX(call)); + while (level > 0) { + // call = call->prev_execute_data + call = ir_LOAD_A(jit_CALL(call, prev_execute_data)); + level--; + } + + if (func->type == ZEND_USER_FUNCTION && + (!(func->common.fn_flags & ZEND_ACC_IMMUTABLE) || + (func->common.fn_flags & ZEND_ACC_CLOSURE) || + !func->common.function_name)) { + const zend_op *opcodes = func->op_array.opcodes; + + // JIT: if (call->func.op_array.opcodes != opcodes) goto exit_addr; + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_CALL(call, func)), offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + // JIT: if (call->func != func) goto exit_addr; + ir_GUARD(ir_EQ(ir_LOAD_A(jit_CALL(call, func)), ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + + return 1; +} + +static int zend_jit_init_fcall(zend_jit_ctx *jit, const zend_op *opline, uint32_t b, const zend_op_array *op_array, zend_ssa *ssa, const zend_ssa_op *ssa_op, int call_level, zend_jit_trace_rec *trace, int checked_stack) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + zend_function *func = NULL; + ir_ref func_ref = IR_UNUSED; + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_init_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + } + + if (!func + && 
trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL) { +#ifdef _WIN32 + /* ASLR */ + if (trace->func->type != ZEND_INTERNAL_FUNCTION) { + func = (zend_function*)trace->func; + } +#else + func = (zend_function*)trace->func; +#endif + } + +#ifdef _WIN32 + if (0) { +#else + if (opline->opcode == ZEND_INIT_FCALL + && func + && func->type == ZEND_INTERNAL_FUNCTION) { +#endif + /* load constant address later */ + func_ref = ir_CONST_ADDR(func); + } else if (func && op_array == &func->op_array) { + /* recursive call */ + if (!(func->op_array.fn_flags & ZEND_ACC_IMMUTABLE) + || zend_jit_prefer_const_addr_load(jit, (uintptr_t)func)) { + func_ref = ir_LOAD_A(jit_EX(func)); + } else { + func_ref = ir_CONST_ADDR(func); + } + } else { + ir_ref if_func, cache_slot_ref, ref; + + // JIT: if (CACHED_PTR(opline->result.num)) + cache_slot_ref = ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->result.num); + func_ref = ir_LOAD_A(cache_slot_ref); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && func + && (func->common.fn_flags & ZEND_ACC_IMMUTABLE) + && opline->opcode != ZEND_INIT_FCALL) { + /* Called func may be changed because of recompilation. See ext/opcache/tests/jit/init_fcall_003.phpt */ + if_func = ir_IF(ir_EQ(func_ref, ir_CONST_ADDR(func))); + } else { + if_func = ir_IF(func_ref); + } + ir_IF_FALSE_cold(if_func); + if (opline->opcode == ZEND_INIT_FCALL + && func + && func->type == ZEND_USER_FUNCTION + && (func->op_array.fn_flags & ZEND_ACC_IMMUTABLE)) { + ref = ir_HARD_COPY_A(ir_CONST_ADDR(func)); /* load constant once */ + ir_STORE(cache_slot_ref, ref); + ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_init_func_run_time_cache_helper), ref); + } else { + zval *zv = RT_CONSTANT(opline, opline->op2); + + if (opline->opcode == ZEND_INIT_FCALL) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_func_helper), + ir_CONST_ADDR(Z_STR_P(zv)), + cache_slot_ref); + } else if (opline->opcode == ZEND_INIT_FCALL_BY_NAME) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_func_helper), + ir_CONST_ADDR(Z_STR_P(zv + 1)), + cache_slot_ref); + } else if (opline->opcode == ZEND_INIT_NS_FCALL_BY_NAME) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_ns_func_helper), + ir_CONST_ADDR(zv), + cache_slot_ref); + } else { + ZEND_UNREACHABLE(); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, + func && (func->common.fn_flags & ZEND_ACC_IMMUTABLE) ? 
ZEND_JIT_EXIT_INVALIDATE : 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + if (!func || opline->opcode == ZEND_INIT_FCALL) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (func->type == ZEND_USER_FUNCTION + && !(func->common.fn_flags & ZEND_ACC_IMMUTABLE)) { + const zend_op *opcodes = func->op_array.opcodes; + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ir_EQ(ref, ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + } else { + jit_SET_EX_OPLINE(jit, opline); + ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_undefined_function)); + } + } + ir_MERGE_WITH_EMPTY_TRUE(if_func); + func_ref = ir_PHI_2(IR_ADDR, ref, func_ref); + } + + if (!zend_jit_push_call_frame(jit, opline, op_array, func, 0, 0, checked_stack, func_ref, IR_UNUSED)) { + return 0; + } + + if (zend_jit_needs_call_chain(call_info, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + jit->delayed_call_level = call_level; + delayed_call_chain = 1; + } + + return 1; +} + +static int zend_jit_init_method_call(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t b, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + int call_level, + uint32_t op1_info, + zend_jit_addr op1_addr, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + zend_jit_trace_rec *trace, + int checked_stack, + int8_t func_reg, + int8_t this_reg, + bool polymorphic_side_trace) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + zend_function *func = NULL; + zval *function_name; + ir_ref if_static = IR_UNUSED, cold_path, this_ref = IR_NULL, func_ref = IR_NULL; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + function_name = RT_CONSTANT(opline, opline->op2); + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_init_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + } + + if (polymorphic_side_trace) { + /* function is passed in r0 from parent_trace */ + ZEND_ASSERT(func_reg >= 0 && this_reg >= 0); + func_ref = zend_jit_deopt_rload(jit, IR_ADDR, func_reg); + this_ref = zend_jit_deopt_rload(jit, IR_ADDR, this_reg); + } else { + ir_ref ref, ref2, if_found, fast_path, run_time_cache, this_ref2; + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + this_ref = jit_Z_PTR(jit, this_addr); + } else { + if (op1_info & MAY_BE_REF) { + if (opline->op1_type == IS_CV) { + // JIT: ZVAL_DEREF(op1) + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref if_ref; + + /* Hack: Convert reference to regular value to simplify JIT code */ + ZEND_ASSERT(Z_REG(op1_addr) == ZREG_FP); + + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_unref_helper), jit_ZVAL_ADDR(jit, op1_addr)); + + ir_MERGE_WITH_EMPTY_FALSE(if_ref); + } + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY) - MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t
exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_OBJECT)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_object = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_object); + + jit_SET_EX_OPLINE(jit, opline); + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_method_call_tmp), + jit_ZVAL_ADDR(jit, op1_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_method_call), + jit_ZVAL_ADDR(jit, op1_addr)); + } + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_IF_TRUE(if_object); + } + } + + this_ref = jit_Z_PTR(jit, op1_addr); + } + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (func) { + // JIT: fbc = CACHED_PTR(opline->result.num + sizeof(void*)); + ref = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->result.num + sizeof(void*))); + + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + fast_path = ir_END(); + } else { + // JIT: if (CACHED_PTR(opline->result.num) == obj->ce)) { + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ref = ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->result.num)), + ir_LOAD_A(ir_ADD_OFFSET(this_ref, offsetof(zend_object, ce)))); + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + + // JIT: fbc = CACHED_PTR(opline->result.num + sizeof(void*)); + ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->result.num + sizeof(void*))); + fast_path = ir_END(); + + } + + ir_IF_FALSE_cold(if_found); + jit_SET_EX_OPLINE(jit, opline); + + if (!jit->ctx.fixed_call_stack_size) { + // JIT: alloca(sizeof(void*)); + this_ref2 = ir_ALLOCA(ir_CONST_ADDR(0x10)); + } else { + this_ref2 = ir_HARD_COPY_A(ir_RLOAD_A(IR_REG_SP)); + } + ir_STORE(this_ref2, this_ref); + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ref2 = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_method_tmp_helper), + this_ref, + ir_CONST_ADDR(function_name), + this_ref2); + } else { + ref2 = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_find_method_helper), + this_ref, + ir_CONST_ADDR(function_name), + this_ref2); + } + + this_ref2 = ir_LOAD_A(ir_RLOAD_A(IR_REG_SP)); + if (!jit->ctx.fixed_call_stack_size) { + // JIT: revert alloca + ir_AFREE(ir_CONST_ADDR(0x10)); + } + + ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + ir_MERGE_WITH(fast_path); + func_ref = ir_PHI_2(IR_ADDR, ref2, ref); + this_ref = ir_PHI_2(IR_ADDR, this_ref2, this_ref); + } + + if ((!func || zend_jit_may_be_modified(func, op_array)) + && trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL + && trace->func +#ifdef _WIN32 + && trace->func->type != ZEND_INTERNAL_FUNCTION +#endif + ) { + int32_t exit_point; + const void *exit_addr; + + exit_point = zend_jit_trace_get_exit_point(opline, func ? 
ZEND_JIT_EXIT_INVALIDATE : ZEND_JIT_EXIT_METHOD_CALL); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit->trace->exit_info[exit_point].poly_func_ref = func_ref; + jit->trace->exit_info[exit_point].poly_this_ref = this_ref; + + func = (zend_function*)trace->func; + + if (func->type == ZEND_USER_FUNCTION && + (!(func->common.fn_flags & ZEND_ACC_IMMUTABLE) || + (func->common.fn_flags & ZEND_ACC_CLOSURE) || + !func->common.function_name)) { + const zend_op *opcodes = func->op_array.opcodes; + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD(ir_EQ(func_ref, ir_CONST_ADDR(func)), ir_CONST_ADDR(exit_addr)); + } + } + + if (!func) { + // JIT: if (fbc->common.fn_flags & ZEND_ACC_STATIC) { + if_static = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_function, common.fn_flags))), + ir_CONST_U32(ZEND_ACC_STATIC))); + ir_IF_TRUE_cold(if_static); + } + + if (!func || (func->common.fn_flags & ZEND_ACC_STATIC) != 0) { + ir_ref ret; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this) { + ret = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_push_static_metod_call_frame_tmp), + this_ref, + func_ref, + ir_CONST_U32(opline->extended_value)); + } else { + ret = ir_CALL_3(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_push_static_metod_call_frame), + this_ref, + func_ref, + ir_CONST_U32(opline->extended_value)); + } + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR) && !delayed_fetch_this)) { + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + jit_STORE_IP(jit, ret); + } + + if (!func) { + cold_path = ir_END(); + ir_IF_FALSE(if_static); + } + + if (!func || (func->common.fn_flags & ZEND_ACC_STATIC) == 0) { + if (!zend_jit_push_call_frame(jit, opline, NULL, func, 0, delayed_fetch_this, checked_stack, func_ref, this_ref)) { + return 0; + } + } + + if (!func) { + ir_MERGE_WITH(cold_path); + } + zend_jit_start_reuse_ip(jit); + + if (zend_jit_needs_call_chain(call_info, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + delayed_call_chain = 1; + jit->delayed_call_level = call_level; + } + + return 1; +} + +static int zend_jit_init_closure_call(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t b, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + int call_level, + zend_jit_trace_rec *trace, + int checked_stack) +{ + zend_function *func = NULL; + zend_jit_addr op2_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + ir_ref ref; + + ref = jit_Z_PTR(jit, op2_addr); + + if (ssa->var_info[ssa_op->op2_use].ce != zend_ce_closure + && !(ssa->var_info[ssa_op->op2_use].type & MAY_BE_CLASS_GUARD)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_object, ce))), + ir_CONST_ADDR(zend_ce_closure)), + ir_CONST_ADDR(exit_addr)); + + if (ssa->var_info && ssa_op->op2_use >= 0) { + ssa->var_info[ssa_op->op2_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op2_use].ce = zend_ce_closure; + ssa->var_info[ssa_op->op2_use].is_instanceof = 0; + } + } + + if (trace + && trace->op == ZEND_JIT_TRACE_INIT_CALL + && trace->func + && trace->func->type == 
ZEND_USER_FUNCTION) { + const zend_op *opcodes; + int32_t exit_point; + const void *exit_addr; + + func = (zend_function*)trace->func; + opcodes = func->op_array.opcodes; + exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_CLOSURE_CALL); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + ir_GUARD( + ir_EQ( + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_closure, func.op_array.opcodes))), + ir_CONST_ADDR(opcodes)), + ir_CONST_ADDR(exit_addr)); + } + + if (jit->delayed_call_level) { + if (!zend_jit_save_call_chain(jit, jit->delayed_call_level)) { + return 0; + } + } + + if (!zend_jit_push_call_frame(jit, opline, NULL, func, 1, 0, checked_stack, ref, IR_UNUSED)) { + return 0; + } + + if (zend_jit_needs_call_chain(NULL, b, op_array, ssa, ssa_op, opline, call_level, trace)) { + if (!zend_jit_save_call_chain(jit, call_level)) { + return 0; + } + } else { + ZEND_ASSERT(call_level > 0); + delayed_call_chain = 1; + jit->delayed_call_level = call_level; + } + + if (trace + && trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + if (!zend_jit_set_ip(jit, opline + 1)) { + return 0; + } + } + + return 1; +} + +static int zend_jit_send_val(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr) +{ + uint32_t arg_num = opline->op2.num; + zend_jit_addr arg_addr; + + ZEND_ASSERT(opline->opcode == ZEND_SEND_VAL || arg_num <= MAX_ARG_FLAG_NUM); + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + if (opline->opcode == ZEND_SEND_VAL_EX) { + uint32_t mask = ZEND_SEND_BY_REF << ((arg_num + 3) * 2); + + ZEND_ASSERT(arg_num <= MAX_ARG_FLAG_NUM); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_MUST_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + /* Don't generate code that always throws exception */ + return 0; + } + } else { + ir_ref cond = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_RX(func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask)); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD_NOT(cond, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_pass_by_ref; + + if_pass_by_ref = ir_IF(cond); + + ir_IF_TRUE_cold(if_pass_by_ref); + if (Z_MODE(op1_addr) == IS_REG) { + /* set type to avoid zval_ptr_dtor() on uninitialized value */ + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + jit_set_Z_TYPE_INFO(jit, addr, IS_UNDEF); + } + jit_SET_EX_OPLINE(jit, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_throw_cannot_pass_by_ref)); + + ir_IF_FALSE(if_pass_by_ref); + } + } + } + + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, + arg_addr, + MAY_BE_ANY, MAY_BE_ANY, + zv, 1); + } else { + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + } + + return 1; +} + +static int zend_jit_send_ref(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, int cold) +{ + zend_jit_addr op1_addr, arg_addr, ref_addr; + ir_ref ref_path = IR_UNUSED; + + op1_addr = OP1_ADDR(); + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (!zend_jit_reuse_ip(jit)) { 
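+ /* arg_addr above is relative to ZREG_RX, the call frame kept in the reused IP register, so give up if IP cannot be reused */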
+ return 0; + } + + if (opline->op1_type == IS_VAR) { + if (op1_info & MAY_BE_INDIRECT) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + } else if (opline->op1_type == IS_CV) { + if (op1_info & MAY_BE_UNDEF) { + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + // JIT: if (Z_TYPE_P(op1) == IS_UNDEF) + ir_ref if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE(if_def); + // JIT: ZVAL_NULL(op1) + jit_set_Z_TYPE_INFO(jit,op1_addr, IS_NULL); + ir_MERGE_WITH_EMPTY_TRUE(if_def); + } + op1_info &= ~MAY_BE_UNDEF; + op1_info |= MAY_BE_NULL; + } + } else { + ZEND_UNREACHABLE(); + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) { + ir_ref ref, ref2; + + if (op1_info & MAY_BE_REF) { + ir_ref if_ref; + + // JIT: if (Z_TYPE_P(op1) == IS_UNDEF) + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + // JIT: ref = Z_PTR_P(op1) + ref = jit_Z_PTR(jit, op1_addr); + // JIT: GC_ADDREF(ref) + jit_GC_ADDREF(jit, ref); + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, arg_addr, ref); + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_REFERENCE_EX); + ref_path = ir_END(); + ir_IF_FALSE(if_ref); + } + + // JIT: ZVAL_NEW_REF(arg, varptr); + // JIT: ref = emalloc(sizeof(zend_reference)); + ref = jit_EMALLOC(jit, sizeof(zend_reference), op_array, opline); + // JIT: GC_REFCOUNT(ref) = 2 + jit_set_GC_REFCOUNT(jit, ref, 2); + // JIT: GC_TYPE(ref) = GC_REFERENCE + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zend_reference, gc.u.type_info)), ir_CONST_U32(GC_REFERENCE)); + ir_STORE(ir_ADD_OFFSET(ref, offsetof(zend_reference, sources.ptr)), IR_NULL); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + + // JIT: ZVAL_COPY_VALUE(&ref->val, op1) + jit_ZVAL_COPY(jit, + ref_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, op1_addr, ref); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX); + + // JIT: ZVAL_REFERENCE(arg, ref) + jit_set_Z_PTR(jit, arg_addr, ref); + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_REFERENCE_EX); + } + + if (ref_path) { + ir_MERGE_WITH(ref_path); + } + + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + + return 1; +} + +static int zend_jit_send_var(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr op1_def_addr) +{ + uint32_t arg_num = opline->op2.num; + zend_jit_addr arg_addr; + ir_ref end_inputs = IR_UNUSED; + + ZEND_ASSERT((opline->opcode != ZEND_SEND_VAR_EX && + opline->opcode != ZEND_SEND_VAR_NO_REF_EX) || + arg_num <= MAX_ARG_FLAG_NUM); + + arg_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, opline->result.var); + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + if (opline->opcode == ZEND_SEND_VAR_EX) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 0)) { + return 0; + } + return 1; + } + } else { + uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2); + + // JIT: if (RX->func->quick_arg_flags & mask) + ir_ref if_send_by_ref = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_RX(func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask))); + ir_IF_TRUE_cold(if_send_by_ref); + + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 1)) { + return 0; + } + + ir_END_list(end_inputs); + 
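+ /* not sent by reference: fall through to the regular by-value copy below */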
ir_IF_FALSE(if_send_by_ref); + } + } else if (opline->opcode == ZEND_SEND_VAR_NO_REF_EX) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + + if (!ARG_MAY_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!(op1_info & MAY_BE_REF)) { + /* Don't generate code that always throws exception */ + return 0; + } else { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + // JIT: if (Z_TYPE_P(op1) != IS_REFERENCE) + ir_GUARD(ir_EQ(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_REFERENCE)), + ir_CONST_ADDR(exit_addr)); + } + } + return 1; + } + } else { + uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2); + ir_ref func, if_send_by_ref, if_prefer_ref; + + // JIT: if (RX->func->quick_arg_flags & mask) + func = ir_LOAD_A(jit_RX(func)); + if_send_by_ref = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask))); + ir_IF_TRUE_cold(if_send_by_ref); + + mask = ZEND_SEND_PREFER_REF << ((arg_num + 3) * 2); + + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + + if (op1_info & MAY_BE_REF) { + ir_ref if_ref = jit_if_Z_TYPE(jit, arg_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_ref); + } + + // JIT: if (RX->func->quick_arg_flags & mask) + if_prefer_ref = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func, offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask))); + ir_IF_TRUE(if_prefer_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_prefer_ref); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_only_vars_by_reference), + jit_ZVAL_ADDR(jit, arg_addr)); + zend_jit_check_exception(jit); + ir_END_list(end_inputs); + } + + ir_IF_FALSE(if_send_by_ref); + } + } else if (opline->opcode == ZEND_SEND_FUNC_ARG) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 0)) { + return 0; + } + return 1; + } + } else { + // JIT: if (RX->This.u1.type_info & ZEND_CALL_SEND_ARG_BY_REF) + ir_ref if_send_by_ref = ir_IF(ir_AND_U32( + ir_LOAD_U32(jit_RX(This.u1.type_info)), + ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF))); + ir_IF_TRUE_cold(if_send_by_ref); + + if (!zend_jit_send_ref(jit, opline, op_array, op1_info, 1)) { + return 0; + } + + ir_END_list(end_inputs); + ir_IF_FALSE(if_send_by_ref); + } + } + + if (op1_info & MAY_BE_UNDEF) { + ir_ref ref, if_def = IR_UNUSED; + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + if_def = jit_if_not_Z_TYPE(jit, op1_addr, IS_UNDEF); + ir_IF_FALSE_cold(if_def); + } + + // JIT: zend_jit_undefined_op_helper(opline->op1.var) +
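+ /* set EX(opline) first so the undefined-variable warning is attributed to the current opline */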
jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + + // JIT: ZVAL_NULL(arg) + jit_set_Z_TYPE_INFO(jit, arg_addr, IS_NULL); + + // JIT: check_exception + ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + + if (op1_info & (MAY_BE_ANY|MAY_BE_REF)) { + ir_END_list(end_inputs); + ir_IF_TRUE(if_def); + } else { + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + return 1; + } + } + + if (opline->opcode == ZEND_SEND_VAR_NO_REF) { + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + if (op1_info & MAY_BE_REF) { + // JIT: if (Z_TYPE_P(arg) == IS_REFERENCE) + ir_ref if_ref = jit_if_Z_TYPE(jit, arg_addr, IS_REFERENCE); + ir_IF_TRUE(if_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_ref); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ir_GUARD(IR_FALSE, ir_CONST_ADDR(exit_addr)); + } else { + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_only_vars_by_reference), + jit_ZVAL_ADDR(jit, arg_addr)); + zend_jit_check_exception(jit); + } + } else { + if (op1_info & MAY_BE_REF) { + if (opline->op1_type == IS_CV) { + ir_ref ref; + + // JIT: ZVAL_DEREF(op1) + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: ZVAL_COPY(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 1); + } else { + ir_ref if_ref, ref, ref2, refcount, if_not_zero, if_refcounted; + zend_jit_addr ref_addr; + + // JIT: if (Z_TYPE_P(op1) == IS_REFERENCE) + if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_TRUE_cold(if_ref); + + // JIT: ref = Z_COUNTED_P(op1); + ref = jit_Z_PTR(jit, op1_addr); + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + + // JIT: ZVAL_COPY_VALUE(arg, op1); + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + ref_addr, op1_info, 0); + + // JIT: if (GC_DELREF(ref) != 0) + refcount = jit_GC_DELREF(jit, ref); + if_not_zero = ir_IF(refcount); + ir_IF_TRUE(if_not_zero); + + // JIT: if (Z_REFCOUNTED_P(arg) + if_refcounted = jit_if_REFCOUNTED(jit, arg_addr); + ir_IF_TRUE(if_refcounted); + // JIT: Z_ADDREF_P(arg) + jit_GC_ADDREF(jit, jit_Z_PTR(jit, arg_addr)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_refcounted); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_not_zero); + + // JIT: efree(ref) + jit_EFREE(jit, ref, sizeof(zend_reference), op_array, opline); + ir_END_list(end_inputs); + + ir_IF_FALSE(if_ref); + + // JIT: ZVAL_COPY_VALUE(arg, op1); + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, 0); + } + } else { + if (op1_addr != op1_def_addr) { + if (!zend_jit_update_regs(jit, opline->op1.var, op1_addr, op1_def_addr, op1_info)) { + return 0; + } + if (Z_MODE(op1_def_addr) == IS_REG && Z_MODE(op1_addr) != IS_REG) { + op1_addr = op1_def_addr; + } + } + + // JIT: ZVAL_COPY_VALUE(arg, op1) + jit_ZVAL_COPY(jit, + arg_addr, + MAY_BE_ANY, + op1_addr, op1_info, opline->op1_type == IS_CV); + } + } + + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_check_func_arg(zend_jit_ctx *jit, const zend_op *opline) +{ + uint32_t arg_num = opline->op2.num; + ir_ref ref; + + if 
(JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && JIT_G(current_frame)->call->func) { + if (ARG_SHOULD_BE_SENT_BY_REF(JIT_G(current_frame)->call->func, arg_num)) { + if (!TRACE_FRAME_IS_LAST_SEND_BY_REF(JIT_G(current_frame)->call)) { + TRACE_FRAME_SET_LAST_SEND_BY_REF(JIT_G(current_frame)->call); + // JIT: ZEND_ADD_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + if (jit->reuse_ip) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(call)); + } + ref = jit_CALL(ref, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF))); + } + } else { + if (!TRACE_FRAME_IS_LAST_SEND_BY_VAL(JIT_G(current_frame)->call)) { + TRACE_FRAME_SET_LAST_SEND_BY_VAL(JIT_G(current_frame)->call); + // JIT: ZEND_DEL_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + if (jit->reuse_ip) { + ref = jit_IP(jit); + } else { + ref = ir_LOAD_A(jit_EX(call)); + } + ref = jit_CALL(ref, This.u1.type_info); + ir_STORE(ref, ir_AND_U32(ir_LOAD_U32(ref), ir_CONST_U32(~ZEND_CALL_SEND_ARG_BY_REF))); + } + } + } else { + // JIT: if (QUICK_ARG_SHOULD_BE_SENT_BY_REF(EX(call)->func, arg_num)) { + uint32_t mask = (ZEND_SEND_BY_REF|ZEND_SEND_PREFER_REF) << ((arg_num + 3) * 2); + ir_ref rx, if_ref, cold_path; + + if (!zend_jit_reuse_ip(jit)) { + return 0; + } + + rx = jit_IP(jit); + + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ir_LOAD_A(jit_CALL(rx, func)), offsetof(zend_function, quick_arg_flags))), + ir_CONST_U32(mask)); + if_ref = ir_IF(ref); + ir_IF_TRUE_cold(if_ref); + + // JIT: ZEND_ADD_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_OR_U32(ir_LOAD_U32(ref), ir_CONST_U32(ZEND_CALL_SEND_ARG_BY_REF))); + + cold_path = ir_END(); + ir_IF_FALSE(if_ref); + + // JIT: ZEND_DEL_CALL_FLAG(EX(call), ZEND_CALL_SEND_ARG_BY_REF); + ref = jit_CALL(rx, This.u1.type_info); + ir_STORE(ref, ir_AND_U32(ir_LOAD_U32(ref), ir_CONST_U32(~ZEND_CALL_SEND_ARG_BY_REF))); + + ir_MERGE_WITH(cold_path); + } + + return 1; +} + +static int zend_jit_check_undef_args(zend_jit_ctx *jit, const zend_op *opline) +{ + ir_ref call, if_may_have_undef, ret; + + if (jit->reuse_ip) { + call = jit_IP(jit); + } else { + call = ir_LOAD_A(jit_EX(call)); + } + + if_may_have_undef = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(call, offsetof(zend_execute_data, This.u1.type_info) + 3)), + ir_CONST_U8(ZEND_CALL_MAY_HAVE_UNDEF >> 24))); + + ir_IF_TRUE_cold(if_may_have_undef); + jit_SET_EX_OPLINE(jit, opline); + ret = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_handle_undef_args), call); + ir_GUARD_NOT(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_MERGE_WITH_EMPTY_FALSE(if_may_have_undef); + + return 1; +} + +static int zend_jit_do_fcall(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, int call_level, unsigned int next_block, zend_jit_trace_rec *trace) +{ + zend_func_info *info = ZEND_FUNC_INFO(op_array); + zend_call_info *call_info = NULL; + const zend_function *func = NULL; + uint32_t i; + uint32_t call_num_args = 0; + bool unknown_num_args = 0; + const void *exit_addr = NULL; + const zend_op *prev_opline; + ir_ref rx, func_ref = IR_UNUSED, if_user = IR_UNUSED, user_path = IR_UNUSED; + + prev_opline = opline - 1; + while (prev_opline->opcode == ZEND_EXT_FCALL_BEGIN || prev_opline->opcode == ZEND_TICKS) { + prev_opline--; + } + if (prev_opline->opcode == ZEND_SEND_UNPACK || prev_opline->opcode == ZEND_SEND_ARRAY || + prev_opline->opcode == ZEND_CHECK_UNDEF_ARGS) { + 
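+ /* preceded by SEND_UNPACK/SEND_ARRAY/CHECK_UNDEF_ARGS: the actual argument count is only known at run time */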
unknown_num_args = 1; + } + + if (info) { + call_info = info->callee_info; + while (call_info && call_info->caller_call_opline != opline) { + call_info = call_info->next_callee; + } + if (call_info && call_info->callee_func && !call_info->is_prototype) { + func = call_info->callee_func; + } + if ((op_array->fn_flags & ZEND_ACC_TRAIT_CLONE) + && JIT_G(current_frame) + && JIT_G(current_frame)->call + && !JIT_G(current_frame)->call->func) { + call_info = NULL; func = NULL; /* megamorphic call from trait */ + } + } + if (!func) { + /* resolve function at run time */ + } else if (func->type == ZEND_USER_FUNCTION) { + ZEND_ASSERT(opline->opcode != ZEND_DO_ICALL); + call_num_args = call_info->num_args; + } else if (func->type == ZEND_INTERNAL_FUNCTION) { + ZEND_ASSERT(opline->opcode != ZEND_DO_UCALL); + call_num_args = call_info->num_args; + } else { + ZEND_UNREACHABLE(); + } + + if (trace && !func) { + if (trace->op == ZEND_JIT_TRACE_DO_ICALL) { + ZEND_ASSERT(trace->func->type == ZEND_INTERNAL_FUNCTION); +#ifndef ZEND_WIN32 + // TODO: ASLR may cause different addresses in different workers ??? + func = trace->func; + if (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call) >= 0) { + call_num_args = TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call); + } else { + unknown_num_args = 1; + } +#endif + } else if (trace->op == ZEND_JIT_TRACE_ENTER) { + ZEND_ASSERT(trace->func->type == ZEND_USER_FUNCTION); + if (zend_accel_in_shm(trace->func->op_array.opcodes)) { + func = trace->func; + if (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call) >= 0) { + call_num_args = TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)->call); + } else { + unknown_num_args = 1; + } + } + } + } + + bool may_have_extra_named_params = + opline->extended_value == ZEND_FCALL_MAY_HAVE_EXTRA_NAMED_PARAMS && + (!func || func->common.fn_flags & ZEND_ACC_VARIADIC); + + if (!jit->reuse_ip) { + zend_jit_start_reuse_ip(jit); + // JIT: call = EX(call); + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(call))); + } + rx = jit_IP(jit); + zend_jit_stop_reuse_ip(jit); + + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode == ZEND_DO_FCALL) { + if (!func) { + if (trace) { + uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + ir_GUARD_NOT( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED)), + ir_CONST_ADDR(exit_addr)); + } + } + } + + if (!jit->delayed_call_level) { + // JIT: EX(call) = call->prev_execute_data; + ir_STORE(jit_EX(call), + (call_level == 1) ? 
IR_NULL : ir_LOAD_A(jit_CALL(rx, prev_execute_data))); + } + delayed_call_chain = 0; + jit->delayed_call_level = 0; + + // JIT: call->prev_execute_data = execute_data; + ir_STORE(jit_CALL(rx, prev_execute_data), jit_FP(jit)); + + if (!func) { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + } + + if (opline->opcode == ZEND_DO_FCALL) { + if (!func) { + if (!trace) { + ir_ref if_deprecated, ret; + + if_deprecated = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED))); + ir_IF_TRUE_cold(if_deprecated); + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_MERGE_WITH_EMPTY_FALSE(if_deprecated); + } + } else if (func->common.fn_flags & ZEND_ACC_DEPRECATED) { + ir_ref ret; + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + } + + if (!func + && opline->opcode != ZEND_DO_UCALL + && opline->opcode != ZEND_DO_ICALL) { + ir_ref type_ref = ir_LOAD_U8(ir_ADD_OFFSET(func_ref, offsetof(zend_function, type))); + if_user = ir_IF(ir_EQ(type_ref, ir_CONST_U8(ZEND_USER_FUNCTION))); + ir_IF_TRUE(if_user); + } + + if ((!func || func->type == ZEND_USER_FUNCTION) + && opline->opcode != ZEND_DO_ICALL) { + bool recursive_call_through_jmp = 0; + + // JIT: EX(call) = NULL; + ir_STORE(jit_CALL(rx, call), IR_NULL); + + // JIT: EX(return_value) = RETURN_VALUE_USED(opline) ? EX_VAR(opline->result.var) : 0; + ir_STORE(jit_CALL(rx, return_value), + RETURN_VALUE_USED(opline) ? + jit_ZVAL_ADDR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var)) : + IR_NULL); + + // JIT: EX_LOAD_RUN_TIME_CACHE(op_array); + if (!func || func->op_array.cache_size) { + ir_ref run_time_cache; + + if (func && op_array == &func->op_array) { + /* recursive call */ + run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + } else if (func + && !(func->op_array.fn_flags & ZEND_ACC_CLOSURE) + && ZEND_MAP_PTR_IS_OFFSET(func->op_array.run_time_cache)) { + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_CG(map_ptr_base)), + (uintptr_t)ZEND_MAP_PTR(func->op_array.run_time_cache))); + } else if ((func && (func->op_array.fn_flags & ZEND_ACC_CLOSURE)) || + (JIT_G(current_frame) && + JIT_G(current_frame)->call && + TRACE_FRAME_IS_CLOSURE_CALL(JIT_G(current_frame)->call))) { + /* Closures always use direct pointers */ + ir_ref local_func_ref = func_ref ? func_ref : ir_LOAD_A(jit_CALL(rx, func)); + + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(local_func_ref, offsetof(zend_op_array, run_time_cache__ptr))); + } else { + ir_ref if_odd, run_time_cache2; + ir_ref local_func_ref = func_ref ? 
func_ref : ir_LOAD_A(jit_CALL(rx, func)); + + run_time_cache = ir_LOAD_A(ir_ADD_OFFSET(local_func_ref, offsetof(zend_op_array, run_time_cache__ptr))); + if_odd = ir_IF(ir_AND_A(run_time_cache, ir_CONST_ADDR(1))); + ir_IF_TRUE(if_odd); + + run_time_cache2 = ir_LOAD_A(ir_ADD_A(run_time_cache, ir_LOAD_A(jit_CG(map_ptr_base)))); + + ir_MERGE_WITH_EMPTY_FALSE(if_odd); + run_time_cache = ir_PHI_2(IR_ADDR, run_time_cache2, run_time_cache); + } + + ir_STORE(jit_CALL(rx, run_time_cache), run_time_cache); + } + + // JIT: EG(current_execute_data) = execute_data = call; + ir_STORE(jit_EG(current_execute_data), rx); + jit_STORE_FP(jit, rx); + + // JIT: opline = op_array->opcodes; + if (func && !unknown_num_args) { + + for (i = call_num_args; i < func->op_array.last_var; i++) { + uint32_t n = EX_NUM_TO_VAR(i); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, n); + + jit_set_Z_TYPE_INFO_ex(jit, var_addr, ir_CONST_U32(IS_UNDEF)); + } + + if (call_num_args <= func->op_array.num_args) { + if (!trace || (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER)) { + uint32_t num_args; + + if ((func->op_array.fn_flags & ZEND_ACC_HAS_TYPE_HINTS) != 0) { + if (trace) { + num_args = 0; + } else if (call_info) { + num_args = skip_valid_arguments(op_array, ssa, call_info); + } else { + num_args = call_num_args; + } + } else { + num_args = call_num_args; + } + if (zend_accel_in_shm(func->op_array.opcodes)) { + jit_LOAD_IP_ADDR(jit, func->op_array.opcodes + num_args); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ir_ref ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + if (num_args) { + ip = ir_ADD_OFFSET(ip, num_args * sizeof(zend_op)); + } + jit_LOAD_IP(jit, ip); + } + + if (!trace && op_array == &func->op_array && call_num_args >= op_array->required_num_args) { + /* recursive call */ + recursive_call_through_jmp = 1; + } + } + } else { + if (!trace || (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER)) { + ir_ref ip; + + if (zend_accel_in_shm(func->op_array.opcodes)) { + ip = ir_CONST_ADDR(func->op_array.opcodes); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + } + jit_LOAD_IP(jit, ip); + } + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper), jit_FP(jit)); + } + } + } else { + ir_ref ip; + ir_ref merge_inputs = IR_UNUSED; + + // JIT: opline = op_array->opcodes + if (func && zend_accel_in_shm(func->op_array.opcodes)) { + ip = ir_CONST_ADDR(func->op_array.opcodes); + } else { + if (!func_ref) { + func_ref = ir_LOAD_A(jit_CALL(rx, func)); + } + ip = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, opcodes))); + } + jit_LOAD_IP(jit, ip); + + // JIT: num_args = EX_NUM_ARGS(); + ir_ref num_args, first_extra_arg; + + num_args = ir_LOAD_U32(jit_EX(This.u2.num_args)); + if (func) { + first_extra_arg = ir_CONST_U32(func->op_array.num_args); + } else { + // JIT: first_extra_arg = op_array->num_args; + ZEND_ASSERT(func_ref); + first_extra_arg = ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, num_args))); + } + + // JIT: if (UNEXPECTED(num_args > first_extra_arg)) + ir_ref if_extra_args = ir_IF(ir_GT(num_args, first_extra_arg)); + ir_IF_TRUE_cold(if_extra_args); + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, 
ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_copy_extra_args_helper), jit_FP(jit)); + } + ir_END_list(merge_inputs); + ir_IF_FALSE(if_extra_args); + if (!func || (func->op_array.fn_flags & ZEND_ACC_HAS_TYPE_HINTS) == 0) { + if (!func) { + // JIT: if (EXPECTED((op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) == 0)) + ir_ref if_has_type_hints = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_HAS_TYPE_HINTS))); + ir_IF_TRUE(if_has_type_hints); + ir_END_list(merge_inputs); + ir_IF_FALSE(if_has_type_hints); + } + // JIT: opline += num_args; + + ir_ref ref = ir_MUL_U32(num_args, ir_CONST_U32(sizeof(zend_op))); + + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(ref); + } + + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_ADD_A(jit_IP(jit), ref)); + } else { + ir_ref addr = jit_EX(opline); + + ir_STORE(addr, ir_ADD_A(ir_LOAD_A(addr), ref)); + } + } + + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + + // JIT: if (EXPECTED((int)num_args < op_array->last_var)) { + ir_ref last_var; + + if (func) { + last_var = ir_CONST_U32(func->op_array.last_var); + } else { + ZEND_ASSERT(func_ref); + last_var = ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, last_var))); + } + + ir_ref idx = ir_SUB_U32(last_var, num_args); + ir_ref if_need = ir_IF(ir_GT(idx, ir_CONST_U32(0))); + ir_IF_TRUE(if_need); + + // JIT: zval *var = EX_VAR_NUM(num_args); + if (sizeof(void*) == 8) { + num_args = ir_ZEXT_A(num_args); + } + ir_ref var_ref = ir_ADD_OFFSET( + ir_ADD_A(jit_FP(jit), ir_MUL_A(num_args, ir_CONST_ADDR(sizeof(zval)))), + (ZEND_CALL_FRAME_SLOT * sizeof(zval)) + offsetof(zval, u1.type_info)); + + ir_ref loop = ir_LOOP_BEGIN(ir_END()); + var_ref = ir_PHI_2(IR_ADDR, var_ref, IR_UNUSED); + idx = ir_PHI_2(IR_U32, idx, IR_UNUSED); + ir_STORE(var_ref, ir_CONST_I32(IS_UNDEF)); + ir_PHI_SET_OP(var_ref, 2, ir_ADD_OFFSET(var_ref, sizeof(zval))); + ir_ref idx2 = ir_SUB_U32(idx, ir_CONST_U32(1)); + ir_PHI_SET_OP(idx, 2, idx2); + ir_ref if_not_zero = ir_IF(idx2); + ir_IF_TRUE(if_not_zero); + ir_MERGE_SET_OP(loop, 2, ir_LOOP_END()); + ir_IF_FALSE(if_not_zero); + ir_MERGE_WITH_EMPTY_FALSE(if_need); + } + + if (ZEND_OBSERVER_ENABLED) { + if (GCC_GLOBAL_REGS) { + // EX(opline) = opline + ir_STORE(jit_EX(opline), jit_IP(jit)); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_begin), jit_FP(jit)); + } + + if (trace) { + if (!func && (opline->opcode != ZEND_DO_UCALL)) { + user_path = ir_END(); + } + } else { + zend_basic_block *bb; + + do { + if (recursive_call_through_jmp) { + ir_ref begin, end; + ir_insn *insn; + + /* attempt to convert direct recursive call into loop */ + begin = jit->bb_start_ref[call_num_args]; + ZEND_ASSERT(begin != IR_UNUSED); + insn = &jit->ctx.ir_base[begin]; + if (insn->op == IR_BEGIN) { + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op = IR_LOOP_BEGIN; + insn->inputs_count = 2; + insn->op2 = end; + break; + } else if ((insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) + && insn->inputs_count == 2) { + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op = IR_LOOP_BEGIN; + insn->inputs_count = 3; + insn->op3 = end; + break; + } else if (insn->op == IR_LOOP_BEGIN && insn->inputs_count == 3) { + ZEND_ASSERT(jit->ctx.ir_base[insn->op3].op == IR_LOOP_END); + jit->ctx.ir_base[insn->op3].op = IR_END; + ir_MERGE_2(insn->op3, ir_END()); + end = ir_LOOP_END(); + insn = &jit->ctx.ir_base[begin]; + insn->op3 = end; + break; + } + 
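+ /* unexpected block shape: give up converting the recursive call into a loop */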
} + /* fallback to indirect JMP or RETURN */ + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(1)); + } + } while (0); + + bb = &jit->ssa->cfg.blocks[jit->b]; + if (bb->successors_count > 0) { + int succ; + ir_ref ref; + + ZEND_ASSERT(bb->successors_count == 1); + succ = bb->successors[0]; + /* Add a fake control edge from UNREACHABLE/RETURN to the following ENTRY */ + ref = jit->ctx.insns_count - 1; + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_UNREACHABLE + || jit->ctx.ir_base[ref].op == IR_RETURN + || jit->ctx.ir_base[ref].op == IR_LOOP_END); + ZEND_ASSERT(jit->ssa->cfg.blocks[succ].flags & ZEND_BB_ENTRY); + ref = zend_jit_continue_entry(jit, ref, jit->ssa->cfg.blocks[succ].start); + if (func || (opline->opcode == ZEND_DO_UCALL)) { + _zend_jit_add_predecessor_ref(jit, succ, jit->b, ref); + jit->b = -1; + } else { + user_path = ref; + } + } + } + } + + if ((!func || func->type == ZEND_INTERNAL_FUNCTION) + && (opline->opcode != ZEND_DO_UCALL)) { + if (!func && (opline->opcode != ZEND_DO_ICALL)) { + ir_IF_FALSE(if_user); + } + if (opline->opcode == ZEND_DO_FCALL_BY_NAME) { + if (!func) { + if (trace) { + uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + ZEND_ASSERT(func_ref); + ir_GUARD_NOT( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED)), + ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_deprecated, ret; + + if_deprecated = ir_IF(ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(func_ref, offsetof(zend_op_array, fn_flags))), + ir_CONST_U32(ZEND_ACC_DEPRECATED))); + ir_IF_TRUE_cold(if_deprecated); + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_MERGE_WITH_EMPTY_FALSE(if_deprecated); + } + } else if (func->common.fn_flags & ZEND_ACC_DEPRECATED) { + ir_ref ret; + + if (GCC_GLOBAL_REGS) { + ret = ir_CALL(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper)); + } else { + ret = ir_CALL_1(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_deprecated_helper), rx); + } + ir_GUARD(ret, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + } + + // JIT: EG(current_execute_data) = execute_data; + ir_STORE(jit_EG(current_execute_data), rx); + + if (ZEND_OBSERVER_ENABLED) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_begin), rx); + } + + // JIT: ZVAL_NULL(EX_VAR(opline->result.var)); + ir_ref res_addr = IR_UNUSED, func_ptr; + + if (RETURN_VALUE_USED(opline)) { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } else { + /* CPU stack allocated temporary zval */ + ir_ref ptr; + + if (!jit->ctx.fixed_call_stack_size) { + // JIT: alloca(sizeof(void*)); + ptr = ir_ALLOCA(ir_CONST_ADDR(sizeof(zval))); + } else { + ptr = ir_HARD_COPY_A(ir_RLOAD_A(IR_REG_SP)); + } + res_addr = ZEND_ADDR_REF_ZVAL(ptr); + } + + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + + zend_jit_reset_last_valid_opline(jit); + + // JIT: (zend_execute_internal ? 
zend_execute_internal : fbc->internal_function.handler)(call, ret); + if (zend_execute_internal) { + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_execute_internal), rx, jit_ZVAL_ADDR(jit, res_addr)); + } else { + if (func) { + func_ptr = ir_CONST_FC_FUNC(func->internal_function.handler); + } else { + func_ptr = ir_LOAD_A(ir_ADD_OFFSET(func_ref, offsetof(zend_internal_function, handler))); +#if defined(IR_TARGET_X86) + func_ptr = ir_CAST_FC_FUNC(func_ptr); +#endif + } + ir_CALL_2(IR_VOID, func_ptr, rx, jit_ZVAL_ADDR(jit, res_addr)); + } + + if (ZEND_OBSERVER_ENABLED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_end), + rx, jit_ZVAL_ADDR(jit, res_addr)); + } + + // JIT: EG(current_execute_data) = execute_data; + ir_STORE(jit_EG(current_execute_data), jit_FP(jit)); + + // JIT: zend_vm_stack_free_args(call); + if (func && !unknown_num_args) { + for (i = 0; i < call_num_args; i++ ) { + if (zend_jit_needs_arg_dtor(func, i, call_info)) { + uint32_t offset = EX_NUM_TO_VAR(i); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, offset); + + jit_ZVAL_PTR_DTOR(jit, var_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN, 0, opline); + } + } + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_vm_stack_free_args_helper), rx); + } + + if (may_have_extra_named_params) { + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_HAS_EXTRA_NAMED_PARAMS)) + ir_ref if_has_named = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 3)), + ir_CONST_U8(ZEND_CALL_HAS_EXTRA_NAMED_PARAMS >> 24))); + ir_IF_TRUE_cold(if_has_named); + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_free_extra_named_params), + ir_LOAD_A(jit_CALL(rx, extra_named_params))); + + ir_MERGE_WITH_EMPTY_FALSE(if_has_named); + } + + if (opline->opcode == ZEND_DO_FCALL) { + // TODO: optimize ??? + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_RELEASE_THIS)) + ir_ref if_release_this = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 2)), + ir_CONST_U8(ZEND_CALL_RELEASE_THIS >> 16))); + ir_IF_TRUE_cold(if_release_this); + + // JIT: OBJ_RELEASE(Z_OBJ(RX->This)); + jit_OBJ_RELEASE(jit, ir_LOAD_A(jit_CALL(rx, This.value.obj))); + + ir_MERGE_WITH_EMPTY_FALSE(if_release_this); + } + + + ir_ref allocated_path = IR_UNUSED; + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !JIT_G(current_frame)->call || + !TRACE_FRAME_IS_NESTED(JIT_G(current_frame)->call) || + prev_opline->opcode == ZEND_SEND_UNPACK || + prev_opline->opcode == ZEND_SEND_ARRAY || + prev_opline->opcode == ZEND_CHECK_UNDEF_ARGS) { + + // JIT: zend_vm_stack_free_call_frame(call); + // JIT: if (UNEXPECTED(ZEND_CALL_INFO(call) & ZEND_CALL_ALLOCATED)) + ir_ref if_allocated = ir_IF(ir_AND_U8( + ir_LOAD_U8(ir_ADD_OFFSET(rx, offsetof(zend_execute_data, This.u1.type_info) + 2)), + ir_CONST_U8(ZEND_CALL_ALLOCATED >> 16))); + ir_IF_TRUE_cold(if_allocated); + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_free_call_frame), rx); + + allocated_path = ir_END(); + ir_IF_FALSE(if_allocated); + } + + ir_STORE(jit_EG(vm_stack_top), rx); + + if (allocated_path) { + ir_MERGE_WITH(allocated_path); + } + + if (!RETURN_VALUE_USED(opline)) { + zend_class_entry *ce; + bool ce_is_instanceof; + uint32_t func_info = call_info ? + zend_get_func_info(call_info, ssa, &ce, &ce_is_instanceof) : + (MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN); + + /* If an exception is thrown, the return_value may stay at the + * original value of null. 
*/ + func_info |= MAY_BE_NULL; + + if (func_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + ir_ref sp = ir_RLOAD_A(IR_REG_SP); + res_addr = ZEND_ADDR_REF_ZVAL(sp); + jit_ZVAL_PTR_DTOR(jit, res_addr, func_info, 1, opline); + } + if (!jit->ctx.fixed_call_stack_size) { + // JIT: revert alloca + ir_AFREE(ir_CONST_ADDR(sizeof(zval))); + } + } + + // JIT: if (UNEXPECTED(EG(exception) != NULL)) { + ir_GUARD_NOT(ir_LOAD_A(jit_EG_exception(jit)), + jit_STUB_ADDR(jit, jit_stub_icall_throw)); + + // TODO: Can we avoid checking for interrupts after each call ??? + if (trace && jit->last_valid_opline != opline) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline + 1, ZEND_JIT_EXIT_TO_VM); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } else { + exit_addr = NULL; + } + + if (!zend_jit_check_timeout(jit, opline + 1, exit_addr)) { + return 0; + } + + if ((!trace || !func) && opline->opcode != ZEND_DO_ICALL) { + jit_LOAD_IP_ADDR(jit, opline + 1); + } else if (trace + && trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + jit_LOAD_IP_ADDR(jit, opline + 1); + } + } + + if (user_path) { + ir_MERGE_WITH(user_path); + } + + return 1; +} + +static int zend_jit_constructor(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, int call_level, int next_block) +{ + ir_ref if_skip_constructor = jit_IF_ex(jit, jit_CMP_IP(jit, IR_NE, opline), next_block); + + ir_IF_FALSE(if_skip_constructor); + + if (JIT_G(opt_level) < ZEND_JIT_LEVEL_INLINE) { + if (!zend_jit_tail_handler(jit, opline)) { + return 0; + } + } else { + if (!zend_jit_do_fcall(jit, opline, op_array, ssa, call_level, next_block, NULL)) { + return 0; + } + } + + /* override predecessors of the next block */ + ZEND_ASSERT(jit->ssa->cfg.blocks[next_block].predecessors_count == 1); + if (!jit->ctx.control) { + ZEND_ASSERT(jit->bb_edges[jit->bb_predecessors[next_block]]); + ir_IF_TRUE(if_skip_constructor); + ir_MERGE_2(jit->bb_edges[jit->bb_predecessors[next_block]], ir_END()); + jit->bb_edges[jit->bb_predecessors[next_block]] = ir_END(); + } else { + ZEND_ASSERT(!jit->bb_edges[jit->bb_predecessors[next_block]]); + /* merge current control path with the true branch of constructor skip condition */ + ir_MERGE_WITH_EMPTY_TRUE(if_skip_constructor); + jit->bb_edges[jit->bb_predecessors[next_block]] = ir_END(); + + jit->b = -1; + } + + return 1; +} + +static int zend_jit_verify_arg_type(zend_jit_ctx *jit, const zend_op *opline, zend_arg_info *arg_info, bool check_exception) +{ + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + uint32_t type_mask = ZEND_TYPE_PURE_MASK(arg_info->type) & MAY_BE_ANY; + ir_ref ref, fast_path = IR_UNUSED; + + ref = jit_ZVAL_ADDR(jit, res_addr); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && JIT_G(current_frame)->prev) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint8_t type = STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var)); + + if (type != IS_UNKNOWN && (type_mask & (1u << type))) { + return 1; + } + } + + if (ZEND_ARG_SEND_MODE(arg_info)) { + if (opline->opcode == ZEND_RECV_INIT) { + ref = jit_ZVAL_DEREF_ref(jit, ref); + } else { + ref = jit_Z_PTR_ref(jit, ref); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + } + } + + if (type_mask != 0) { + if (is_power_of_two(type_mask)) { + uint32_t type_code = concrete_type(type_mask); + ir_ref if_ok = jit_if_Z_TYPE_ref(jit, ref, 
ir_CONST_U8(type_code)); + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } else { + ir_ref if_ok = ir_IF(ir_AND_U32( + ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE_ref(jit, ref)), + ir_CONST_U32(type_mask))); + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } + } + + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_2(IR_BOOL, ir_CONST_FC_FUNC(zend_jit_verify_arg_slow), + ref, ir_CONST_ADDR(arg_info)); + + if (check_exception) { + ir_GUARD(ref, jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + + return 1; +} + +static int zend_jit_recv(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array) +{ + uint32_t arg_num = opline->op1.num; + zend_arg_info *arg_info = NULL; + + if (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) { + if (EXPECTED(arg_num <= op_array->num_args)) { + arg_info = &op_array->arg_info[arg_num-1]; + } else if (UNEXPECTED(op_array->fn_flags & ZEND_ACC_VARIADIC)) { + arg_info = &op_array->arg_info[op_array->num_args]; + } + if (arg_info && !ZEND_TYPE_IS_SET(arg_info->type)) { + arg_info = NULL; + } + } + + if (arg_info || (opline+1)->opcode != ZEND_RECV) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (!JIT_G(current_frame) || + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) < 0 || + arg_num > TRACE_FRAME_NUM_ARGS(JIT_G(current_frame))) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num)), + ir_CONST_ADDR(exit_addr)); + } + } else { + ir_ref if_ok = ir_IF(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num))); + ir_IF_FALSE_cold(if_ok); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_missing_arg_error), jit_FP(jit)); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + ir_IF_TRUE(if_ok); + } + } + + if (arg_info) { + if (!zend_jit_verify_arg_type(jit, opline, arg_info, 1)) { + return 0; + } + } + + return 1; +} + +static int zend_jit_recv_init(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, bool is_last, int may_throw) +{ + uint32_t arg_num = opline->op1.num; + zval *zv = RT_CONSTANT(opline, opline->op2); + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + ir_ref ref, if_fail, skip_path = IR_UNUSED; + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && JIT_G(current_frame) + && TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) >= 0) { + if (arg_num > TRACE_FRAME_NUM_ARGS(JIT_G(current_frame))) { + jit_ZVAL_COPY_CONST(jit, + res_addr, + -1, -1, + zv, 1); + } + } else { + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) { + ir_ref if_skip = ir_IF(ir_GE(ir_LOAD_U32(jit_EX(This.u2.num_args)), ir_CONST_U32(arg_num))); + ir_IF_TRUE(if_skip); + skip_path = ir_END(); + ir_IF_FALSE(if_skip); + } + jit_ZVAL_COPY_CONST(jit, + res_addr, + -1, -1, + zv, 1); + } + + if (Z_CONSTANT_P(zv)) { + jit_SET_EX_OPLINE(jit, opline); + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zval_update_constant_ex), + jit_ZVAL_ADDR(jit, res_addr), + ir_LOAD_A(ir_ADD_OFFSET(ir_LOAD_A(jit_EX(func)), offsetof(zend_op_array, scope)))); + + if_fail = ir_IF(ref); + ir_IF_TRUE_cold(if_fail); + jit_ZVAL_PTR_DTOR(jit, res_addr, MAY_BE_ANY|MAY_BE_RC1|MAY_BE_RCN, 1, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); +
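+ /* success path: the constant default value was evaluated in place and execution continues below */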
ir_IF_FALSE(if_fail); + } + + if (skip_path) { + ir_MERGE_WITH(skip_path); + } + + if (op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS) { + do { + zend_arg_info *arg_info; + + if (arg_num <= op_array->num_args) { + arg_info = &op_array->arg_info[arg_num-1]; + } else if (op_array->fn_flags & ZEND_ACC_VARIADIC) { + arg_info = &op_array->arg_info[op_array->num_args]; + } else { + break; + } + if (!ZEND_TYPE_IS_SET(arg_info->type)) { + break; + } + if (!zend_jit_verify_arg_type(jit, opline, arg_info, may_throw)) { + return 0; + } + } while (0); + } + + return 1; +} + +static bool zend_jit_verify_return_type(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info) +{ + zend_arg_info *arg_info = &op_array->arg_info[-1]; + ZEND_ASSERT(ZEND_TYPE_IS_SET(arg_info->type)); + zend_jit_addr op1_addr = OP1_ADDR(); + bool needs_slow_check = 1; + uint32_t type_mask = ZEND_TYPE_PURE_MASK(arg_info->type) & MAY_BE_ANY; + ir_ref fast_path = IR_UNUSED; + + if (type_mask != 0) { + if (((op1_info & MAY_BE_ANY) & type_mask) == 0) { + /* pass */ + } else if (((op1_info & MAY_BE_ANY) | type_mask) == type_mask) { + needs_slow_check = 0; + } else if (is_power_of_two(type_mask)) { + uint32_t type_code = concrete_type(type_mask); + ir_ref if_ok = jit_if_Z_TYPE(jit, op1_addr, type_code); + + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } else { + ir_ref if_ok = ir_IF(ir_AND_U32( + ir_SHL_U32(ir_CONST_U32(1), jit_Z_TYPE(jit, op1_addr)), + ir_CONST_U32(type_mask))); + + ir_IF_TRUE(if_ok); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_ok); + } + } + if (needs_slow_check) { + ir_ref ref; + + jit_SET_EX_OPLINE(jit, opline); + ref = jit_ZVAL_ADDR(jit, op1_addr); + if (op1_info & MAY_BE_UNDEF) { + ref = zend_jit_zval_check_undef(jit, ref, opline->op1.var, NULL, 1); + } + + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_verify_return_slow), + ref, + ir_LOAD_A(jit_EX(func)), + ir_CONST_ADDR(arg_info), + ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->op2.num)); + + zend_jit_check_exception(jit); + + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + } + + return 1; +} + +static int zend_jit_leave_frame(zend_jit_ctx *jit) +{ + // JIT: EG(current_execute_data) = EX(prev_execute_data); + ir_STORE(jit_EG(current_execute_data), ir_LOAD_A(jit_EX(prev_execute_data))); + return 1; +} + +static int zend_jit_free_cvs(zend_jit_ctx *jit) +{ + // JIT: EG(current_execute_data) = EX(prev_execute_data); + ir_STORE(jit_EG(current_execute_data), ir_LOAD_A(jit_EX(prev_execute_data))); + + // JIT: zend_free_compiled_variables(execute_data); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_free_compiled_variables), jit_FP(jit)); + return 1; +} + +static int zend_jit_free_cv(zend_jit_ctx *jit, uint32_t info, uint32_t var) +{ + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(var)); + + jit_ZVAL_PTR_DTOR(jit, var_addr, info, 1, NULL); + } + return 1; +} + +static int zend_jit_free_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t info, uint32_t var_offset) +{ + if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + jit_ZVAL_PTR_DTOR(jit, ZEND_ADDR_MEM_ZVAL(ZREG_FP, var_offset), info, 0, opline); + } + return 1; +} + +static int zend_jit_leave_func(zend_jit_ctx *jit, + const zend_op_array *op_array, + const zend_op *opline, + uint32_t op1_info, + bool left_frame, + zend_jit_trace_rec *trace, + zend_jit_trace_info *trace_info, + int indirect_var_access, + 
int may_throw) +{ + bool may_be_top_frame = + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !TRACE_FRAME_IS_NESTED(JIT_G(current_frame)); + bool may_need_call_helper = + indirect_var_access || /* may have symbol table */ + !op_array->function_name || /* may have symbol table */ + may_be_top_frame || + (op_array->fn_flags & ZEND_ACC_VARIADIC) || /* may have extra named args */ + JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) == -1 || /* unknown number of args */ + (uint32_t)TRACE_FRAME_NUM_ARGS(JIT_G(current_frame)) > op_array->num_args; /* extra args */ + bool may_need_release_this = + !(op_array->fn_flags & ZEND_ACC_CLOSURE) && + op_array->scope && + !(op_array->fn_flags & ZEND_ACC_STATIC) && + (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + !JIT_G(current_frame) || + !TRACE_FRAME_NO_NEED_RELEASE_THIS(JIT_G(current_frame))); + ir_ref call_info = IR_UNUSED, ref, cold_path = IR_UNUSED; + + if (may_need_call_helper) { + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(jit)) { + return 0; + } + } + /* ZEND_CALL_FAKE_CLOSURE handled on slow path to eliminate check for ZEND_CALL_CLOSURE on fast path */ + call_info = ir_LOAD_U32(jit_EX(This.u1.type_info)); + ref = ir_AND_U32(call_info, + ir_CONST_U32(ZEND_CALL_TOP|ZEND_CALL_HAS_SYMBOL_TABLE|ZEND_CALL_FREE_EXTRA_ARGS|ZEND_CALL_ALLOCATED|ZEND_CALL_HAS_EXTRA_NAMED_PARAMS|ZEND_CALL_FAKE_CLOSURE)); + if (trace && trace->op != ZEND_JIT_TRACE_END) { + ir_ref if_slow = ir_IF(ref); + + ir_IF_TRUE_cold(if_slow); + if (!GCC_GLOBAL_REGS) { + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(zend_jit_leave_func_helper), jit_FP(jit)); + } else { + ir_CALL(IR_VOID, ir_CONST_FC_FUNC(zend_jit_leave_func_helper)); + } + + if (may_be_top_frame) { + // TODO: try to avoid this check ??? 
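+ /* Leaving the top frame must halt the VM. Each dispatch mode signals this differently: the hybrid VM relies on the OPLINE guard that follows, with global registers IP becomes NULL, and in call-threaded mode the helper returns a negative value. */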
+ if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { +#if 0 + /* this check should be handled by the following OPLINE guard */ + | cmp IP, zend_jit_halt_op + | je ->trace_halt +#endif + } else if (GCC_GLOBAL_REGS) { + ir_GUARD(jit_IP(jit), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } else { + ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } + } + + if (!GCC_GLOBAL_REGS) { + // execute_data = EG(current_execute_data) + jit_STORE_FP(jit, ir_LOAD_A(jit_EG(current_execute_data))); + } + cold_path = ir_END(); + ir_IF_FALSE(if_slow); + } else { + ir_GUARD_NOT(ref, jit_STUB_ADDR(jit, jit_stub_leave_function_handler)); + } + } + + if ((op_array->fn_flags & (ZEND_ACC_CLOSURE|ZEND_ACC_FAKE_CLOSURE)) == ZEND_ACC_CLOSURE) { + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(jit)) { + return 0; + } + } + // JIT: OBJ_RELEASE(ZEND_CLOSURE_OBJECT(EX(func))); + jit_OBJ_RELEASE(jit, ir_ADD_OFFSET(ir_LOAD_A(jit_EX(func)), -sizeof(zend_object))); + } else if (may_need_release_this) { + ir_ref if_release, fast_path = IR_UNUSED; + + if (!left_frame) { + left_frame = 1; + if (!zend_jit_leave_frame(jit)) { + return 0; + } + } + if (!JIT_G(current_frame) || !TRACE_FRAME_ALWAYS_RELEASE_THIS(JIT_G(current_frame))) { + // JIT: if (call_info & ZEND_CALL_RELEASE_THIS) + if (!call_info) { + call_info = ir_LOAD_U32(jit_EX(This.u1.type_info)); + } + if_release = ir_IF(ir_AND_U32(call_info, ir_CONST_U32(ZEND_CALL_RELEASE_THIS))); + ir_IF_FALSE(if_release); + fast_path = ir_END(); + ir_IF_TRUE(if_release); + } + // JIT: OBJ_RELEASE(Z_OBJ(execute_data->This)); + jit_OBJ_RELEASE(jit, ir_LOAD_A(jit_EX(This.value.obj))); + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + // TODO: avoid EG(exception) check for $this->foo() calls + may_throw = 1; + } + + // JIT: EG(vm_stack_top) = (zval*)execute_data + ir_STORE(jit_EG(vm_stack_top), jit_FP(jit)); + + // JIT: execute_data = EX(prev_execute_data) + jit_STORE_FP(jit, ir_LOAD_A(jit_EX(prev_execute_data))); + + if (!left_frame) { + // JIT: EG(current_execute_data) = execute_data + ir_STORE(jit_EG(current_execute_data), jit_FP(jit)); + } + + if (trace) { + if (trace->op != ZEND_JIT_TRACE_END + && (JIT_G(current_frame) && !TRACE_FRAME_IS_UNKNOWN_RETURN(JIT_G(current_frame)))) { + zend_jit_reset_last_valid_opline(jit); + } else { + if (GCC_GLOBAL_REGS) { + /* We add an extra RLOAD and RSTORE to get a fused update of the persistent register: + * mov (%FP), %IP + * add $0x1c, %IP + * The naive (commented) code leads to an extra register allocation and move.
+ * mov (%FP), %tmp + * add $0x1c, %tmp + * mov %tmp, %FP + */ +#if 0 + jit_STORE_IP(jit, ir_ADD_OFFSET(ir_LOAD_A(jit_EX(opline)), sizeof(zend_op))); +#else + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + jit_STORE_IP(jit, ir_ADD_OFFSET(jit_IP(jit), sizeof(zend_op))); +#endif + } else { + ir_ref ref = jit_EX(opline); + + ir_STORE(ref, ir_ADD_OFFSET(ir_LOAD_A(ref), sizeof(zend_op))); + } + } + + if (cold_path) { + ir_MERGE_WITH(cold_path); + } + + if (trace->op == ZEND_JIT_TRACE_BACK + && (!JIT_G(current_frame) || TRACE_FRAME_IS_UNKNOWN_RETURN(JIT_G(current_frame)))) { + const zend_op *next_opline = trace->opline; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && (op1_info & MAY_BE_RC1) + && (op1_info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY))) { + /* exception might be thrown during destruction of unused return value */ + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + } + do { + trace++; + } while (trace->op == ZEND_JIT_TRACE_INIT_CALL); + ZEND_ASSERT(trace->op == ZEND_JIT_TRACE_VM || trace->op == ZEND_JIT_TRACE_END); + next_opline = trace->opline; + ZEND_ASSERT(next_opline != NULL); + + if (trace->op == ZEND_JIT_TRACE_END + && trace->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_RET) { + trace_info->flags |= ZEND_JIT_TRACE_LOOP; + + ir_ref if_eq = ir_IF(jit_CMP_IP(jit, IR_EQ, next_opline)); + + ir_IF_TRUE(if_eq); + ZEND_ASSERT(jit->trace_loop_ref); + ZEND_ASSERT(jit->ctx.ir_base[jit->trace_loop_ref].op2 == IR_UNUSED); + ir_MERGE_SET_OP(jit->trace_loop_ref, 2, ir_END()); + ir_IF_FALSE(if_eq); + +#ifdef ZEND_VM_HYBRID_JIT_RED_ZONE_SIZE + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); +#else + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_escape)); +#endif + } else { + ir_GUARD(jit_CMP_IP(jit, IR_EQ, next_opline), jit_STUB_ADDR(jit, jit_stub_trace_escape)); + } + + zend_jit_set_last_valid_opline(jit, trace->opline); + + return 1; + } else if (may_throw || + (((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && (op1_info & MAY_BE_RC1) + && (op1_info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY))) + && (!JIT_G(current_frame) || TRACE_FRAME_IS_RETURN_VALUE_UNUSED(JIT_G(current_frame))))) { + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + } + + return 1; + } else { + // JIT: if (EG(exception)) + ir_GUARD_NOT(ir_LOAD_A(jit_EG(exception)), jit_STUB_ADDR(jit, jit_stub_leave_throw)); + // JIT: opline = EX(opline) + 1 + if (GCC_GLOBAL_REGS) { + jit_STORE_IP(jit, ir_LOAD_A(jit_EX(opline))); + jit_STORE_IP(jit, ir_ADD_OFFSET(jit_IP(jit), sizeof(zend_op))); + } else { + ir_ref ref = jit_EX(opline); + + ir_STORE(ref, ir_ADD_OFFSET(ir_LOAD_A(ref), sizeof(zend_op))); + } + } + + if (GCC_GLOBAL_REGS) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + + jit->b = -1; + + return 1; +} + +static void zend_jit_common_return(zend_jit_ctx *jit) +{ + ZEND_ASSERT(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); +} + +static int zend_jit_return(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, uint32_t op1_info, zend_jit_addr op1_addr) +{ + zend_jit_addr ret_addr; + int8_t return_value_used = -1; + ir_ref return_value = IR_UNUSED, ref, refcount, if_return_value_used = IR_UNUSED; + + ZEND_ASSERT(op_array->type != ZEND_EVAL_CODE && op_array->function_name); + ZEND_ASSERT(!(op1_info & 
MAY_BE_UNDEF)); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + jit->return_inputs = IR_UNUSED; + if (JIT_G(current_frame)) { + if (TRACE_FRAME_IS_RETURN_VALUE_USED(JIT_G(current_frame))) { + return_value_used = 1; + } else if (TRACE_FRAME_IS_RETURN_VALUE_UNUSED(JIT_G(current_frame))) { + return_value_used = 0; + } else { + return_value_used = -1; + } + } + } + + if (ZEND_OBSERVER_ENABLED) { + if (Z_MODE(op1_addr) == IS_REG) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + + if (!zend_jit_spill_store(jit, op1_addr, dst, op1_info, 1)) { + return 0; + } + op1_addr = dst; + } + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_observer_fcall_end), + jit_FP(jit), + jit_ZVAL_ADDR(jit, op1_addr)); + } + + // JIT: if (!EX(return_value)) + return_value = ir_LOAD_A(jit_EX(return_value)); + ret_addr = ZEND_ADDR_REF_ZVAL(return_value); + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && + (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if (return_value_used == -1) { + if_return_value_used = ir_IF(return_value); + ir_IF_FALSE_cold(if_return_value_used); + } + if (return_value_used != 1) { + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)-(MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + ir_ref if_refcounted = jit_if_REFCOUNTED(jit, op1_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(jit->return_inputs); + ir_IF_TRUE(if_refcounted); + } + ref = jit_Z_PTR(jit, op1_addr); + refcount = jit_GC_DELREF(jit, ref); + + if (RC_MAY_BE_1(op1_info)) { + if (RC_MAY_BE_N(op1_info)) { + ir_ref if_non_zero = ir_IF(refcount); + ir_IF_TRUE(if_non_zero); + ir_END_list(jit->return_inputs); + ir_IF_FALSE(if_non_zero); + } + jit_ZVAL_DTOR(jit, ref, op1_info, opline); + } + if (return_value_used == -1) { + ir_END_list(jit->return_inputs); + } + } + } else if (return_value_used == -1) { + if_return_value_used = ir_IF(return_value); + ir_IF_FALSE_cold(if_return_value_used); + ir_END_list(jit->return_inputs); + } + + if (if_return_value_used) { + ir_IF_TRUE(if_return_value_used); + } + + if (return_value_used == 0) { + if (jit->return_inputs) { + ZEND_ASSERT(JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE); + ir_END_list(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); + jit->return_inputs = IR_UNUSED; + } + return 1; + } + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, ret_addr, MAY_BE_ANY, MAY_BE_ANY, zv, 1); + } else if (opline->op1_type == IS_TMP_VAR) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } else if (opline->op1_type == IS_CV) { + if (op1_info & MAY_BE_REF) { + ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || + (op1_info & (MAY_BE_REF|MAY_BE_OBJECT)) || + !op_array->function_name) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 1); + } else if (return_value_used != 1) { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + // JIT: if (EXPECTED(!(EX_CALL_INFO() & ZEND_CALL_CODE))) ZVAL_NULL(retval_ptr); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_NULL); + } else { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + } else { + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + } else { + if (op1_info & MAY_BE_REF) { + ir_ref if_ref, ref2, if_non_zero; + zend_jit_addr ref_addr; + + if_ref = jit_if_Z_TYPE(jit, op1_addr, 
IS_REFERENCE); + ir_IF_TRUE_cold(if_ref); + + // JIT: zend_refcounted *ref = Z_COUNTED_P(retval_ptr) + ref = jit_Z_PTR(jit, op1_addr); + + // JIT: ZVAL_COPY_VALUE(return_value, &ref->value) + ref2 = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + ref_addr = ZEND_ADDR_REF_ZVAL(ref2); + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, ref_addr, op1_info, 0); + ref2 = jit_GC_DELREF(jit, ref); + if_non_zero = ir_IF(ref2); + ir_IF_TRUE(if_non_zero); + + // JIT: if (IS_REFCOUNTED()) + ir_ref if_refcounted = jit_if_REFCOUNTED(jit, ret_addr); + ir_IF_FALSE(if_refcounted); + ir_END_list(jit->return_inputs); + ir_IF_TRUE(if_refcounted); + + // JIT: ADDREF + ref2 = jit_Z_PTR(jit, ret_addr); + jit_GC_ADDREF(jit, ref2); + ir_END_list(jit->return_inputs); + + ir_IF_FALSE(if_non_zero); + + jit_EFREE(jit, ref, sizeof(zend_reference), op_array, opline); + ir_END_list(jit->return_inputs); + + ir_IF_FALSE(if_ref); + } + jit_ZVAL_COPY(jit, ret_addr, MAY_BE_ANY, op1_addr, op1_info, 0); + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (jit->return_inputs) { + ir_END_list(jit->return_inputs); + ir_MERGE_list(jit->return_inputs); + jit->return_inputs = IR_UNUSED; + } + } else { + ir_END_list(jit->return_inputs); + jit->b = -1; + } + + return 1; +} + +static int zend_jit_bind_global(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info) +{ + zend_jit_addr op1_addr = OP1_ADDR(); + zend_string *varname = Z_STR_P(RT_CONSTANT(opline, opline->op2)); + ir_ref cache_slot_ref, idx_ref, num_used_ref, bucket_ref, ref, ref2; + ir_ref if_fit, if_reference, if_same_key, fast_path; + ir_ref slow_inputs = IR_UNUSED, end_inputs = IR_UNUSED; + + // JIT: idx = (uintptr_t)CACHED_PTR(opline->extended_value) - 1; + cache_slot_ref = ir_ADD_OFFSET(ir_LOAD_A(jit_EX(run_time_cache)), opline->extended_value); + idx_ref = ir_SUB_A(ir_LOAD_A(cache_slot_ref), ir_CONST_ADDR(1)); + + // JIT: if (EXPECTED(idx < EG(symbol_table).nNumUsed * sizeof(Bucket))) + num_used_ref = ir_MUL_U32(ir_LOAD_U32(jit_EG(symbol_table.nNumUsed)), + ir_CONST_U32(sizeof(Bucket))); + if (sizeof(void*) == 8) { + num_used_ref = ir_ZEXT_A(num_used_ref); + } + if_fit = ir_IF(ir_ULT(idx_ref, num_used_ref)); + ir_IF_FALSE_cold(if_fit); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_fit); + + // JIT: Bucket *p = (Bucket*)((char*)EG(symbol_table).arData + idx); + bucket_ref = ir_ADD_A(ir_LOAD_A(jit_EG(symbol_table.arData)), idx_ref); + if_reference = jit_if_Z_TYPE_ref(jit, bucket_ref, ir_CONST_U8(IS_REFERENCE)); + ir_IF_FALSE_cold(if_reference); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_reference); + + // JIT: (EXPECTED(p->key == varname)) + if_same_key = ir_IF(ir_EQ(ir_LOAD_A(ir_ADD_OFFSET(bucket_ref, offsetof(Bucket, key))), ir_CONST_ADDR(varname))); + ir_IF_FALSE_cold(if_same_key); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_same_key); + + // JIT: GC_ADDREF(Z_PTR(p->val)) + ref = jit_Z_PTR_ref(jit, bucket_ref); + jit_GC_ADDREF(jit, ref); + + fast_path = ir_END(); + ir_MERGE_list(slow_inputs); + + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_global_helper), + ir_CONST_ADDR(varname), + cache_slot_ref); + + ir_MERGE_WITH(fast_path); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + + if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + ir_ref if_refcounted = IR_UNUSED, refcount, if_non_zero, if_may_not_leak; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + // JIT: if (UNEXPECTED(Z_REFCOUNTED_P(variable_ptr))) + if_refcounted = jit_if_REFCOUNTED(jit, op1_addr); + 
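+ /* releasing the old value is compiled as a cold path; non-refcounted values are simply overwritten */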
ir_IF_TRUE_cold(if_refcounted); + } + + // JIT: zend_refcounted *garbage = Z_COUNTED_P(variable_ptr); + ref2 = jit_Z_PTR(jit, op1_addr); + + // JIT: ZVAL_REF(variable_ptr, ref) + jit_set_Z_PTR(jit, op1_addr, ref); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX); + + // JIT: if (GC_DELREF(garbage) == 0) + refcount = jit_GC_DELREF(jit, ref2); + if_non_zero = ir_IF(refcount); + if (!(op1_info & (MAY_BE_REF|MAY_BE_ARRAY|MAY_BE_OBJECT))) { + ir_IF_TRUE(if_non_zero); + ir_END_list(end_inputs); + } + ir_IF_FALSE(if_non_zero); + + jit_ZVAL_DTOR(jit, ref2, op1_info, opline); + if (op1_info & (MAY_BE_REF|MAY_BE_ARRAY|MAY_BE_OBJECT)) { + ir_END_list(end_inputs); + ir_IF_TRUE(if_non_zero); + + // JIT: GC_ZVAL_CHECK_POSSIBLE_ROOT(variable_ptr) + if_may_not_leak = jit_if_GC_MAY_NOT_LEAK(jit, ref2); + ir_IF_TRUE(if_may_not_leak); + ir_END_list(end_inputs); + ir_IF_FALSE(if_may_not_leak); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(gc_possible_root), ref2); + } + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + ir_END_list(end_inputs); + ir_IF_FALSE(if_refcounted); + } + } + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + // JIT: ZVAL_REF(variable_ptr, ref) + jit_set_Z_PTR(jit, op1_addr, ref); + jit_set_Z_TYPE_INFO(jit, op1_addr, IS_REFERENCE_EX); + } + + if (end_inputs) { + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_free(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, int may_throw) +{ + zend_jit_addr op1_addr = OP1_ADDR(); + + if (op1_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { + if (may_throw) { + jit_SET_EX_OPLINE(jit, opline); + } + if (opline->opcode == ZEND_FE_FREE && (op1_info & (MAY_BE_OBJECT|MAY_BE_REF))) { + ir_ref ref, if_array, if_exists, end_inputs = IR_UNUSED; + + if (op1_info & MAY_BE_ARRAY) { + if_array = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(if_array); + ir_END_list(end_inputs); + ir_IF_FALSE(if_array); + } + ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_iter_idx))); + if_exists = ir_IF(ir_EQ(ref, ir_CONST_U32(-1))); + ir_IF_TRUE(if_exists); + ir_END_list(end_inputs); + ir_IF_FALSE(if_exists); + + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_hash_iterator_del), ref); + + ir_END_list(end_inputs); + ir_MERGE_list(end_inputs); + } + + jit_ZVAL_PTR_DTOR(jit, op1_addr, op1_info, 0, opline); + + if (may_throw) { + zend_jit_check_exception(jit); + } + } + + return 1; +} + +static int zend_jit_echo(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info) +{ + if (opline->op1_type == IS_CONST) { + zval *zv; + size_t len; + + zv = RT_CONSTANT(opline, opline->op1); + ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING); + len = Z_STRLEN_P(zv); + + if (len > 0) { + const char *str = Z_STRVAL_P(zv); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_write), + ir_CONST_ADDR(str), ir_CONST_ADDR(len)); + + zend_jit_check_exception(jit); + } + } else { + zend_jit_addr op1_addr = OP1_ADDR(); + ir_ref ref; + + ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING); + + jit_SET_EX_OPLINE(jit, opline); + + ref = jit_Z_PTR(jit, op1_addr); + ir_CALL_2(IR_VOID, ir_CONST_FUNC(zend_write), + ir_ADD_OFFSET(ref, offsetof(zend_string, val)), + ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_string, len)))); + + if (opline->op1_type & (IS_VAR|IS_TMP_VAR)) { + jit_ZVAL_PTR_DTOR(jit, op1_addr, op1_info, 0, opline);
+ } + + zend_jit_check_exception(jit); + } + return 1; +} + +static int zend_jit_strlen(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr) +{ + if (opline->op1_type == IS_CONST) { + zval *zv; + size_t len; + + zv = RT_CONSTANT(opline, opline->op1); + ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING); + len = Z_STRLEN_P(zv); + + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(len)); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) { + return 0; + } + } else { + ir_ref ref; + + ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING); + + ref = jit_Z_PTR(jit, op1_addr); + ref = ir_LOAD_L(ir_ADD_OFFSET(ref, offsetof(zend_string, len))); + jit_set_Z_LVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) == IS_REG) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) { + return 0; + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + return 1; +} + +static int zend_jit_count(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, zend_jit_addr res_addr, int may_throw) +{ + if (opline->op1_type == IS_CONST) { + zval *zv; + zend_long count; + + zv = RT_CONSTANT(opline, opline->op1); + ZEND_ASSERT(Z_TYPE_P(zv) == IS_ARRAY); + count = zend_hash_num_elements(Z_ARRVAL_P(zv)); + + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(count)); + if (Z_MODE(res_addr) != IS_REG) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) { + return 0; + } + } else { + ir_ref ref; + + ZEND_ASSERT((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_ARRAY); + // Note: See the implementation of ZEND_COUNT in Zend/zend_vm_def.h - arrays do not contain IS_UNDEF elements as of PHP 8.1.
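+ // JIT: count = zend_hash_num_elements(Z_ARRVAL_P(op1));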
+ + ref = jit_Z_PTR(jit, op1_addr); + if (sizeof(void*) == 8) { + ref = ir_LOAD_U32(ir_ADD_OFFSET(ref, offsetof(HashTable, nNumOfElements))); + ref = ir_ZEXT_L(ref); + } else { + ref = ir_LOAD_L(ir_ADD_OFFSET(ref, offsetof(HashTable, nNumOfElements))); + } + jit_set_Z_LVAL(jit, res_addr, ref); + + if (Z_MODE(res_addr) == IS_REG) { + if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, MAY_BE_LONG)) { + return 0; + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + return 1; +} + +static int zend_jit_in_array(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint8_t smart_branch_opcode, uint32_t target_label, uint32_t target_label2, const void *exit_addr) +{ + HashTable *ht = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + ir_ref ref; + + ZEND_ASSERT(opline->op1_type != IS_VAR && opline->op1_type != IS_TMP_VAR); + ZEND_ASSERT((op1_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) == MAY_BE_STRING); + + // JIT: result = zend_hash_find_ex(ht, Z_STR_P(op1), OP1_TYPE == IS_CONST); + if (opline->op1_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(ht), + jit_Z_PTR(jit, op1_addr)); + } else { + zend_string *str = Z_STR_P(RT_CONSTANT(opline, opline->op1)); + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find_known_hash), + ir_CONST_ADDR(ht), ir_CONST_ADDR(str)); + } + + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + } else if (smart_branch_opcode) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + ZEND_ASSERT(bb->successors_count == 2); + ref = jit_IF_ex(jit, ref, + (smart_branch_opcode == ZEND_JMPZ) ? target_label2 : target_label); + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ref); + jit->b = -1; + } else { + jit_set_Z_TYPE_INFO_ex(jit, res_addr, + ir_ADD_U32(ir_ZEXT_U32(ir_NE(ref, IR_NULL)), ir_CONST_U32(IS_FALSE))); + } + + return 1; +} + +static int zend_jit_rope(zend_jit_ctx *jit, const zend_op *opline, uint32_t op2_info) +{ + uint32_t offset; + + offset = (opline->opcode == ZEND_ROPE_INIT) ? 
+ opline->result.var : + opline->op1.var + opline->extended_value * sizeof(zend_string*); + + if (opline->op2_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op2); + zend_string *str; + + ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING); + str = Z_STR_P(zv); + + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), offset), ir_CONST_ADDR(str)); + } else { + zend_jit_addr op2_addr = OP2_ADDR(); + ir_ref ref; + + ZEND_ASSERT((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_STRING); + + ref = jit_Z_PTR(jit, op2_addr); + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), offset), ref); + if (opline->op2_type == IS_CV) { + ir_ref if_refcounted, long_path; + + if_refcounted = jit_if_REFCOUNTED(jit, op2_addr); + ir_IF_TRUE(if_refcounted); + jit_GC_ADDREF(jit, ref); + long_path = ir_END(); + + ir_IF_FALSE(if_refcounted); + ir_MERGE_WITH(long_path); + } + } + + if (opline->opcode == ZEND_ROPE_END) { + zend_jit_addr res_addr = RES_ADDR(); + ir_ref ref; + + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_rope_end), + ir_ADD_OFFSET(jit_FP(jit), opline->op1.var), + ir_CONST_U32(opline->extended_value)); + + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING_EX); + } + + return 1; +} + +static int zend_jit_zval_copy_deref(zend_jit_ctx *jit, zend_jit_addr res_addr, zend_jit_addr val_addr, ir_ref type) +{ + ir_ref if_refcounted, if_reference, if_refcounted2, ptr, val2, ptr2, type2; + ir_refs *merge_inputs, *types, *ptrs; +#if SIZEOF_ZEND_LONG == 4 + ir_ref val = jit_ZVAL_ADDR(jit, val_addr); + ir_refs *values; /* we need this only for zval.w2 copy */ +#endif + + ir_refs_init(merge_inputs, 4); + ir_refs_init(types, 4); + ir_refs_init(ptrs, 4); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_init(values, 4); +#endif + + // JIT: ptr = Z_PTR_P(val); + ptr = jit_Z_PTR(jit, val_addr); + + // JIT: if (Z_OPT_REFCOUNTED_P(val)) { + if_refcounted = ir_IF(ir_AND_U32(type, ir_CONST_U32(Z_TYPE_FLAGS_MASK))); + ir_IF_FALSE_cold(if_refcounted); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type); + ir_refs_add(ptrs, ptr); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val); +#endif + + ir_IF_TRUE(if_refcounted); + + // JIT: if (UNEXPECTED(Z_OPT_ISREF_P(val))) { + if_reference = ir_IF(ir_EQ(type, ir_CONST_U32(IS_REFERENCE_EX))); +// if_reference = ir_IF(ir_EQ(ir_TRUNC_U8(type), ir_CONST_U8(IS_REFERENCE))); // TODO: fix IR to avoid need for extra register ??? 
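+ /* comparing the whole type_info word against IS_REFERENCE_EX is safe: references always carry the refcounted type flag */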
+ ir_IF_TRUE(if_reference); + + // JIT: val = Z_REFVAL_P(val); + val2 = ir_ADD_OFFSET(ptr, offsetof(zend_reference, val)); + type2 = jit_Z_TYPE_INFO_ref(jit, val2); + ptr2 = jit_Z_PTR_ref(jit, val2); + + // JIT: if (Z_OPT_REFCOUNTED_P(val)) { + if_refcounted2 = ir_IF(ir_AND_U32(type2, ir_CONST_U32(Z_TYPE_FLAGS_MASK))); + ir_IF_FALSE_cold(if_refcounted2); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type2); + ir_refs_add(ptrs, ptr2); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val2); +#endif + + ir_IF_TRUE(if_refcounted2); + ir_MERGE_WITH_EMPTY_FALSE(if_reference); + type = ir_PHI_2(IR_U32, type2, type); + ptr = ir_PHI_2(IR_ADDR, ptr2, ptr); +#if SIZEOF_ZEND_LONG == 4 + val = ir_PHI_2(IR_ADDR, val2, val); +#endif + + // JIT: Z_ADDREF_P(val); + jit_GC_ADDREF(jit, ptr); + ir_refs_add(merge_inputs, ir_END()); + ir_refs_add(types, type); + ir_refs_add(ptrs, ptr); +#if SIZEOF_ZEND_LONG == 4 + ir_refs_add(values, val); +#endif + + ir_MERGE_N(merge_inputs->count, merge_inputs->refs); + type = ir_PHI_N(IR_U32, types->count, types->refs); + ptr = ir_PHI_N(IR_ADDR, ptrs->count, ptrs->refs); +#if SIZEOF_ZEND_LONG == 4 + val = ir_PHI_N(IR_ADDR, values->count, values->refs); + val_addr = ZEND_ADDR_REF_ZVAL(val); +#endif + + // JIT: Z_PTR_P(res) = ptr; + jit_set_Z_PTR(jit, res_addr, ptr); +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_W2(jit, res_addr, jit_Z_W2(jit, val_addr)); +#endif + jit_set_Z_TYPE_INFO_ex(jit, res_addr, type); + + return 1; +} + +static int zend_jit_fetch_dimension_address_inner(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t type, + uint32_t op1_info, + uint32_t op2_info, + uint8_t dim_type, + const void *found_exit_addr, + const void *not_found_exit_addr, + const void *exit_addr, + bool result_type_guard, + ir_ref ht_ref, + ir_refs *found_inputs, + ir_refs *found_vals, + ir_ref *end_inputs, + ir_ref *not_found_inputs) +{ + zend_jit_addr op2_addr = OP2_ADDR(); + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + ir_ref ref = IR_UNUSED, cond, if_found; + ir_ref if_type = IS_UNUSED; + ir_refs *test_zval_inputs, *test_zval_values; + + ir_refs_init(test_zval_inputs, 4); + ir_refs_init(test_zval_values, 4); + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && type == BP_VAR_R + && !exit_addr) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if (op2_info & MAY_BE_LONG) { + bool op2_loaded = 0; + bool packed_loaded = 0; + bool bad_packed_key = 0; + ir_ref if_packed = IS_UNDEF; + ir_ref h = IR_UNUSED; + ir_ref idx_not_found_inputs = IR_UNUSED; + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_LONG)) { + // JIT: if (EXPECTED(Z_TYPE_P(dim) == IS_LONG)) + if_type = jit_if_Z_TYPE(jit, op2_addr, IS_LONG); + ir_IF_TRUE(if_type); + } + if (op1_info & MAY_BE_PACKED_GUARD) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_PACKED_GUARD); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + cond = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if (op1_info & MAY_BE_ARRAY_PACKED) { + ir_GUARD(cond, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(cond, ir_CONST_ADDR(exit_addr)); + } + } + if (type == BP_VAR_W) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + op2_loaded = 1; + } + if (op1_info & MAY_BE_ARRAY_PACKED) { + zend_long val = -1; + + if 
(Z_MODE(op2_addr) == IS_CONST_ZVAL) { + val = Z_LVAL_P(Z_ZV(op2_addr)); + if (val >= 0 && val < HT_MAX_SIZE) { + packed_loaded = 1; + } else { + bad_packed_key = 1; + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } + h = ir_CONST_LONG(val); + } else { + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + op2_loaded = 1; + } + packed_loaded = 1; + } + + if (dim_type == IS_UNDEF && type == BP_VAR_W && packed_loaded) { + /* don't generate "fast" code for packed array */ + packed_loaded = 0; + } + + if (packed_loaded) { + // JIT: ZEND_HASH_INDEX_FIND(ht, hval, retval, num_undef); + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + if_packed = ir_IF( + ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED))); + ir_IF_TRUE(if_packed); + } + // JIT: if (EXPECTED((zend_ulong)(_h) < (zend_ulong)(_ht)->nNumUsed)) + ref = ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed))); +#if SIZEOF_ZEND_LONG == 8 + ref = ir_ZEXT_L(ref); +#endif + cond = ir_ULT(h, ref); + if (type == BP_JIT_IS) { + if (not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_fit); + } + } else if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(cond, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_RW && not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_fit); + } else { + ir_ref if_fit = ir_IF(cond); + ir_IF_FALSE(if_fit); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_fit); + } + // JIT: _ret = &_ht->arPacked[h]; + ref = ir_MUL_L(h, ir_CONST_LONG(sizeof(zval))); + ref = ir_BITCAST_A(ref); + ref = ir_ADD_A(ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arPacked))), ref); + if (type == BP_JIT_IS) { + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + } + } + } + switch (type) { + case BP_JIT_IS: + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + } + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(_zend_hash_index_find), ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_find), ht_ref, h); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + } else if (!not_found_exit_addr && !packed_loaded) { + ir_END_list(*end_inputs); + } + break; + case BP_VAR_R: + case BP_VAR_IS: + case BP_VAR_UNSET: + if (packed_loaded) { + ir_ref type_ref = jit_Z_TYPE_ref(jit, ref); + + if (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) { + ir_ref if_def = ir_IF(type_ref); + ir_IF_TRUE(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_def); + } else if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + /* 
perform IS_UNDEF check only after result type guard (during deoptimization) */ + if (!result_type_guard || (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + ir_GUARD(type_ref, ir_CONST_ADDR(exit_addr)); + } + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(type_ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_ref if_def = ir_IF(type_ref); + ir_IF_FALSE(if_def); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_def); + } else { + ir_ref if_def = ir_IF(type_ref); + ir_IF_FALSE(if_def); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_def); + } + } + if (!(op1_info & MAY_BE_ARRAY_KEY_LONG) || (packed_loaded && (op1_info & MAY_BE_ARRAY_NUMERIC_HASH))) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + ir_END_list(*not_found_inputs); + } else { + ir_END_list(idx_not_found_inputs); + } + } + if (/*!packed_loaded ||*/ (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + } + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(_zend_hash_index_find), ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_find), ht_ref, h); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_found); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(idx_not_found_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } else if (packed_loaded) { + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + + if (idx_not_found_inputs) { + ir_MERGE_list(idx_not_found_inputs); + switch (type) { + case BP_VAR_R: + ZEND_ASSERT(JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE); + // JIT: zend_error(E_WARNING,"Undefined array key " ZEND_LONG_FMT, hval); + // JIT: retval = &EG(uninitialized_zval); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_undefined_offset, IR_CONST_FASTCALL_FUNC)); + ir_END_list(*end_inputs); + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + if (!not_found_exit_addr) { + // JIT: retval = &EG(uninitialized_zval); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(*end_inputs); + } + break; + default: + ZEND_UNREACHABLE(); + } + } + break; + case BP_VAR_RW: + if (packed_loaded) { + if (not_found_exit_addr) { + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_TRUE(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(idx_not_found_inputs); + } + } + if (!packed_loaded || + !not_found_exit_addr || + (op1_info & MAY_BE_ARRAY_NUMERIC_HASH)) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + ir_END_list(idx_not_found_inputs); + } else if (!packed_loaded) { + 
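+ /* no packed fast path was generated; terminate the current control flow into the not-found list before the generic hash lookup */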
ir_END_list(idx_not_found_inputs); + } + + ir_MERGE_list(idx_not_found_inputs); + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + if (packed_loaded) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_index_lookup_rw_no_packed), + ht_ref, h); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_index_lookup_rw), ht_ref, h); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + break; + case BP_VAR_W: + if (packed_loaded) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, ref)); + ir_IF_TRUE_cold(if_def); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_def); + ir_END_list(idx_not_found_inputs); + } + if (!(op1_info & MAY_BE_ARRAY_KEY_LONG) || (op1_info & MAY_BE_ARRAY_NUMERIC_HASH) || packed_loaded || bad_packed_key || dim_type == IS_UNDEF) { + if (if_packed) { + ir_IF_FALSE(if_packed); + if_packed = IR_UNUSED; + ir_END_list(idx_not_found_inputs); + } else if (!packed_loaded) { + ir_END_list(idx_not_found_inputs); + } + ir_MERGE_list(idx_not_found_inputs); + if (!op2_loaded) { + // JIT: hval = Z_LVAL_P(dim); + h = jit_Z_LVAL(jit, op2_addr); + } + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_index_lookup), ht_ref, h); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + } + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (op2_info & MAY_BE_STRING) { + ir_ref key; + + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + // JIT: if (EXPECTED(Z_TYPE_P(dim) == IS_STRING)) + if_type = jit_if_Z_TYPE(jit, op2_addr, IS_STRING); + ir_IF_TRUE(if_type); + } + + // JIT: offset_key = Z_STR_P(dim); + key = jit_Z_PTR(jit, op2_addr); + + // JIT: retval = zend_hash_find(ht, offset_key); + switch (type) { + case BP_JIT_IS: + if (opline->op2_type != IS_CONST) { + ir_ref if_num, end1, ref2; + + if_num = ir_IF( + ir_ULE( + ir_LOAD_C(ir_ADD_OFFSET(key, offsetof(zend_string, val))), + ir_CONST_CHAR('9'))); + ir_IF_TRUE_cold(if_num); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_find), ht_ref, key); + end1 = ir_END(); + ir_IF_FALSE(if_num); + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), ht_ref, key); + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find_known_hash), ht_ref, key); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(test_zval_values, ref); + ir_refs_add(test_zval_inputs, ir_END()); + break; + case BP_VAR_R: + case BP_VAR_IS: + case BP_VAR_UNSET: + if (opline->op2_type != IS_CONST) { + ir_ref if_num, end1, ref2; + + if_num = ir_IF( + ir_ULE( + ir_LOAD_C(ir_ADD_OFFSET(key, offsetof(zend_string, val))), + ir_CONST_CHAR('9'))); + ir_IF_TRUE_cold(if_num); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_find), ht_ref, key); + end1 = ir_END(); + ir_IF_FALSE(if_num); + ref2 = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_find), ht_ref, key); + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + } else { + ref = ir_CALL_2(IR_ADDR, 
ir_CONST_FC_FUNC(zend_hash_find_known_hash), ht_ref, key); + } + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && type == BP_VAR_R) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else if (type == BP_VAR_IS && not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else if (type == BP_VAR_IS && result_type_guard) { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*not_found_inputs); + ir_IF_TRUE(if_found); + } else { + if_found = ir_IF(ref); + switch (type) { + case BP_VAR_R: + ir_IF_FALSE_cold(if_found); + // JIT: zend_error(E_WARNING, "Undefined array key \"%s\"", ZSTR_VAL(offset_key)); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_undefined_key, IR_CONST_FASTCALL_FUNC)); + ir_END_list(*end_inputs); + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + ir_IF_FALSE(if_found); + // JIT: retval = &EG(uninitialized_zval); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(*end_inputs); + break; + default: + ZEND_UNREACHABLE(); + } + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + case BP_VAR_RW: + if (opline->op2_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_lookup_rw), ht_ref, key); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_hash_lookup_rw), ht_ref, key); + } + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + } else { + if_found = ir_IF(ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_found); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + case BP_VAR_W: + if (opline->op2_type != IS_CONST) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_symtable_lookup_w), ht_ref, key); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_lookup), ht_ref, key); + } + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + if (type != BP_VAR_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + ref = jit_ZVAL_ADDR(jit, op2_addr); + switch (type) { + case BP_VAR_R: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_r_helper), + ht_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(*end_inputs); + break; + case BP_JIT_IS: + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_fetch_dim_isset_helper), ht_ref, ref); + if (not_found_exit_addr) { + ir_GUARD(ref, ir_CONST_ADDR(not_found_exit_addr)); + ir_refs_add(found_inputs, ir_END()); + } else if (found_exit_addr) { + ir_GUARD_NOT(ref, ir_CONST_ADDR(found_exit_addr)); + ir_END_list(*end_inputs); + } else { + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + } + break; + case BP_VAR_IS: + case BP_VAR_UNSET: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_is_helper), + ht_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(*end_inputs); + break; + case BP_VAR_RW: + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_rw_helper), ht_ref, ref); + if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + break; + case BP_VAR_W: + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_w_helper), ht_ref, ref); 
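+ /* a NULL result from the helper means the element could not be fetched for write; that path joins end_inputs */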
+ if_found = ir_IF(ref); + ir_IF_TRUE(if_found); + ir_refs_add(found_inputs, ir_END()); + ir_refs_add(found_vals, ref); + ir_IF_FALSE(if_found); + ir_END_list(*end_inputs); + break; + default: + ZEND_UNREACHABLE(); + } + } + + if (type == BP_JIT_IS + && (op1_info & MAY_BE_ARRAY) + && (op2_info & (MAY_BE_LONG|MAY_BE_STRING)) + && test_zval_inputs->count) { + + ir_MERGE_N(test_zval_inputs->count, test_zval_inputs->refs); + ref = ir_PHI_N(IR_ADDR, test_zval_values->count, test_zval_values->refs); + + if (op1_info & MAY_BE_ARRAY_OF_REF) { + ref = jit_ZVAL_DEREF_ref(jit, ref); + } + cond = ir_GT(jit_Z_TYPE_ref(jit, ref), ir_CONST_U8(IS_NULL)); + if (not_found_exit_addr) { + ir_GUARD(cond, ir_CONST_ADDR(not_found_exit_addr)); + ir_refs_add(found_inputs, ir_END()); + } else if (found_exit_addr) { + ir_GUARD_NOT(cond, ir_CONST_ADDR(found_exit_addr)); + ir_END_list(*end_inputs); + } else { + ir_ref if_set = ir_IF(cond); + ir_IF_FALSE(if_set); + ir_END_list(*end_inputs); + ir_IF_TRUE(if_set); + ir_refs_add(found_inputs, ir_END()); + } + } + + return 1; +} + +static int zend_jit_fetch_dim_read(zend_jit_ctx *jit, + const zend_op *opline, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_avoid_refcounting, + uint32_t op2_info, + uint32_t res_info, + zend_jit_addr res_addr, + uint8_t dim_type) +{ + zend_jit_addr orig_op1_addr, op2_addr; + const void *exit_addr = NULL; + const void *not_found_exit_addr = NULL; + bool result_type_guard = 0; + bool result_avoid_refcounting = 0; + uint32_t may_be_string = (opline->opcode != ZEND_FETCH_LIST_R) ? MAY_BE_STRING : 0; + int may_throw = 0; + ir_ref if_type = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref not_found_inputs = IR_UNUSED; + + orig_op1_addr = OP1_ADDR(); + op2_addr = OP2_ADDR(); + + if (opline->opcode != ZEND_FETCH_DIM_IS + && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && !has_concrete_type(op1_info)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if ((res_info & MAY_BE_GUARD) + && JIT_G(current_frame) + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { + + if (!(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF) - (MAY_BE_STRING|MAY_BE_LONG)))) { + result_type_guard = 1; + res_info &= ~MAY_BE_GUARD; + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + } + + if ((opline->result_type & (IS_VAR|IS_TMP_VAR)) + && (opline->opcode == ZEND_FETCH_LIST_R + || !(opline->op1_type & (IS_VAR|IS_TMP_VAR)) + || op1_avoid_refcounting) + && (res_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && (ssa_op+1)->op1_use == ssa_op->result_def + && !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF) - (MAY_BE_STRING|MAY_BE_LONG))) + && zend_jit_may_avoid_refcounting(opline+1, res_info)) { + result_avoid_refcounting = 1; + ssa->var_info[ssa_op->result_def].avoid_refcounting = 1; + } + + if (opline->opcode == ZEND_FETCH_DIM_IS + && !(res_info & MAY_BE_NULL)) { + uint32_t flags = 0; + uint32_t old_op1_info = 0; + uint32_t old_info; + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + int32_t exit_point; + + if (opline->opcode != ZEND_FETCH_LIST_R + && (opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !op1_avoid_refcounting) { + flags |= ZEND_JIT_EXIT_FREE_OP1; + } + if ((opline->op2_type & (IS_VAR|IS_TMP_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + flags |= ZEND_JIT_EXIT_FREE_OP2; + } + + if 
(op1_avoid_refcounting) { + old_op1_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_NULL, 0); + SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_TYPE_ONLY); + exit_point = zend_jit_trace_get_exit_point(opline+1, flags); + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_info); + not_found_exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!not_found_exit_addr) { + return 0; + } + + if (op1_avoid_refcounting) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_op1_info); + } + } + } + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + ir_ref ht_ref, ref; + zend_jit_addr val_addr; + ir_refs *found_inputs, *found_vals; + + ir_refs_init(found_inputs, 10); + ir_refs_init(found_vals, 10); + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + if (exit_addr && !(op1_info & (MAY_BE_OBJECT|may_be_string))) { + jit_guard_Z_TYPE(jit, op1_addr, IS_ARRAY, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(if_type); + } + } + + ht_ref = jit_Z_PTR(jit, op1_addr); + + if ((op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) || + (opline->opcode != ZEND_FETCH_DIM_IS && JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE)) { + may_throw = 1; + } + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, + (opline->opcode != ZEND_FETCH_DIM_IS) ? BP_VAR_R : BP_VAR_IS, + op1_info, op2_info, dim_type, NULL, not_found_exit_addr, exit_addr, + result_type_guard, ht_ref, found_inputs, found_vals, + &end_inputs, ¬_found_inputs)) { + return 0; + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_vals->count, found_vals->refs); + val_addr = ZEND_ADDR_REF_ZVAL(ref); + + if (result_type_guard) { + uint8_t type = concrete_type(res_info); + uint32_t flags = 0; + + if (opline->opcode != ZEND_FETCH_LIST_R + && (opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !op1_avoid_refcounting) { + flags |= ZEND_JIT_EXIT_FREE_OP1; + } + if ((opline->op2_type & (IS_VAR|IS_TMP_VAR)) + && (op2_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + flags |= ZEND_JIT_EXIT_FREE_OP2; + } + + val_addr = zend_jit_guard_fetch_result_type(jit, opline, val_addr, type, + (op1_info & MAY_BE_ARRAY_OF_REF) != 0, flags, op1_avoid_refcounting); + if (!val_addr) { + return 0; + } + + if (not_found_inputs) { + ir_END_list(not_found_inputs); + ir_MERGE_list(not_found_inputs); + } + + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, !result_avoid_refcounting); + if (Z_MODE(res_addr) != IS_REG) { + } else if (!zend_jit_store_var_if_necessary(jit, opline->result.var, res_addr, res_info)) { + return 0; + } + } else if (op1_info & MAY_BE_ARRAY_OF_REF) { + // ZVAL_COPY_DEREF + ir_ref type_info = jit_Z_TYPE_INFO(jit, val_addr); + if (!zend_jit_zval_copy_deref(jit, res_addr, val_addr, type_info)) { + return 0; + } + } else { + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, 1); + } + + ir_END_list(end_inputs); + } + } + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) { + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + if (opline->opcode != 
ZEND_FETCH_LIST_R && (op1_info & MAY_BE_STRING)) { + ir_ref str_ref; + + may_throw = 1; + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_STRING))) { + if (exit_addr && !(op1_info & MAY_BE_OBJECT)) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_type); + } + } + jit_SET_EX_OPLINE(jit, opline); + str_ref = jit_Z_PTR(jit, op1_addr); + if (opline->opcode != ZEND_FETCH_DIM_IS) { + ir_ref ref; + + if ((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_GUARD)) == MAY_BE_LONG) { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_offset_r_helper), + str_ref, jit_Z_LVAL(jit, op2_addr)); + } else { + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_r_helper), + str_ref, jit_ZVAL_ADDR(jit, op2_addr)); + } + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_str_is_helper), + str_ref, + jit_ZVAL_ADDR(jit, op2_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } + ir_END_list(end_inputs); + } + + if (op1_info & MAY_BE_OBJECT) { + ir_ref arg2; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + may_throw = 1; + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string))) { + if (exit_addr) { + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_TRUE(if_type); + } + } + + jit_SET_EX_OPLINE(jit, opline); + if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr)+1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + if (opline->opcode != ZEND_FETCH_DIM_IS) { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_r_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_is_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + } + + ir_END_list(end_inputs); + } + + if ((op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string))) + && (!exit_addr || !(op1_info & (MAY_BE_ARRAY|MAY_BE_OBJECT|may_be_string)))) { + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNDEF; + } + + if ((opline->opcode != ZEND_FETCH_DIM_IS && (op1_info & MAY_BE_UNDEF)) || (op2_info & MAY_BE_UNDEF)) { + jit_SET_EX_OPLINE(jit, opline); + if (opline->opcode != ZEND_FETCH_DIM_IS && (op1_info & MAY_BE_UNDEF)) { + may_throw = 1; + zend_jit_type_check_undef(jit, jit_Z_TYPE(jit, op1_addr), opline->op1.var, NULL, 0, 1); + } + + if (op2_info & MAY_BE_UNDEF) { + may_throw = 1; + zend_jit_type_check_undef(jit, jit_Z_TYPE(jit, op2_addr), opline->op2.var, NULL, 0, 1); + } + } + + if (opline->opcode != ZEND_FETCH_DIM_IS && opline->opcode != ZEND_FETCH_LIST_R) { + ir_ref ref; + + may_throw = 1; + if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { + ref = jit_ZVAL_ADDR(jit, orig_op1_addr); + } else { + jit_SET_EX_OPLINE(jit, opline); + ref = jit_ZVAL_ADDR(jit, op1_addr); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_array_access), ref); + } + + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + ir_END_list(end_inputs); + } + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & 
MAY_BE_OBJECT)) { + /* Magic offsetGet() may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if (opline->op2_type & (IS_TMP_VAR|IS_VAR)) { + if ((op2_info & MAY_HAVE_DTOR) && (op2_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + } + if (opline->opcode != ZEND_FETCH_LIST_R && !op1_avoid_refcounting) { + if (opline->op1_type & (IS_TMP_VAR|IS_VAR)) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + } else if (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) { + ir_BEGIN(IR_UNUSED); /* unreachable tail */ + } + + return 1; +} + +static zend_jit_addr zend_jit_prepare_array_update(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + ir_ref *if_type, + ir_ref *ht_ref, + int *may_throw) +{ + ir_ref ref = IR_UNUSED; + ir_ref array_reference_end = IR_UNUSED, array_reference_ref = IR_UNUSED; + ir_refs *array_inputs, *array_values; + + ir_refs_init(array_inputs, 4); + ir_refs_init(array_values, 4); + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if (op1_info & MAY_BE_REF) { + ir_ref if_reference, if_array, end1, ref2; + + *may_throw = 1; + if_reference = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE(if_reference); + end1 = ir_END(); + ir_IF_TRUE_cold(if_reference); + array_reference_ref = ir_ADD_OFFSET(jit_Z_PTR_ref(jit, ref), offsetof(zend_reference, val)); + if_array = jit_if_Z_TYPE_ref(jit, array_reference_ref, ir_CONST_U8(IS_ARRAY)); + ir_IF_TRUE(if_array); + array_reference_end = ir_END(); + ir_IF_FALSE_cold(if_array); + if (opline->opcode != ZEND_FETCH_DIM_RW && opline->opcode != ZEND_ASSIGN_DIM_OP) { + jit_SET_EX_OPLINE(jit, opline); + } + ref2 = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_prepare_assign_dim_ref), ref); + ir_GUARD(ref2, jit_STUB_ADDR(jit, jit_stub_exception_handler_undef)); + + ir_MERGE_WITH(end1); + ref = ir_PHI_2(IR_ADDR, ref2, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + ir_ref op1_ref = ref; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + *if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(*if_type); + } + if (array_reference_end) { + ir_MERGE_WITH(array_reference_end); + op1_ref = ir_PHI_2(IR_ADDR, ref, array_reference_ref); + } + // JIT: SEPARATE_ARRAY() + ref = jit_Z_PTR_ref(jit, op1_ref); + if (RC_MAY_BE_N(op1_info)) { + if (RC_MAY_BE_1(op1_info)) { + ir_ref if_refcount_1 = ir_IF(ir_EQ(jit_GC_REFCOUNT(jit, ref), ir_CONST_U32(1))); + ir_IF_TRUE(if_refcount_1); + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + ir_IF_FALSE(if_refcount_1); + } + ref = ir_CALL_1(IR_ADDR, ir_CONST_FC_FUNC(zend_jit_zval_array_dup), op1_ref); + } + if (array_inputs->count || (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL))) { + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + } + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL)) { + if (*if_type) { + ir_IF_FALSE_cold(*if_type); + *if_type = IR_UNUSED; + } + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + *if_type = ir_IF(ir_LE(jit_Z_TYPE(jit, op1_addr), ir_CONST_U8(IS_NULL))); + ir_IF_TRUE(*if_type); + } + if ((op1_info & MAY_BE_UNDEF) + && (opline->opcode == ZEND_FETCH_DIM_RW || opline->opcode == ZEND_ASSIGN_DIM_OP)) { + ir_ref end1 = IR_UNUSED; + + *may_throw = 1; + if (op1_info & MAY_BE_NULL) { + ir_ref if_def = 
ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + end1 = ir_END(); + ir_IF_FALSE(if_def); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op1.var)); + if (end1) { + ir_MERGE_WITH(end1); + } + } + // JIT: ZVAL_ARR(container, zend_new_array(8)); + ref = ir_CALL_1(IR_ADDR, + jit_STUB_FUNC_ADDR(jit, jit_stub_new_array, IR_CONST_FASTCALL_FUNC), + jit_ZVAL_ADDR(jit, op1_addr)); + if (array_inputs->count) { + ir_refs_add(array_inputs, ir_END()); + ir_refs_add(array_values, ref); + } + } + + if (array_inputs->count) { + ir_MERGE_N(array_inputs->count, array_inputs->refs); + ref = ir_PHI_N(IR_ADDR, array_values->count, array_values->refs); + } + + *ht_ref = ref; + return op1_addr; +} + +static int zend_jit_fetch_dim(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t op2_info, + zend_jit_addr res_addr, + uint8_t dim_type) +{ + zend_jit_addr op2_addr; + int may_throw = 0; + ir_ref end_inputs = IR_UNUSED; + ir_ref ref, if_type = IR_UNUSED, ht_ref; + + op2_addr = (opline->op2_type != IS_UNUSED) ? OP2_ADDR() : 0; + + if (opline->opcode == ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + ir_refs *found_inputs, *found_vals; + + ir_refs_init(found_inputs, 8); + ir_refs_init(found_vals, 8); + + if (opline->op2_type == IS_UNUSED) { + ir_ref if_ok; + + may_throw = 1; + // JIT:var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + if (opline->opcode != ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + + ir_END_list(end_inputs); + } else { + uint32_t type; + + switch (opline->opcode) { + case ZEND_FETCH_DIM_W: + case ZEND_FETCH_LIST_W: + type = BP_VAR_W; + break; + case ZEND_FETCH_DIM_RW: + may_throw = 1; + type = BP_VAR_RW; + break; + case ZEND_FETCH_DIM_UNSET: + type = BP_VAR_UNSET; + break; + default: + ZEND_UNREACHABLE(); + } + + if (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING))) { + may_throw = 1; + } + if (!zend_jit_fetch_dimension_address_inner(jit, opline, type, op1_info, op2_info, dim_type, NULL, NULL, NULL, + 0, ht_ref, found_inputs, found_vals, &end_inputs, NULL)) { + return 0; + } + + if (type == BP_VAR_RW || (op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING)))) { + if (end_inputs) { + ir_MERGE_list(end_inputs); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + end_inputs = ir_END(); + } + } else { + ZEND_ASSERT(end_inputs == IR_UNUSED); + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_vals->count, found_vals->refs); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + ir_END_list(end_inputs); + } + + } + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + ir_ref arg2; + + may_throw = 1; + + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IR_UNUSED; + } + + if (opline->opcode != 
ZEND_FETCH_DIM_RW) { + jit_SET_EX_OPLINE(jit, opline); + } + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + switch (opline->opcode) { + case ZEND_FETCH_DIM_W: + case ZEND_FETCH_LIST_W: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_w_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + break; + case ZEND_FETCH_DIM_RW: + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_dim_obj_rw_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, res_addr)); + break; +// case ZEND_FETCH_DIM_UNSET: +// | EXT_CALL zend_jit_fetch_dim_obj_unset_helper, r0 +// break; + default: + ZEND_UNREACHABLE(); + } + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + ir_END_list(end_inputs); + } + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_ARRAY|MAY_BE_OBJECT))) { + /* ASSIGN_DIM may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) + && (op2_info & MAY_HAVE_DTOR) + && (op2_info & MAY_BE_RC1)) { + may_throw = 1; + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_isset_isempty_dim(zend_jit_ctx *jit, + const zend_op *opline, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_avoid_refcounting, + uint32_t op2_info, + uint8_t dim_type, + int may_throw, + uint8_t smart_branch_opcode, + uint32_t target_label, + uint32_t target_label2, + const void *exit_addr) +{ + zend_jit_addr op2_addr, res_addr; + ir_ref if_type = IR_UNUSED; + ir_ref false_inputs = IR_UNUSED, end_inputs = IR_UNUSED; + ir_refs *true_inputs; + + ir_refs_init(true_inputs, 8); + + // TODO: support for empty() ??? 
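+ /* only the isset() variant is compiled here; the assertion below documents that empty() never reaches this code */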
+ ZEND_ASSERT(!(opline->extended_value & ZEND_ISEMPTY)); + + op2_addr = OP2_ADDR(); + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + + if (op1_info & MAY_BE_REF) { + ir_ref ref = jit_ZVAL_ADDR(jit, op1_addr); + ref = jit_ZVAL_DEREF_ref(jit, ref); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } + + if (op1_info & MAY_BE_ARRAY) { + const void *found_exit_addr = NULL; + const void *not_found_exit_addr = NULL; + ir_ref ht_ref; + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY)) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_ARRAY); + ir_IF_TRUE(if_type); + } + + ht_ref = jit_Z_PTR(jit, op1_addr); + + if (exit_addr + && !(op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) + && !may_throw + && (!(opline->op1_type & (IS_TMP_VAR|IS_VAR)) || op1_avoid_refcounting) + && (!(opline->op2_type & (IS_TMP_VAR|IS_VAR)) || !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)))) { + if (smart_branch_opcode == ZEND_JMPNZ) { + found_exit_addr = exit_addr; + } else { + not_found_exit_addr = exit_addr; + } + } + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_JIT_IS, op1_info, op2_info, dim_type, found_exit_addr, not_found_exit_addr, NULL, + 0, ht_ref, true_inputs, NULL, &false_inputs, NULL)) { + return 0; + } + + if (found_exit_addr) { + ir_MERGE_list(false_inputs); + return 1; + } else if (not_found_exit_addr) { + ir_MERGE_N(true_inputs->count, true_inputs->refs); + return 1; + } + } + + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_ARRAY)) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IR_UNUSED; + } + + if (op1_info & (MAY_BE_STRING|MAY_BE_OBJECT)) { + ir_ref ref, arg1, arg2, if_true; + + jit_SET_EX_OPLINE(jit, opline); + arg1 = jit_ZVAL_ADDR(jit, op1_addr); + if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr)+1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + ref = ir_CALL_2(IR_I32, ir_CONST_FC_FUNC(zend_jit_isset_dim_helper), arg1, arg2); + if_true = ir_IF(ref); + ir_IF_TRUE(if_true); + ir_refs_add(true_inputs, ir_END()); + ir_IF_FALSE(if_true); + ir_END_list(false_inputs); + } else { + if (op2_info & MAY_BE_UNDEF) { + ir_ref end1 = IR_UNUSED; + + if (op2_info & MAY_BE_ANY) { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op2_addr)); + ir_IF_TRUE(if_def); + end1 = ir_END(); + ir_IF_FALSE(if_def); + } + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), ir_CONST_U32(opline->op2.var)); + if (end1) { + ir_MERGE_WITH(end1); + } + } + ir_END_list(false_inputs); + } + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & MAY_BE_OBJECT)) { + /* Magic offsetExists() may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + if (true_inputs->count) { + ir_MERGE_N(true_inputs->count, true_inputs->refs); + + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!op1_avoid_refcounting) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (!(opline->extended_value & ZEND_ISEMPTY)) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPNZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + if (smart_branch_opcode == ZEND_JMPZ) { + _zend_jit_add_predecessor_ref(jit, target_label2, jit->b, ir_END()); + } else if 
(smart_branch_opcode == ZEND_JMPNZ) { + _zend_jit_add_predecessor_ref(jit, target_label, jit->b, ir_END()); + } else { + ZEND_UNREACHABLE(); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_TRUE); + ir_END_list(end_inputs); + } + } else { + ZEND_UNREACHABLE(); // TODO: support for empty() + } + } + + ir_MERGE_list(false_inputs); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + if (!op1_avoid_refcounting) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + if (may_throw) { + zend_jit_check_exception_undef_result(jit, opline); + } + if (!(opline->extended_value & ZEND_ISEMPTY)) { + if (exit_addr) { + if (smart_branch_opcode == ZEND_JMPZ) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } else if (smart_branch_opcode) { + if (smart_branch_opcode == ZEND_JMPZ) { + _zend_jit_add_predecessor_ref(jit, target_label, jit->b, ir_END()); + } else if (smart_branch_opcode == ZEND_JMPNZ) { + _zend_jit_add_predecessor_ref(jit, target_label2, jit->b, ir_END()); + } else { + ZEND_UNREACHABLE(); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_FALSE); + ir_END_list(end_inputs); + } + } else { + ZEND_UNREACHABLE(); // TODO: support for empty() + } + + if (!exit_addr && smart_branch_opcode) { + jit->b = -1; + } else { + ir_MERGE_list(end_inputs); + } + + return 1; +} + +static int zend_jit_assign_dim(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, zend_jit_addr op1_addr, uint32_t op2_info, uint32_t val_info, uint8_t dim_type, int may_throw) +{ + zend_jit_addr op2_addr, op3_addr, res_addr; + ir_ref if_type = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED, ht_ref; + + op2_addr = (opline->op2_type != IS_UNUSED) ? OP2_ADDR() : 0; + op3_addr = OP1_DATA_ADDR(); + if (opline->result_type == IS_UNUSED) { + res_addr = 0; + } else { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && (val_info & MAY_BE_UNDEF)) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_not_Z_TYPE(jit, op3_addr, IS_UNDEF, exit_addr); + + val_info &= ~MAY_BE_UNDEF; + } + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + if (opline->op2_type == IS_UNUSED) { + uint32_t var_info = MAY_BE_NULL; + ir_ref if_ok, ref; + zend_jit_addr var_addr; + + // JIT: var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + + // JIT: zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + if (!zend_jit_simple_assign(jit, opline, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + uint32_t var_info = zend_array_element_type(op1_info, opline->op1_type, 0, 0); + zend_jit_addr var_addr; + ir_ref ref; + ir_refs *found_inputs, *found_values; + + ir_refs_init(found_inputs, 8); + 
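/* parallel lists: found_inputs collects a control-flow END and found_values the matching zval ref for each successful lookup path */ +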
ir_refs_init(found_values, 8); + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_VAR_W, op1_info, op2_info, dim_type, NULL, NULL, NULL, + 0, ht_ref, found_inputs, found_values, &end_inputs, NULL)) { + return 0; + } + + if (op1_info & (MAY_BE_ARRAY_OF_REF|MAY_BE_OBJECT)) { + var_info |= MAY_BE_REF; + } + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + var_info |= MAY_BE_RC1; + } + + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_values->count, found_values->refs); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + + // JIT: value = zend_assign_to_variable(variable_ptr, value, OP_DATA_TYPE); + if (opline->op1_type == IS_VAR) { + ZEND_ASSERT(opline->result_type == IS_UNUSED); + if (!zend_jit_assign_to_variable_call(jit, opline, var_addr, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + if (!zend_jit_assign_to_variable(jit, opline, var_addr, var_addr, var_info, -1, (opline+1)->op1_type, op3_addr, val_info, res_addr, 0, 0)) { + return 0; + } + } + } + + ir_END_list(end_inputs); + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + ir_ref arg2, arg4; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IR_UNUSED; + } + + jit_SET_EX_OPLINE(jit, opline); + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + + if (opline->result_type == IS_UNUSED) { + arg4 = IR_NULL; + } else { + arg4 = jit_ZVAL_ADDR(jit, res_addr); + } + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_dim_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, op3_addr), + arg4); + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if (((opline+1)->op1_type & (IS_TMP_VAR|IS_VAR)) && (val_info & MAY_BE_RC1)) { + /* ASSIGN_DIM may increase refcount of the value */ + val_info |= MAY_BE_RCN; + } +#endif + + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, NULL); + + ir_END_list(end_inputs); + } + +#ifdef ZEND_JIT_USE_RC_INFERENCE + if ((opline->op2_type & (IS_TMP_VAR|IS_VAR)) && (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_ARRAY|MAY_BE_OBJECT))) { + /* ASSIGN_DIM may increase refcount of the key */ + op2_info |= MAY_BE_RCN; + } +#endif + + ir_MERGE_list(end_inputs); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, opline); + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_dim_op(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, uint32_t op1_def_info, zend_jit_addr op1_addr, uint32_t op2_info, uint32_t op1_data_info, zend_ssa_range *op1_data_range, uint8_t dim_type, int may_throw) +{ + zend_jit_addr op2_addr, op3_addr, var_addr = IS_UNUSED; + const void *not_found_exit_addr = NULL; + uint32_t var_info = MAY_BE_NULL; + ir_ref if_type = IS_UNUSED; + ir_ref end_inputs = IR_UNUSED, ht_ref; + bool emit_fast_path = 1; + + ZEND_ASSERT(opline->result_type == IS_UNUSED); + + op2_addr = (opline->op2_type != IS_UNUSED) ? 
OP2_ADDR() : 0; + op3_addr = OP1_DATA_ADDR(); + + jit_SET_EX_OPLINE(jit, opline); + + op1_addr = zend_jit_prepare_array_update(jit, opline, op1_info, op1_addr, &if_type, &ht_ref, &may_throw); + + if (op1_info & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_ARRAY)) { + uint32_t var_def_info = zend_array_element_type(op1_def_info, opline->op1_type, 1, 0); + + if (opline->op2_type == IS_UNUSED) { + var_info = MAY_BE_NULL; + ir_ref if_ok, ref; + + // JIT: var_ptr = zend_hash_next_index_insert(Z_ARRVAL_P(container), &EG(uninitialized_zval)); + ref = ir_CALL_2(IR_ADDR, ir_CONST_FC_FUNC(zend_hash_next_index_insert), + ht_ref, jit_EG(uninitialized_zval)); + + // JIT: if (UNEXPECTED(!var_ptr)) { + if_ok = ir_IF(ref); + ir_IF_FALSE_cold(if_ok); + + // JIT: zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); + ir_CALL(IR_VOID, jit_STUB_FUNC_ADDR(jit, jit_stub_cannot_add_element, IR_CONST_FASTCALL_FUNC)); + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_ok); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + } else { + ir_ref ref; + ir_refs *found_inputs, *found_values; + + ir_refs_init(found_inputs, 8); + ir_refs_init(found_values, 8); + + var_info = zend_array_element_type(op1_info, opline->op1_type, 0, 0); + if (op1_info & (MAY_BE_ARRAY_OF_REF|MAY_BE_OBJECT)) { + var_info |= MAY_BE_REF; + } + if (var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + var_info |= MAY_BE_RC1; + } + + if (dim_type != IS_UNKNOWN + && dim_type != IS_UNDEF + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY + && (op2_info & (MAY_BE_LONG|MAY_BE_STRING)) + && !(op2_info & ((MAY_BE_ANY|MAY_BE_UNDEF) - (MAY_BE_LONG|MAY_BE_STRING)))) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + not_found_exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!not_found_exit_addr) { + return 0; + } + } + + if (!zend_jit_fetch_dimension_address_inner(jit, opline, BP_VAR_RW, op1_info, op2_info, dim_type, NULL, not_found_exit_addr, NULL, + 0, ht_ref, found_inputs, found_values, &end_inputs, NULL)) { + return 0; + } + + if (found_inputs->count) { + ir_MERGE_N(found_inputs->count, found_inputs->refs); + ref = ir_PHI_N(IR_ADDR, found_values->count, found_values->refs); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + + if (not_found_exit_addr && dim_type != IS_REFERENCE) { + jit_guard_Z_TYPE(jit, var_addr, dim_type, not_found_exit_addr); + var_info = (1 << dim_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + if (var_info & MAY_BE_REF) { + binary_op_type binary_op = get_binary_op(opline->extended_value); + ir_ref if_ref, if_typed, noref_path, ref_path, ref, reference, ref2, arg2; + + ref = jit_ZVAL_ADDR(jit, var_addr); + if_ref = jit_if_Z_TYPE(jit, var_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR_ref(jit, ref); + ref2 = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + arg2 = jit_ZVAL_ADDR(jit, op3_addr); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, arg2, ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + } + } else { + emit_fast_path = 0; + } + } + + if (emit_fast_path) { + uint8_t val_op_type = (opline+1)->op1_type; + + if (val_op_type & (IS_TMP_VAR|IS_VAR)) { + /* prevent FREE_OP in the helpers */ + 
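/* the value operand is still freed exactly once, by the jit_FREE_OP() call at the end of this function */ +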
val_op_type = IS_CV; + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + case ZEND_DIV: + if (!zend_jit_math_helper(jit, opline, opline->extended_value, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, op3_addr, op1_data_info, 0, var_addr, var_def_info, var_info, + 1 /* may overflow */, may_throw)) { + return 0; + } + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + if (!zend_jit_long_math_helper(jit, opline, opline->extended_value, + IS_CV, opline->op1, var_addr, var_info, NULL, + val_op_type, (opline+1)->op1, op3_addr, op1_data_info, + op1_data_range, + 0, var_addr, var_def_info, var_info, may_throw)) { + return 0; + } + break; + case ZEND_CONCAT: + if (!zend_jit_concat_helper(jit, opline, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, op3_addr, op1_data_info, var_addr, + may_throw)) { + return 0; + } + break; + default: + ZEND_UNREACHABLE(); + } + + ir_END_list(end_inputs); + } + } + + if (op1_info & (MAY_BE_ANY-(MAY_BE_NULL|MAY_BE_ARRAY))) { + binary_op_type binary_op; + ir_ref arg2; + + if (if_type) { + ir_IF_FALSE_cold(if_type); + if_type = IS_UNUSED; + } + + if (opline->op2_type == IS_UNUSED) { + arg2 = IR_NULL; + } else if (opline->op2_type == IS_CONST && Z_EXTRA_P(RT_CONSTANT(opline, opline->op2)) == ZEND_EXTRA_VALUE) { + ZEND_ASSERT(Z_MODE(op2_addr) == IS_CONST_ZVAL); + arg2 = ir_CONST_ADDR(Z_ZV(op2_addr) + 1); + } else { + arg2 = jit_ZVAL_ADDR(jit, op2_addr); + } + binary_op = get_binary_op(opline->extended_value); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_dim_op_helper), + jit_ZVAL_ADDR(jit, op1_addr), + arg2, + jit_ZVAL_ADDR(jit, op3_addr), + ir_CONST_FC_FUNC(binary_op)); + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, op1_data_info, NULL); + jit_FREE_OP(jit, opline->op2_type, opline->op2, op2_info, NULL); + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_fe_reset(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info) +{ + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + + // JIT: ZVAL_COPY(res, value); + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + + jit_ZVAL_COPY_CONST(jit, res_addr, MAY_BE_ANY, MAY_BE_ANY, zv, 1); + } else { + zend_jit_addr op1_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + + jit_ZVAL_COPY(jit, res_addr, -1, op1_addr, op1_info, opline->op1_type == IS_CV); + } + + // JIT: Z_FE_POS_P(res) = 0; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->result.var + offsetof(zval, u2.fe_pos)), ir_CONST_U32(0)); + + return 1; +} + +static int zend_jit_packed_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var, uint32_t op_info) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_PACKED_GUARD); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + ir_ref ref; + + if (!exit_addr) { + return 0; + } + + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(jit_Z_PTR(jit, addr), offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if (op_info & MAY_BE_ARRAY_PACKED) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_GUARD_NOT(ref, ir_CONST_ADDR(exit_addr)); + } + + return 1; +} + +static int zend_jit_fe_fetch(zend_jit_ctx *jit, const zend_op *opline, uint32_t op1_info, 
uint32_t op2_info, unsigned int target_label, uint8_t exit_opcode, const void *exit_addr) +{ + zend_jit_addr op1_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op1.var); + ir_ref ref, ht_ref, hash_pos_ref, packed_pos_ref, hash_p_ref = IR_UNUSED, packed_p_ref = IR_UNUSED, if_packed = IR_UNUSED; + ir_ref if_def_hash = IR_UNUSED, if_def_packed = IR_UNUSED; + ir_ref exit_inputs = IR_UNUSED; + + if (!MAY_BE_HASH(op1_info) && !MAY_BE_PACKED(op1_info)) { + /* empty array */ + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } else { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ir_END()); + jit->b = -1; + } + return 1; + } + + // JIT: array = EX_VAR(opline->op1.var); + // JIT: fe_ht = Z_ARRVAL_P(array); + ht_ref = jit_Z_PTR(jit, op1_addr); + + if (op1_info & MAY_BE_PACKED_GUARD) { + if (!zend_jit_packed_guard(jit, opline, opline->op1.var, op1_info)) { + return 0; + } + } + + // JIT: pos = Z_FE_POS_P(array); + hash_pos_ref = packed_pos_ref = ir_LOAD_U32(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos))); + + if (MAY_BE_HASH(op1_info)) { + ir_ref loop_ref, pos2_ref, p2_ref; + + if (MAY_BE_PACKED(op1_info)) { + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, u.flags))), + ir_CONST_U32(HASH_FLAG_PACKED)); + if_packed = ir_IF(ref); + ir_IF_FALSE(if_packed); + } + + // JIT: p = fe_ht->arData + pos; + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(hash_pos_ref); + } else { + ref = ir_BITCAST_A(hash_pos_ref); + } + hash_p_ref = ir_ADD_A( + ir_MUL_A(ref, ir_CONST_ADDR(sizeof(Bucket))), + ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arData)))); + + loop_ref = ir_LOOP_BEGIN(ir_END()); + hash_pos_ref = ir_PHI_2(IR_U32, hash_pos_ref, IR_UNUSED); + hash_p_ref = ir_PHI_2(IR_ADDR, hash_p_ref, IR_UNUSED); + + // JIT: if (UNEXPECTED(pos >= fe_ht->nNumUsed)) { + ref = ir_ULT(hash_pos_ref, + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed)))); + + // JIT: ZEND_VM_SET_RELATIVE_OPCODE(opline, opline->extended_value); + // JIT: ZEND_VM_CONTINUE(); + + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + + // JIT: pos++; + pos2_ref = ir_ADD_U32(hash_pos_ref, ir_CONST_U32(1)); + + // JIT: value_type = Z_TYPE_INFO_P(value); + // JIT: if (EXPECTED(value_type != IS_UNDEF)) { + if (!exit_addr || exit_opcode == ZEND_JMP) { + if_def_hash = ir_IF(jit_Z_TYPE_ref(jit, hash_p_ref)); + ir_IF_FALSE(if_def_hash); + } else { + ir_GUARD_NOT(jit_Z_TYPE_ref(jit, hash_p_ref), ir_CONST_ADDR(exit_addr)); + } + + // JIT: p++; + p2_ref = ir_ADD_OFFSET(hash_p_ref, sizeof(Bucket)); + + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + ir_PHI_SET_OP(hash_pos_ref, 2, pos2_ref); + ir_PHI_SET_OP(hash_p_ref, 2, p2_ref); + + if (MAY_BE_PACKED(op1_info)) { + ir_IF_TRUE(if_packed); + } + } + if (MAY_BE_PACKED(op1_info)) { + ir_ref loop_ref, pos2_ref, p2_ref; + + // JIT: p = fe_ht->arPacked + pos; + if (sizeof(void*) == 8) { + ref = ir_ZEXT_A(packed_pos_ref); + } else { + ref = ir_BITCAST_A(packed_pos_ref); + } + packed_p_ref = ir_ADD_A( + ir_MUL_A(ref, ir_CONST_ADDR(sizeof(zval))), + ir_LOAD_A(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, arPacked)))); + + 
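/* loop: skip IS_UNDEF slots until a live element is found or pos reaches nNumUsed */ +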
loop_ref = ir_LOOP_BEGIN(ir_END()); + packed_pos_ref = ir_PHI_2(IR_U32, packed_pos_ref, IR_UNUSED); + packed_p_ref = ir_PHI_2(IR_ADDR, packed_p_ref, IR_UNUSED); + + // JIT: if (UNEXPECTED(pos >= fe_ht->nNumUsed)) { + ref = ir_ULT(packed_pos_ref, + ir_LOAD_U32(ir_ADD_OFFSET(ht_ref, offsetof(zend_array, nNumUsed)))); + + // JIT: ZEND_VM_SET_RELATIVE_OPCODE(opline, opline->extended_value); + // JIT: ZEND_VM_CONTINUE(); + if (exit_addr) { + if (exit_opcode == ZEND_JMP) { + ir_GUARD(ref, ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + } else { + ir_ref if_fit = ir_IF(ref); + ir_IF_FALSE(if_fit); + ir_END_list(exit_inputs); + ir_IF_TRUE(if_fit); + } + + // JIT: pos++; + pos2_ref = ir_ADD_U32(packed_pos_ref, ir_CONST_U32(1)); + + // JIT: value_type = Z_TYPE_INFO_P(value); + // JIT: if (EXPECTED(value_type != IS_UNDEF)) { + if (!exit_addr || exit_opcode == ZEND_JMP) { + if_def_packed = ir_IF(jit_Z_TYPE_ref(jit, packed_p_ref)); + ir_IF_FALSE(if_def_packed); + } else { + ir_GUARD_NOT(jit_Z_TYPE_ref(jit, packed_p_ref), ir_CONST_ADDR(exit_addr)); + } + + // JIT: p++; + p2_ref = ir_ADD_OFFSET(packed_p_ref, sizeof(zval)); + + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + ir_PHI_SET_OP(packed_pos_ref, 2, pos2_ref); + ir_PHI_SET_OP(packed_p_ref, 2, p2_ref); + } + + if (!exit_addr || exit_opcode == ZEND_JMP) { + zend_jit_addr val_addr; + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->op2.var); + uint32_t val_info; + ir_ref p_ref = IR_UNUSED, hash_path = IR_UNUSED; + + if (RETURN_VALUE_USED(opline)) { + zend_jit_addr res_addr = RES_ADDR(); + + if (MAY_BE_HASH(op1_info)) { + ir_ref key_ref = IR_UNUSED, if_key = IR_UNUSED, key_path = IR_UNUSED; + + ZEND_ASSERT(if_def_hash); + ir_IF_TRUE(if_def_hash); + + // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(hash_pos_ref, ir_CONST_U32(1))); + + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + key_ref = ir_LOAD_A(ir_ADD_OFFSET(hash_p_ref, offsetof(Bucket, key))); + } + if ((op1_info & MAY_BE_ARRAY_KEY_LONG) + && (op1_info & MAY_BE_ARRAY_KEY_STRING)) { + // JIT: if (!p->key) { + if_key = ir_IF(key_ref); + ir_IF_TRUE(if_key); + } + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_ref if_interned, interned_path; + + // JIT: ZVAL_STR_COPY(EX_VAR(opline->result.var), p->key); + jit_set_Z_PTR(jit, res_addr, key_ref); + ref = ir_AND_U32( + ir_LOAD_U32(ir_ADD_OFFSET(key_ref, offsetof(zend_refcounted, gc.u.type_info))), + ir_CONST_U32(IS_STR_INTERNED)); + if_interned = ir_IF(ref); + ir_IF_TRUE(if_interned); + + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING); + + interned_path = ir_END(); + ir_IF_FALSE(if_interned); + + jit_GC_ADDREF(jit, key_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_STRING_EX); + + ir_MERGE_WITH(interned_path); + + if (op1_info & MAY_BE_ARRAY_KEY_LONG) { + key_path = ir_END(); + } + } + if (op1_info & MAY_BE_ARRAY_KEY_LONG) { + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_IF_FALSE(if_key); + } + // JIT: ZVAL_LONG(EX_VAR(opline->result.var), p->h); + ref = ir_LOAD_L(ir_ADD_OFFSET(hash_p_ref, offsetof(Bucket, h))); + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + + if (op1_info & MAY_BE_ARRAY_KEY_STRING) { + ir_MERGE_WITH(key_path); + } + } + if (MAY_BE_PACKED(op1_info)) { + hash_path = ir_END(); + } else { + p_ref = hash_p_ref; + } + } + if (MAY_BE_PACKED(op1_info)) { + ZEND_ASSERT(if_def_packed); + ir_IF_TRUE(if_def_packed); + 
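+ /* packed array: the position itself is the integer key, so no Bucket key handling is needed */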
+ // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(packed_pos_ref, ir_CONST_U32(1))); + + // JIT: ZVAL_LONG(EX_VAR(opline->result.var), pos); + if (sizeof(zend_long) == 8) { + packed_pos_ref = ir_ZEXT_L(packed_pos_ref); + } else { + packed_pos_ref = ir_BITCAST_L(packed_pos_ref); + } + jit_set_Z_LVAL(jit, res_addr, packed_pos_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + + if (MAY_BE_HASH(op1_info)) { + ir_MERGE_WITH(hash_path); + p_ref = ir_PHI_2(IR_ADDR, packed_p_ref, hash_p_ref); + } else { + p_ref = packed_p_ref; + } + } + } else { + ir_ref pos_ref = IR_UNUSED; + + if (if_def_hash && if_def_packed) { + ir_IF_TRUE(if_def_hash); + ir_MERGE_WITH_EMPTY_TRUE(if_def_packed); + pos_ref = ir_PHI_2(IR_U32, hash_pos_ref, packed_pos_ref); + p_ref = ir_PHI_2(IR_ADDR, hash_p_ref, packed_p_ref); + } else if (if_def_hash) { + ir_IF_TRUE(if_def_hash); + pos_ref = hash_pos_ref; + p_ref = hash_p_ref; + } else if (if_def_packed) { + ir_IF_TRUE(if_def_packed); + pos_ref = packed_pos_ref; + p_ref = packed_p_ref; + } else { + ZEND_UNREACHABLE(); + } + + // JIT: Z_FE_POS_P(array) = pos + 1; + ir_STORE(ir_ADD_OFFSET(jit_FP(jit), opline->op1.var + offsetof(zval, u2.fe_pos)), + ir_ADD_U32(pos_ref, ir_CONST_U32(1))); + } + + val_info = ((op1_info & MAY_BE_ARRAY_OF_ANY) >> MAY_BE_ARRAY_SHIFT); + if (val_info & MAY_BE_ARRAY) { + val_info |= MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF; + } + if (op1_info & MAY_BE_ARRAY_OF_REF) { + val_info |= MAY_BE_REF | MAY_BE_RC1 | MAY_BE_RCN | MAY_BE_ANY | + MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF; + } else if (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1 | MAY_BE_RCN; + } + + val_addr = ZEND_ADDR_REF_ZVAL(p_ref); + if (opline->op2_type == IS_CV) { + // JIT: zend_assign_to_variable(variable_ptr, value, IS_CV, EX_USES_STRICT_TYPES()); + if (!zend_jit_assign_to_variable(jit, opline, var_addr, var_addr, op2_info, -1, IS_CV, val_addr, val_info, 0, 0, 1)) { + return 0; + } + } else { + // JIT: ZVAL_COPY(res, value); + jit_ZVAL_COPY(jit, var_addr, -1, val_addr, val_info, 1); + } + + if (!exit_addr) { + zend_basic_block *bb; + + ZEND_ASSERT(jit->b >= 0); + bb = &jit->ssa->cfg.blocks[jit->b]; + _zend_jit_add_predecessor_ref(jit, bb->successors[1], jit->b, ir_END()); + ZEND_ASSERT(exit_inputs); + if (!jit->ctx.ir_base[exit_inputs].op2) { + ref = exit_inputs; + } else { + ir_MERGE_list(exit_inputs); + ref = ir_END(); + } + _zend_jit_add_predecessor_ref(jit, bb->successors[0], jit->b, ref); + jit->b = -1; + } + } else { + ZEND_ASSERT(exit_inputs); + ir_MERGE_list(exit_inputs); + } + + return 1; +} + +static int zend_jit_load_this(zend_jit_ctx *jit, uint32_t var) +{ + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + zend_jit_addr var_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + ir_ref ref = jit_Z_PTR(jit, this_addr); + + jit_set_Z_PTR(jit, var_addr, ref); + jit_set_Z_TYPE_INFO(jit, var_addr, IS_OBJECT_EX); + jit_GC_ADDREF(jit, ref); + + return 1; +} + +static int zend_jit_fetch_this(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, bool check_only) +{ + if (!op_array->scope || (op_array->fn_flags & ZEND_ACC_STATIC)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (!JIT_G(current_frame) || + !TRACE_FRAME_IS_THIS_CHECKED(JIT_G(current_frame))) { + + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, 
This)); + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, this_addr, IS_OBJECT, exit_addr); + + if (JIT_G(current_frame)) { + TRACE_FRAME_SET_THIS_CHECKED(JIT_G(current_frame)); + } + } + } else { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + ir_ref if_object = jit_if_Z_TYPE(jit, this_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_object); + jit_SET_EX_OPLINE(jit, opline); + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_invalid_this)); + + ir_IF_TRUE(if_object); + } + } + + if (!check_only) { + if (!zend_jit_load_this(jit, opline->result.var)) { + return 0; + } + } + + return 1; +} + +static int zend_jit_class_guard(zend_jit_ctx *jit, const zend_op *opline, ir_ref obj_ref, zend_class_entry *ce) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD(ir_EQ(ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))), ir_CONST_ADDR(ce)), + ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_fetch_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + bool op1_avoid_refcounting, + zend_class_entry *trace_ce, + uint8_t prop_type, + int may_throw) +{ + zval *member; + zend_property_info *prop_info; + bool may_be_dynamic = 1; + zend_jit_addr res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + zend_jit_addr prop_addr; + uint32_t res_info = RES_INFO(); + ir_ref prop_type_ref = IR_UNUSED; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + prop_info = zend_get_known_property_info(op_array, ce, Z_STR_P(member), on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && opline->opcode == ZEND_FETCH_OBJ_W + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + + ir_IF_FALSE_cold(if_obj); + if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_ref op1_ref = IR_UNUSED; + + jit_SET_EX_OPLINE(jit, opline); + if (opline->opcode != ZEND_FETCH_OBJ_W && (op1_info & MAY_BE_UNDEF)) { + zend_jit_addr orig_op1_addr = OP1_ADDR(); + ir_ref fast_path = IR_UNUSED; + + if (op1_info & MAY_BE_ANY) { + ir_ref if_def = 
ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_def); + } + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + if (fast_path) { + ir_MERGE_WITH(fast_path); + } + op1_ref = jit_ZVAL_ADDR(jit, orig_op1_addr); + } else { + op1_ref = jit_ZVAL_ADDR(jit, op1_addr); + } + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_write), + op1_ref, ir_CONST_ADDR(Z_STRVAL_P(member))); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + } else { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_read), + op1_ref, ir_CONST_ADDR(Z_STRVAL_P(member))); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + } else { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + ir_END_list(end_inputs); + + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, Z_STR_P(member), on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + } + } + } + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, + ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + may_be_dynamic = zend_may_be_dynamic_property(ce, Z_STR_P(member), opline->op1_type == IS_UNUSED, op_array->filename); + if (may_be_dynamic) { + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + } else { + ir_IF_TRUE_cold(if_dynamic); + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_r_dynamic), + obj_ref, offset_ref); + } else { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_is_dynamic), + obj_ref, offset_ref); + } + ir_END_list(end_inputs); + } + ir_IF_FALSE(if_dynamic); + } + prop_ref = ir_ADD_A(obj_ref, offset_ref); + prop_type_ref = jit_Z_TYPE_ref(jit, prop_ref); + ir_ref if_def = ir_IF(prop_type_ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (opline->opcode == ZEND_FETCH_OBJ_W + && (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT)))) { + uint32_t flags = opline->extended_value & ZEND_FETCH_OBJ_FLAGS; + + ir_ref prop_info_ref = ir_LOAD_A( + 
ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + + ir_IF_TRUE_cold(if_has_prop_info); + + ir_ref if_readonly = ir_IF( + ir_AND_U32(ir_LOAD_U32(ir_ADD_OFFSET(prop_info_ref, offsetof(zend_property_info, flags))), + ir_CONST_U32(ZEND_ACC_READONLY))); + ir_IF_TRUE(if_readonly); + + ir_ref if_prop_obj = jit_if_Z_TYPE(jit, prop_addr, IS_OBJECT); + ir_IF_TRUE(if_prop_obj); + ref = jit_Z_PTR(jit, prop_addr); + jit_GC_ADDREF(jit, ref); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_OBJECT_EX); + ir_END_list(end_inputs); + + ir_IF_FALSE_cold(if_prop_obj); + + ir_ref extra_addr = ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, prop_addr), offsetof(zval, u2.extra)); + ir_ref extra = ir_LOAD_U32(extra_addr); + ir_ref if_reinitable = ir_IF(ir_AND_U32(extra, ir_CONST_U32(IS_PROP_REINITABLE))); + ir_IF_TRUE(if_reinitable); + ir_STORE(extra_addr, ir_AND_U32(extra, ir_CONST_U32(~IS_PROP_REINITABLE))); + ir_ref reinit_path = ir_END(); + + ir_IF_FALSE(if_reinitable); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_readonly_property_modification_error), prop_info_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + ir_END_list(end_inputs); + + if (flags == ZEND_FETCH_DIM_WRITE) { + ir_IF_FALSE_cold(if_readonly); + ir_MERGE_WITH(reinit_path); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_check_array_promotion), + prop_ref, prop_info_ref); + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } else if (flags == ZEND_FETCH_REF) { + ir_IF_FALSE_cold(if_readonly); + ir_MERGE_WITH(reinit_path); + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_create_typed_ref), + prop_ref, + prop_info_ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } else { + ir_ref list = reinit_path; + + ZEND_ASSERT(flags == 0); + ir_IF_FALSE(if_has_prop_info); + ir_END_list(list); + ir_IF_FALSE(if_readonly); + ir_END_list(list); + ir_MERGE_list(list); + } + } + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + if (opline->opcode == ZEND_FETCH_OBJ_W || !(res_info & MAY_BE_GUARD) || !JIT_G(current_frame)) { + /* perform IS_UNDEF check only after result type guard (during deoptimization) */ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + ir_GUARD(prop_type_ref, ir_CONST_ADDR(exit_addr)); + } + } else { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + ir_ref if_def = ir_IF(prop_type_ref); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + if (opline->opcode == ZEND_FETCH_OBJ_W && (prop_info->flags & ZEND_ACC_READONLY)) { + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + ir_ref if_prop_obj = jit_if_Z_TYPE(jit, prop_addr, IS_OBJECT); + ir_IF_TRUE(if_prop_obj); + ir_ref ref = jit_Z_PTR(jit, prop_addr); + jit_GC_ADDREF(jit, ref); + jit_set_Z_PTR(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_OBJECT_EX); + ir_END_list(end_inputs); + + ir_IF_FALSE_cold(if_prop_obj); + + ir_ref extra_addr = ir_ADD_OFFSET(jit_ZVAL_ADDR(jit, prop_addr), offsetof(zval, u2.extra)); + ir_ref extra = ir_LOAD_U32(extra_addr); + ir_ref if_reinitable = 
ir_IF(ir_AND_U32(extra, ir_CONST_U32(IS_PROP_REINITABLE))); + + ir_IF_FALSE(if_reinitable); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_readonly_property_modification_error), ir_CONST_ADDR(prop_info)); + jit_set_Z_TYPE_INFO(jit, res_addr, _IS_ERROR); + ir_END_list(end_inputs); + + ir_IF_TRUE(if_reinitable); + ir_STORE(extra_addr, ir_AND_U32(extra, ir_CONST_U32(~IS_PROP_REINITABLE))); + } + + if (opline->opcode == ZEND_FETCH_OBJ_W + && (opline->extended_value & ZEND_FETCH_OBJ_FLAGS) + && ZEND_TYPE_IS_SET(prop_info->type)) { + uint32_t flags = opline->extended_value & ZEND_FETCH_OBJ_FLAGS; + + if (flags == ZEND_FETCH_DIM_WRITE) { + if ((ZEND_TYPE_FULL_MASK(prop_info->type) & MAY_BE_ARRAY) == 0) { + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + ir_ref if_null_or_false = ir_IF(ir_LE(prop_type_ref, ir_CONST_U32(IS_FALSE))); + ir_IF_TRUE_cold(if_null_or_false); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_check_array_promotion), + prop_ref, ir_CONST_ADDR(prop_info)); + ir_END_list(end_inputs); + ir_IF_FALSE(if_null_or_false); + } + } else if (flags == ZEND_FETCH_REF) { + ir_ref ref; + + if (!prop_type_ref) { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + } + + ir_ref if_reference = ir_IF(ir_EQ(prop_type_ref, ir_CONST_U32(IS_REFERENCE_EX))); + ir_IF_FALSE(if_reference); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_create_typed_ref), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + ir_END_list(end_inputs); + ir_IF_TRUE(if_reference); + } else { + ZEND_UNREACHABLE(); + } + } + } + + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ZEND_ASSERT(prop_ref); + jit_set_Z_PTR(jit, res_addr, prop_ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_INDIRECT); + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE && prop_info) { + ssa->var_info[ssa_op->result_def].indirect_reference = 1; + } + ir_END_list(end_inputs); + } else { + bool result_avoid_refcounting = 0; + + if ((res_info & MAY_BE_GUARD) && JIT_G(current_frame) && prop_info) { + uint8_t type = concrete_type(res_info); + uint32_t flags = 0; + zend_jit_addr val_addr = prop_addr; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) + && !delayed_fetch_this + && !op1_avoid_refcounting) { + flags = ZEND_JIT_EXIT_FREE_OP1; + } + + if ((opline->result_type & (IS_VAR|IS_TMP_VAR)) + && !(flags & ZEND_JIT_EXIT_FREE_OP1) + && (res_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) + && (ssa_op+1)->op1_use == ssa_op->result_def + && zend_jit_may_avoid_refcounting(opline+1, res_info)) { + result_avoid_refcounting = 1; + ssa->var_info[ssa_op->result_def].avoid_refcounting = 1; + } + + val_addr = zend_jit_guard_fetch_result_type(jit, opline, val_addr, type, + 1, flags, op1_avoid_refcounting); + if (!val_addr) { + return 0; + } + + res_info &= ~MAY_BE_GUARD; + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + + // ZVAL_COPY + jit_ZVAL_COPY(jit, res_addr, -1, val_addr, res_info, !result_avoid_refcounting); + } else { + prop_type_ref = jit_Z_TYPE_INFO(jit, prop_addr); + + if (!zend_jit_zval_copy_deref(jit, res_addr, 
prop_addr, prop_type_ref)) { + return 0; + } + } + ir_END_list(end_inputs); + } + + if (op1_avoid_refcounting) { + SET_STACK_REG(JIT_G(current_frame)->stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE); + } + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE || !prop_info) { + ir_MERGE_list(slow_inputs); + jit_SET_EX_OPLINE(jit, opline); + + if (opline->opcode == ZEND_FETCH_OBJ_W) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_w_slow), obj_ref); + } else if (opline->opcode != ZEND_FETCH_OBJ_IS) { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_r_slow), obj_ref); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_fetch_obj_is_slow), obj_ref); + } + ir_END_list(end_inputs); + } + + ir_MERGE_list(end_inputs); + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + if (opline->op1_type == IS_VAR + && opline->opcode == ZEND_FETCH_OBJ_W + && (op1_info & MAY_BE_RC1)) { + zend_jit_addr orig_op1_addr = OP1_ADDR(); + ir_ref if_refcounted, ptr, refcount, if_non_zero; + ir_ref merge_inputs = IR_UNUSED; + + if_refcounted = jit_if_REFCOUNTED(jit, orig_op1_addr); + ir_IF_FALSE( if_refcounted); + ir_END_list(merge_inputs); + ir_IF_TRUE( if_refcounted); + ptr = jit_Z_PTR(jit, orig_op1_addr); + refcount = jit_GC_DELREF(jit, ptr); + if_non_zero = ir_IF(refcount); + ir_IF_TRUE( if_non_zero); + ir_END_list(merge_inputs); + ir_IF_FALSE( if_non_zero); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_extract_helper), ptr); + ir_END_list(merge_inputs); + ir_MERGE_list(merge_inputs); + } else if (!op1_avoid_refcounting) { + if (on_this) { + op1_info &= ~MAY_BE_RC1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + } + + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE + && prop_info + && (opline->opcode != ZEND_FETCH_OBJ_W || + !(opline->extended_value & ZEND_FETCH_OBJ_FLAGS) || + !ZEND_TYPE_IS_SET(prop_info->type)) + && (!(opline->op1_type & (IS_VAR|IS_TMP_VAR)) || on_this || op1_indirect)) { + may_throw = 0; + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t val_info, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type, + int may_throw) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + zend_jit_addr val_addr = OP1_DATA_ADDR(); + zend_jit_addr res_addr = 0; + zend_jit_addr prop_addr; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref delayed_end_input = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + if (RETURN_VALUE_USED(opline)) { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + 
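+ /* A FETCH_OBJ_W/FETCH_DIM_W-style producer can leave an IS_INDIRECT zval in an IS_VAR operand; unwrap the indirection before the reference deref below. */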
op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_assign), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + if (RETURN_VALUE_USED(opline)) { + jit_set_Z_TYPE_INFO(jit, res_addr, IS_NULL); + } + + ir_END_list(end_inputs); + + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + prop_ref = ir_ADD_A(obj_ref, offset_ref); + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_to_typed_prop), + prop_ref, + prop_info_ref, + jit_ZVAL_ADDR(jit, val_addr), + 
RETURN_VALUE_USED(opline) ? jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + if ((opline+1)->op1_type == IS_CONST) { + // TODO: ??? + // if (Z_TYPE_P(value) == orig_type) { + // CACHE_PTR_EX(cache_slot + 2, NULL); + } + + ir_END_list(end_inputs); + ir_IF_FALSE(if_has_prop_info); + } + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + if (!ce || ce_is_instanceof || !(ce->ce_flags & ZEND_ACC_IMMUTABLE) || ce->__get || ce->__set || (prop_info->flags & ZEND_ACC_READONLY)) { + // Undefined property with magic __get()/__set() + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + if (ZEND_TYPE_IS_SET(prop_info->type)) { + ir_ref ref; + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + jit_SET_EX_OPLINE(jit, opline); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_to_typed_prop), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, val_addr), + RETURN_VALUE_USED(opline) ? jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + if (opline->result_type == IS_UNUSED) { + if (!zend_jit_assign_to_variable_call(jit, opline, prop_addr, prop_addr, -1, -1, (opline+1)->op1_type, val_addr, val_info, res_addr, 0)) { + return 0; + } + } else { + if (!zend_jit_assign_to_variable(jit, opline, prop_addr, prop_addr, -1, -1, (opline+1)->op1_type, val_addr, val_info, res_addr, 0, 0)) { + return 0; + } + } + if (end_inputs || slow_inputs) { + if (((opline+1)->op1_type & (IS_VAR|IS_TMP_VAR)) + && (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + /* skip FREE_OP_DATA() */ + delayed_end_input = ir_END(); + } else { + ir_END_list(end_inputs); + } + } + } + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + jit_SET_EX_OPLINE(jit, opline); + + // JIT: value = zobj->handlers->write_property(zobj, name, value, CACHE_ADDR(opline->extended_value)); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_5(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_obj_helper), + obj_ref, + ir_CONST_ADDR(name), + jit_ZVAL_ADDR(jit, val_addr), + ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + RETURN_VALUE_USED(opline) ? 
jit_ZVAL_ADDR(jit, res_addr) : IR_NULL); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + + if (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1|MAY_BE_RCN; + } + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, opline); + + if (delayed_end_input) { + ir_MERGE_WITH(delayed_end_input); + } + } + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_assign_obj_op(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + uint32_t val_info, + zend_ssa_range *val_range, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + zend_jit_addr val_addr = OP1_DATA_ADDR(); + zend_jit_addr prop_addr; + bool use_prop_guard = 0; + bool may_throw = 0; + binary_op_type binary_op = get_binary_op(opline->extended_value); + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + ZEND_ASSERT(opline->result_type == IS_UNUSED); + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, + (op1_info & MAY_BE_UNDEF) ? 
+ ir_CONST_FC_FUNC(zend_jit_invalid_property_assign_op) : + ir_CONST_FC_FUNC(zend_jit_invalid_property_assign), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + may_throw = 1; + + ir_END_list(end_inputs); + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + use_prop_guard = (prop_type != IS_UNKNOWN + && prop_type != IS_UNDEF + && prop_type != IS_REFERENCE + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_OBJECT); + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, (opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, ((opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_has_prop_info); + } + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, ((opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + + prop_ref = ir_ADD_A(obj_ref, offset_ref); + if (!use_prop_guard) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + } + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (ZEND_TYPE_IS_SET(prop_info->type) || !use_prop_guard) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + if 
(ZEND_TYPE_IS_SET(prop_info->type)) { + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + may_throw = 1; + + jit_SET_EX_OPLINE(jit, opline); + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, prop_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + // JIT: value = zend_assign_to_typed_prop(prop_info, property_val, value EXECUTE_DATA_CC); + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_prop), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + zend_jit_addr var_addr = prop_addr; + uint32_t var_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + uint32_t var_def_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + + if (use_prop_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, prop_addr, prop_type, exit_addr); + var_info = (1 << prop_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + + if (var_info & MAY_BE_REF) { + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + may_throw = 1; + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + jit_SET_EX_OPLINE(jit, opline); + + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_op_to_typed_ref), + reference, + jit_ZVAL_ADDR(jit, val_addr), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + var_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + var_info &= ~MAY_BE_REF; + } + + uint8_t val_op_type = (opline+1)->op1_type; + if (val_op_type & (IS_TMP_VAR|IS_VAR)) { + /* prevent FREE_OP in the helpers */ + val_op_type = IS_CV; + } + + switch (opline->extended_value) { + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + if ((var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if (opline->extended_value != ZEND_ADD || + (var_info & MAY_BE_ANY) != MAY_BE_ARRAY || + (val_info & MAY_BE_ANY) != MAY_BE_ARRAY) { + may_throw = 1; + } + } + 
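+ /* ADD of two values known to be arrays is the only compound op here that cannot throw; any other non-scalar operand combination may raise an "Unsupported operand types" error, hence may_throw. */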
if (!zend_jit_math_helper(jit, opline, opline->extended_value, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, val_addr, val_info, 0, var_addr, var_def_info, var_info, + 1 /* may overflow */, 0)) { + return 0; + } + break; + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + if ((var_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + if ((var_info & MAY_BE_ANY) != MAY_BE_STRING || + (val_info & MAY_BE_ANY) != MAY_BE_STRING) { + may_throw = 1; + } + } + goto long_math; + case ZEND_SL: + case ZEND_SR: + if ((var_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_DOUBLE|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + may_throw = 1; + } + if (val_op_type != IS_CONST || + Z_TYPE_P(RT_CONSTANT((opline+1), (opline+1)->op1)) != IS_LONG || + Z_LVAL_P(RT_CONSTANT((opline+1), (opline+1)->op1)) < 0) { + may_throw = 1; + } + goto long_math; + case ZEND_MOD: + if ((var_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) || + (val_info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE))) { + may_throw = 1; + } + if (val_op_type != IS_CONST || + Z_TYPE_P(RT_CONSTANT((opline+1), (opline+1)->op1)) != IS_LONG || + Z_LVAL_P(RT_CONSTANT((opline+1), (opline+1)->op1)) == 0) { + may_throw = 1; + } +long_math: + if (!zend_jit_long_math_helper(jit, opline, opline->extended_value, + IS_CV, opline->op1, var_addr, var_info, NULL, + val_op_type, (opline+1)->op1, val_addr, val_info, + val_range, + 0, var_addr, var_def_info, var_info, /* may throw */ 1)) { + return 0; + } + break; + case ZEND_CONCAT: + may_throw = 1; + if (!zend_jit_concat_helper(jit, opline, IS_CV, opline->op1, var_addr, var_info, val_op_type, (opline+1)->op1, val_addr, val_info, var_addr, + 0)) { + return 0; + } + break; + default: + ZEND_UNREACHABLE(); + } + if (end_inputs || slow_inputs) { + ir_END_list(end_inputs); + } + } + + if (slow_inputs) { + ir_MERGE_list(slow_inputs); + + may_throw = 1; + + jit_SET_EX_OPLINE(jit, opline); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_5(IR_VOID, ir_CONST_FC_FUNC(zend_jit_assign_obj_op_helper), + obj_ref, + ir_CONST_ADDR(name), + jit_ZVAL_ADDR(jit, val_addr), + ir_ADD_OFFSET(run_time_cache, (opline+1)->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + ir_CONST_FC_FUNC(binary_op)); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + if (val_info & (MAY_BE_REF|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + val_info |= MAY_BE_RC1|MAY_BE_RCN; + } + + // JIT: FREE_OP_DATA(); + jit_FREE_OP(jit, (opline+1)->op1_type, (opline+1)->op1, val_info, opline); + + if (opline->op1_type != IS_UNUSED && !delayed_fetch_this && !op1_indirect) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_incdec_obj(zend_jit_ctx *jit, + const zend_op *opline, + const zend_op_array *op_array, + zend_ssa *ssa, + const zend_ssa_op *ssa_op, + uint32_t op1_info, + zend_jit_addr op1_addr, + bool op1_indirect, + zend_class_entry *ce, + bool ce_is_instanceof, + bool on_this, + bool delayed_fetch_this, + zend_class_entry *trace_ce, + uint8_t prop_type) +{ + zval *member; + zend_string *name; + zend_property_info *prop_info; + 
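+ /* res_addr is initialized only when the opline actually uses its result; it stays 0 for the common result-discarding inc/dec. */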
zend_jit_addr res_addr = 0; + zend_jit_addr prop_addr; + bool use_prop_guard = 0; + bool may_throw = 0; + uint32_t res_info = (opline->result_type != IS_UNDEF) ? RES_INFO() : 0; + ir_ref obj_ref = IR_UNUSED; + ir_ref prop_ref = IR_UNUSED; + ir_ref end_inputs = IR_UNUSED; + ir_ref slow_inputs = IR_UNUSED; + + ZEND_ASSERT(opline->op2_type == IS_CONST); + ZEND_ASSERT(op1_info & MAY_BE_OBJECT); + + if (opline->result_type != IS_UNUSED) { + res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, opline->result.var); + } + + member = RT_CONSTANT(opline, opline->op2); + ZEND_ASSERT(Z_TYPE_P(member) == IS_STRING && Z_STRVAL_P(member)[0] != '\0'); + name = Z_STR_P(member); + prop_info = zend_get_known_property_info(op_array, ce, name, on_this, op_array->filename); + + if (on_this) { + zend_jit_addr this_addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, offsetof(zend_execute_data, This)); + obj_ref = jit_Z_PTR(jit, this_addr); + } else { + if (opline->op1_type == IS_VAR + && (op1_info & MAY_BE_INDIRECT) + && Z_REG(op1_addr) == ZREG_FP) { + op1_addr = jit_ZVAL_INDIRECT_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & ((MAY_BE_UNDEF|MAY_BE_ANY)- MAY_BE_OBJECT)) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, op1_addr, IS_OBJECT, exit_addr); + } else { + ir_ref if_obj = jit_if_Z_TYPE(jit, op1_addr, IS_OBJECT); + ir_IF_FALSE_cold(if_obj); + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_invalid_property_incdec), + jit_ZVAL_ADDR(jit, op1_addr), + ir_CONST_ADDR(ZSTR_VAL(name))); + + may_throw = 1; + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this && !op1_indirect) { + ir_END_list(end_inputs); + } else { + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_exception_handler)); + } + ir_IF_TRUE(if_obj); + } + } + obj_ref = jit_Z_PTR(jit, op1_addr); + } + + ZEND_ASSERT(obj_ref); + if (!prop_info && trace_ce && (trace_ce->ce_flags & ZEND_ACC_IMMUTABLE)) { + prop_info = zend_get_known_property_info(op_array, trace_ce, name, on_this, op_array->filename); + if (prop_info) { + ce = trace_ce; + ce_is_instanceof = 0; + if (!(op1_info & MAY_BE_CLASS_GUARD)) { + if (on_this && JIT_G(current_frame) + && TRACE_FRAME_IS_THIS_CLASS_CHECKED(JIT_G(current_frame))) { + ZEND_ASSERT(JIT_G(current_frame)->ce == ce); + } else if (zend_jit_class_guard(jit, opline, obj_ref, ce)) { + if (on_this && JIT_G(current_frame)) { + JIT_G(current_frame)->ce = ce; + TRACE_FRAME_SET_THIS_CLASS_CHECKED(JIT_G(current_frame)); + } + } else { + return 0; + } + if (ssa->var_info && ssa_op->op1_use >= 0) { + ssa->var_info[ssa_op->op1_use].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_use].ce = ce; + ssa->var_info[ssa_op->op1_use].is_instanceof = ce_is_instanceof; + } + if (ssa->var_info && ssa_op->op1_def >= 0) { + ssa->var_info[ssa_op->op1_def].type |= MAY_BE_CLASS_GUARD; + ssa->var_info[ssa_op->op1_def].ce = ce; + ssa->var_info[ssa_op->op1_def].is_instanceof = ce_is_instanceof; + } + } + } + } + + use_prop_guard = (prop_type != IS_UNKNOWN + && prop_type != IS_UNDEF + && prop_type != IS_REFERENCE + && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_OBJECT); + + if (!prop_info) { + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_ref ref = ir_LOAD_A(ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS)); + 
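+ /* Inline cache probe: slot[0] holds the cached zend_class_entry, slot[1] the property slot offset (negative for dynamic properties), slot[2] the zend_property_info needed for typed-property checks. */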
ir_ref if_same = ir_IF(ir_EQ(ref, ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))))); + + ir_IF_FALSE_cold(if_same); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_same); + if (!ce || ce_is_instanceof || (ce->ce_flags & (ZEND_ACC_HAS_TYPE_HINTS|ZEND_ACC_TRAIT))) { + ir_ref prop_info_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*) * 2)); + ir_ref if_has_prop_info = ir_IF(prop_info_ref); + ir_IF_TRUE_cold(if_has_prop_info); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_has_prop_info); + } + ir_ref offset_ref = ir_LOAD_A( + ir_ADD_OFFSET(run_time_cache, (opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS) + sizeof(void*))); + + ir_ref if_dynamic = ir_IF(ir_LT(offset_ref, IR_NULL)); + ir_IF_TRUE_cold(if_dynamic); + ir_END_list(slow_inputs); + + ir_IF_FALSE(if_dynamic); + + prop_ref = ir_ADD_A(obj_ref, offset_ref); + if (!use_prop_guard) { + ir_ref if_def = ir_IF(jit_Z_TYPE_ref(jit, prop_ref)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + + ir_IF_TRUE(if_def); + } + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + } else { + prop_ref = ir_ADD_OFFSET(obj_ref, prop_info->offset); + prop_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + if (ZEND_TYPE_IS_SET(prop_info->type) || !use_prop_guard) { + if (JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_Z_TYPE_INFO(jit, prop_addr), ir_CONST_ADDR(exit_addr)); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE_INFO(jit, prop_addr)); + ir_IF_FALSE_cold(if_def); + ir_END_list(slow_inputs); + ir_IF_TRUE(if_def); + } + } + + if (ZEND_TYPE_IS_SET(prop_info->type)) { + const void *func; + ir_ref ref; + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + + if (ce && ce->ce_flags & ZEND_ACC_IMMUTABLE) { + ref = ir_CONST_ADDR(prop_info); + } else { + int prop_info_offset = + (((prop_info->offset - (sizeof(zend_object) - sizeof(zval))) / sizeof(zval)) * sizeof(void*)); + + ref = ir_LOAD_A(ir_ADD_OFFSET(obj_ref, offsetof(zend_object, ce))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, offsetof(zend_class_entry, properties_info_table))); + ref = ir_LOAD_A(ir_ADD_OFFSET(ref, prop_info_offset)); + } + + if (opline->result_type == IS_UNUSED) { + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + case ZEND_POST_INC_OBJ: + func = zend_jit_inc_typed_prop; + break; + case ZEND_PRE_DEC_OBJ: + case ZEND_POST_DEC_OBJ: + func = zend_jit_dec_typed_prop; + break; + default: + ZEND_UNREACHABLE(); + } + + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(func), prop_ref, ref); + } else { + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_typed_prop; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_typed_prop; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_typed_prop; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_typed_prop; + break; + default: + ZEND_UNREACHABLE(); + } + ir_CALL_3(IR_VOID, ir_CONST_FC_FUNC(func), + prop_ref, + ref, + jit_ZVAL_ADDR(jit, res_addr)); + } + ir_END_list(end_inputs); + } + } + + if (!prop_info || !ZEND_TYPE_IS_SET(prop_info->type)) { + uint32_t var_info = MAY_BE_ANY|MAY_BE_REF|MAY_BE_RC1|MAY_BE_RCN; + zend_jit_addr var_addr = prop_addr; + ir_ref if_long = IR_UNUSED; + ir_ref if_overflow = IR_UNUSED; + + if (use_prop_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = 
zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, prop_addr, prop_type, exit_addr); + var_info = (1 << prop_type) | (var_info & ~(MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)); + } + + if (var_info & MAY_BE_REF) { + const void *func; + ir_ref if_ref, if_typed, noref_path, ref_path, reference, ref; + + if_ref = jit_if_Z_TYPE(jit, prop_addr, IS_REFERENCE); + ir_IF_FALSE(if_ref); + noref_path = ir_END(); + ir_IF_TRUE(if_ref); + + reference = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(reference, offsetof(zend_reference, val)); + if_typed = jit_if_TYPED_REF(jit, reference); + ir_IF_FALSE(if_typed); + ref_path = ir_END(); + ir_IF_TRUE_cold(if_typed); + + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_typed_ref; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_typed_ref; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_typed_ref; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_typed_ref; + break; + default: + ZEND_UNREACHABLE(); + } + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(func), + reference, + (opline->result_type == IS_UNUSED) ? IR_NULL : jit_ZVAL_ADDR(jit, res_addr)); + + ir_END_list(end_inputs); + + ir_MERGE_2(noref_path, ref_path); + prop_ref = ir_PHI_2(IR_ADDR, prop_ref, ref); + var_addr = ZEND_ADDR_REF_ZVAL(prop_ref); + + var_info &= ~MAY_BE_REF; + } + + if (var_info & MAY_BE_LONG) { + ir_ref addr, ref; + + if (var_info & (MAY_BE_ANY - MAY_BE_LONG)) { + if_long = jit_if_Z_TYPE(jit, var_addr, IS_LONG); + ir_IF_TRUE(if_long); + } + + addr = jit_ZVAL_ADDR(jit, var_addr); + ref = ir_LOAD_L(addr); + if (opline->opcode == ZEND_POST_INC_OBJ || opline->opcode == ZEND_POST_DEC_OBJ) { + if (opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { + ref = ir_ADD_OV_L(ref, ir_CONST_LONG(1)); + } else { + ref = ir_SUB_OV_L(ref, ir_CONST_LONG(1)); + } + + ir_STORE(addr, ref); + if_overflow = ir_IF(ir_OVERFLOW(ref)); + ir_IF_FALSE(if_overflow); + + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_PRE_DEC_OBJ) { + if (opline->result_type != IS_UNUSED) { + jit_set_Z_LVAL(jit, res_addr, ref); + jit_set_Z_TYPE_INFO(jit, res_addr, IS_LONG); + } + } + ir_END_list(end_inputs); + } + + if (var_info & (MAY_BE_ANY - MAY_BE_LONG)) { + if (var_info & (MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE)) { + may_throw = 1; + } + if (if_long) { + ir_IF_FALSE_cold(if_long); + } + if (opline->opcode == ZEND_POST_INC_OBJ || opline->opcode == ZEND_POST_DEC_OBJ) { + jit_ZVAL_COPY(jit, res_addr, -1, var_addr, var_info, 1); + } + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { + if (opline->opcode == ZEND_PRE_INC_OBJ && opline->result_type != IS_UNUSED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_inc), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(increment_function), + jit_ZVAL_ADDR(jit, var_addr)); + } + } else { + if (opline->opcode == ZEND_PRE_DEC_OBJ && opline->result_type != IS_UNUSED) { + ir_CALL_2(IR_VOID, ir_CONST_FC_FUNC(zend_jit_pre_dec), + jit_ZVAL_ADDR(jit, var_addr), + jit_ZVAL_ADDR(jit, res_addr)); + } else { + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(decrement_function), + jit_ZVAL_ADDR(jit, var_addr)); + } + } + + ir_END_list(end_inputs); + } + if (var_info & MAY_BE_LONG) { + 
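+ /* Cold overflow path: ++ on ZEND_LONG_MAX (or -- on ZEND_LONG_MIN) produces a double; the constants below are the raw IEEE-754 bit patterns of that double, stored directly so no runtime int-to-double conversion is needed. */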
ir_IF_TRUE_cold(if_overflow); + if (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_POST_INC_OBJ) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, var_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, var_addr, IS_DOUBLE); + if (opline->opcode == ZEND_PRE_INC_OBJ && opline->result_type != IS_UNUSED) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0x41e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x43e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } else { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, var_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, var_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, var_addr, IS_DOUBLE); + if (opline->opcode == ZEND_PRE_DEC_OBJ && opline->result_type != IS_UNUSED) { +#if SIZEOF_ZEND_LONG == 4 + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0x00200000)); + jit_set_Z_W2(jit, res_addr, ir_CONST_U32(0xc1e00000)); +#else + jit_set_Z_LVAL(jit, res_addr, ir_CONST_LONG(0xc3e0000000000000)); +#endif + jit_set_Z_TYPE_INFO(jit, res_addr, IS_DOUBLE); + } + } + if (opline->result_type != IS_UNUSED + && (opline->opcode == ZEND_PRE_INC_OBJ || opline->opcode == ZEND_PRE_DEC_OBJ) + && prop_info + && !ZEND_TYPE_IS_SET(prop_info->type) + && (res_info & MAY_BE_GUARD) + && (res_info & MAY_BE_LONG)) { + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + uint32_t old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + int32_t exit_point; + const void *exit_addr; + + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_DOUBLE, 0); + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + ssa->var_info[ssa_op->result_def].type = res_info & ~MAY_BE_GUARD; + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } else { + ir_END_list(end_inputs); + } + } + } + + if (slow_inputs) { + const void *func; + + ir_MERGE_list(slow_inputs); + + // JIT: zend_jit_pre_inc_obj_helper(zobj, name, CACHE_ADDR(opline->extended_value), result); + switch (opline->opcode) { + case ZEND_PRE_INC_OBJ: + func = zend_jit_pre_inc_obj_helper; + break; + case ZEND_PRE_DEC_OBJ: + func = zend_jit_pre_dec_obj_helper; + break; + case ZEND_POST_INC_OBJ: + func = zend_jit_post_inc_obj_helper; + break; + case ZEND_POST_DEC_OBJ: + func = zend_jit_post_dec_obj_helper; + break; + default: + ZEND_UNREACHABLE(); + } + + may_throw = 1; + jit_SET_EX_OPLINE(jit, opline); + ir_ref run_time_cache = ir_LOAD_A(jit_EX(run_time_cache)); + ir_CALL_4(IR_VOID, ir_CONST_FC_FUNC(func), + obj_ref, + ir_CONST_ADDR(name), + ir_ADD_OFFSET(run_time_cache, opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS), + (opline->result_type == IS_UNUSED) ? 
IR_NULL : jit_ZVAL_ADDR(jit, res_addr)); + + ir_END_list(end_inputs); + } + + if (end_inputs) { + ir_MERGE_list(end_inputs); + } + + if ((opline->op1_type & (IS_VAR|IS_TMP_VAR)) && !delayed_fetch_this && !op1_indirect) { + if ((op1_info & MAY_HAVE_DTOR) && (op1_info & MAY_BE_RC1)) { + may_throw = 1; + } + jit_FREE_OP(jit, opline->op1_type, opline->op1, op1_info, opline); + } + + if (may_throw) { + zend_jit_check_exception(jit); + } + + return 1; +} + +static int zend_jit_switch(zend_jit_ctx *jit, const zend_op *opline, const zend_op_array *op_array, zend_ssa *ssa, zend_jit_trace_rec *trace, zend_jit_trace_info *trace_info) +{ + HashTable *jumptable = Z_ARRVAL_P(RT_CONSTANT(opline, opline->op2)); + const zend_op *next_opline = NULL; + ir_refs *slow_inputs; + + ir_refs_init(slow_inputs, 8); + + if (trace) { + ZEND_ASSERT(trace->op == ZEND_JIT_TRACE_VM || trace->op == ZEND_JIT_TRACE_END); + ZEND_ASSERT(trace->opline != NULL); + next_opline = trace->opline; + } + + if (opline->op1_type == IS_CONST) { + zval *zv = RT_CONSTANT(opline, opline->op1); + zval *jump_zv = NULL; + int b; + + if (opline->opcode == ZEND_SWITCH_LONG) { + if (Z_TYPE_P(zv) == IS_LONG) { + jump_zv = zend_hash_index_find(jumptable, Z_LVAL_P(zv)); + } + } else if (opline->opcode == ZEND_SWITCH_STRING) { + if (Z_TYPE_P(zv) == IS_STRING) { + jump_zv = zend_hash_find_known_hash(jumptable, Z_STR_P(zv)); + } + } else if (opline->opcode == ZEND_MATCH) { + if (Z_TYPE_P(zv) == IS_LONG) { + jump_zv = zend_hash_index_find(jumptable, Z_LVAL_P(zv)); + } else if (Z_TYPE_P(zv) == IS_STRING) { + jump_zv = zend_hash_find_known_hash(jumptable, Z_STR_P(zv)); + } + } else { + ZEND_UNREACHABLE(); + } + if (next_opline) { + const zend_op *target; + + if (jump_zv != NULL) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(jump_zv)); + } else { + target = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + } + ZEND_ASSERT(target == next_opline); + } else { + if (jump_zv != NULL) { + b = ssa->cfg.map[ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(jump_zv)) - op_array->opcodes]; + } else { + b = ssa->cfg.map[ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value) - op_array->opcodes]; + } + _zend_jit_add_predecessor_ref(jit, b, jit->b, ir_END()); + jit->b = -1; + } + } else { + zend_ssa_op *ssa_op = &ssa->ops[opline - op_array->opcodes]; + uint32_t op1_info = OP1_INFO(); + zend_jit_addr op1_addr = OP1_ADDR(); + const zend_op *default_opline = ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value); + const zend_op *target; + int default_b = next_opline ? 
-1 : ssa->cfg.map[default_opline - op_array->opcodes]; + int b; + int32_t exit_point; + const void *exit_addr; + const void *fallback_label = NULL; + const void *default_label = NULL; + zval *zv; + + if (next_opline) { + if (opline->opcode != ZEND_MATCH && next_opline != opline + 1) { + exit_point = zend_jit_trace_get_exit_point(opline + 1, 0); + fallback_label = zend_jit_trace_get_exit_addr(exit_point); + if (!fallback_label) { + return 0; + } + } + if (next_opline != default_opline) { + exit_point = zend_jit_trace_get_exit_point(default_opline, 0); + default_label = zend_jit_trace_get_exit_addr(exit_point); + if (!default_label) { + return 0; + } + } + } + + if (opline->opcode == ZEND_SWITCH_LONG) { + if (op1_info & MAY_BE_LONG) { + if (op1_info & MAY_BE_REF) { + ir_ref ref, if_long, fast_path, ref2; + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_long); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_long); + + // JIT: ZVAL_DEREF(op) + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_REFERENCE, fallback_label); + } else { + ir_ref if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE_cold(if_ref); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_ref); + } + + ref2 = ir_ADD_OFFSET(jit_Z_PTR(jit, op1_addr), offsetof(zend_reference, val)); + op1_addr = ZEND_ADDR_REF_ZVAL(ref2); + + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, fallback_label); + } else { + if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_long); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_long); + } + + ir_MERGE_2(fast_path, ir_END()); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, fallback_label); + } else { + ir_ref if_long = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE_cold(if_long); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_long); + } + } + ir_ref ref = jit_Z_LVAL(jit, op1_addr); + + if (!HT_IS_PACKED(jumptable)) { + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_index_find), + ir_CONST_ADDR(jumptable), ref); + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(sizeof(Bucket))); + } + ref = ir_SWITCH(ref); + + if (next_opline) { + ir_ref continue_list = IR_UNUSED; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + if (continue_list) { + ir_MERGE_list(continue_list); + } else { + ZEND_ASSERT(slow_inputs->count); + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, 
b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + if (slow_inputs->count) { + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + } + jit->b = -1; + } + } else { + ZEND_ASSERT(!next_opline); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + jit->b = -1; + } + } else if (opline->opcode == ZEND_SWITCH_STRING) { + if (op1_info & MAY_BE_STRING) { + if (op1_info & MAY_BE_REF) { + ir_ref ref, if_string, fast_path, ref2; + + ref = jit_ZVAL_ADDR(jit, op1_addr); + if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_string); + fast_path = ir_END(); + ir_IF_FALSE_cold(if_string); + + // JIT: ZVAL_DEREF(op) + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_REFERENCE, fallback_label); + } else { + ir_ref if_ref = jit_if_Z_TYPE(jit, op1_addr, IS_REFERENCE); + ir_IF_FALSE_cold(if_ref); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_ref); + } + + ref2 = ir_ADD_OFFSET(jit_Z_PTR(jit, op1_addr), offsetof(zend_reference, val)); + op1_addr = ZEND_ADDR_REF_ZVAL(ref2); + + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, fallback_label); + } else { + if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE_cold(if_string); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_string); + } + + ir_MERGE_2(fast_path, ir_END()); + ref = ir_PHI_2(IR_ADDR, ref, ref2); + op1_addr = ZEND_ADDR_REF_ZVAL(ref); + } else if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_STRING)) { + if (fallback_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, fallback_label); + } else { + ir_ref if_string = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE_cold(if_string); + ir_refs_add(slow_inputs, ir_END()); + ir_IF_TRUE(if_string); + } + } + + ir_ref ref = jit_Z_PTR(jit, op1_addr); + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(jumptable), ref); + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(sizeof(Bucket))); + ref = ir_SWITCH(ref); + + if (next_opline) { + ir_ref continue_list = IR_UNUSED; + + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + if (continue_list) { + ir_MERGE_list(continue_list); + } else { + ZEND_ASSERT(slow_inputs->count); + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + if (slow_inputs->count) { + ir_MERGE_N(slow_inputs->count, slow_inputs->refs); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + } + 
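+ /* Every outgoing edge of the SWITCH has been recorded as a predecessor ref of its target block; -1 marks the current basic block as closed. */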
jit->b = -1; + } + } else { + ZEND_ASSERT(!next_opline); + _zend_jit_add_predecessor_ref(jit, jit->b + 1, jit->b, ir_END()); + jit->b = -1; + } + } else if (opline->opcode == ZEND_MATCH) { + ir_ref if_type = IR_UNUSED, default_input_list = IR_UNUSED, ref = IR_UNUSED; + ir_ref continue_list = IR_UNUSED; + + if (op1_info & (MAY_BE_LONG|MAY_BE_STRING)) { + ir_ref long_path = IR_UNUSED; + + if (op1_info & MAY_BE_REF) { + op1_addr = jit_ZVAL_DEREF(jit, op1_addr); + } + if (op1_info & MAY_BE_LONG) { + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-MAY_BE_LONG)) { + if (op1_info & (MAY_BE_STRING|MAY_BE_UNDEF)) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_TRUE(if_type); + } else if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_LONG, default_label); + } else if (next_opline) { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE(if_type); + ir_END_list(continue_list); + ir_IF_TRUE(if_type); + } else { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_LONG); + ir_IF_FALSE(if_type); + ir_END_list(default_input_list); + ir_IF_TRUE(if_type); + } + } + ref = jit_Z_LVAL(jit, op1_addr); + ref = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_index_find), + ir_CONST_ADDR(jumptable), ref); + if (op1_info & MAY_BE_STRING) { + long_path = ir_END(); + } + } + if (op1_info & MAY_BE_STRING) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + if (op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_LONG|MAY_BE_STRING))) { + if (op1_info & MAY_BE_UNDEF) { + if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_TRUE(if_type); + } else if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_STRING, default_label); + } else if (next_opline) { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE(if_type); + ir_END_list(continue_list); + ir_IF_TRUE(if_type); + } else { + ir_ref if_type = jit_if_Z_TYPE(jit, op1_addr, IS_STRING); + ir_IF_FALSE(if_type); + ir_END_list(default_input_list); + ir_IF_TRUE(if_type); + } + } + ir_ref ref2 = jit_Z_PTR(jit, op1_addr); + ref2 = ir_CALL_2(IR_LONG, ir_CONST_FC_FUNC(zend_hash_find), + ir_CONST_ADDR(jumptable), ref2); + if (op1_info & MAY_BE_LONG) { + ir_MERGE_WITH(long_path); + ref = ir_PHI_2(IR_LONG, ref2, ref); + } else { + ref = ref2; + } + } + + ref = ir_SUB_L(ref, ir_CONST_LONG((uintptr_t)jumptable->arData)); + ref = ir_DIV_L(ref, ir_CONST_LONG(HT_IS_PACKED(jumptable) ? 
sizeof(zval) : sizeof(Bucket))); + ref = ir_SWITCH(ref); + + if (next_opline) { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + ir_ref idx; + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + + if (HT_IS_PACKED(jumptable)) { + idx = ir_CONST_LONG(zv - jumptable->arPacked); + } else { + idx = ir_CONST_LONG((Bucket*)zv - jumptable->arData); + } + ir_CASE_VAL(ref, idx); + if (target == next_opline) { + ir_END_list(continue_list); + } else { + exit_point = zend_jit_trace_get_exit_point(target, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + jit_SIDE_EXIT(jit, ir_CONST_ADDR(exit_addr)); + } + } ZEND_HASH_FOREACH_END(); + + ir_CASE_DEFAULT(ref); + if (next_opline == default_opline) { + ir_END_list(continue_list); + } else { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } + } else { + ZEND_HASH_FOREACH_VAL(jumptable, zv) { + target = ZEND_OFFSET_TO_OPLINE(opline, Z_LVAL_P(zv)); + b = ssa->cfg.map[target - op_array->opcodes]; + _zend_jit_add_predecessor_ref(jit, b, jit->b, ref); + } ZEND_HASH_FOREACH_END(); + _zend_jit_add_predecessor_ref(jit, default_b, jit->b, ref); + } + } + + if (op1_info & MAY_BE_UNDEF) { + if (if_type) { + ir_IF_FALSE(if_type); + if_type = IS_UNUSED; + } + if (op1_info & (MAY_BE_ANY-(MAY_BE_LONG|MAY_BE_STRING))) { + if (default_label) { + jit_guard_Z_TYPE(jit, op1_addr, IS_UNDEF, default_label); + } else if (next_opline) { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + ir_END_list(continue_list); + ir_IF_FALSE_cold(if_def); + } else { + ir_ref if_def = ir_IF(jit_Z_TYPE(jit, op1_addr)); + ir_IF_TRUE(if_def); + ir_END_list(default_input_list); + ir_IF_FALSE_cold(if_def); + } + } + + jit_SET_EX_OPLINE(jit, opline); + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_undefined_op_helper), + ir_CONST_U32(opline->op1.var)); + zend_jit_check_exception_undef_result(jit, opline); + if (default_label) { + jit_SIDE_EXIT(jit, ir_CONST_ADDR(default_label)); + } else if (next_opline) { + ir_END_list(continue_list); + } else { + ir_END_list(default_input_list); + } + } + if (next_opline) { + ZEND_ASSERT(continue_list); + ir_MERGE_list(continue_list); + } else { + if (default_input_list) { + ZEND_ASSERT(jit->ctx.ir_base[ref].op == IR_SWITCH); + ZEND_ASSERT(jit->ctx.ir_base[ref].op3 == IR_UNUSED); + jit->ctx.ir_base[ref].op3 = default_input_list; + } + jit->b = -1; + } + } else { + ZEND_UNREACHABLE(); + } + } + return 1; +} + +static int zend_jit_start(zend_jit_ctx *jit, const zend_op_array *op_array, zend_ssa *ssa) +{ + int i, count; + zend_basic_block *bb; + + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 
0 : (IR_START_BR_TARGET|IR_ENTRY_BR_TARGET));
+
+	jit->ctx.spill_base = ZREG_FP;
+
+	jit->op_array = jit->current_op_array = op_array;
+	jit->ssa = ssa;
+	jit->bb_start_ref = zend_arena_calloc(&CG(arena), ssa->cfg.blocks_count * 2, sizeof(ir_ref));
+	jit->bb_predecessors = jit->bb_start_ref + ssa->cfg.blocks_count;
+
+	count = 0;
+	for (i = 0, bb = ssa->cfg.blocks; i < ssa->cfg.blocks_count; i++, bb++) {
+		jit->bb_predecessors[i] = count;
+		count += bb->predecessors_count;
+	}
+	jit->bb_edges = zend_arena_calloc(&CG(arena), count, sizeof(ir_ref));
+
+	if (!GCC_GLOBAL_REGS) {
+		ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1);
+		jit_STORE_FP(jit, ref);
+		jit->ctx.flags |= IR_FASTCALL_FUNC;
+	}
+
+	return 1;
+}
+
+static void *zend_jit_finish(zend_jit_ctx *jit)
+{
+	void *entry;
+	size_t size;
+	zend_string *str = NULL;
+
+	if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP|
+			ZEND_JIT_DEBUG_IR_SRC|ZEND_JIT_DEBUG_IR_AFTER_SCCP|
+			ZEND_JIT_DEBUG_IR_AFTER_SCHEDULE|ZEND_JIT_DEBUG_IR_AFTER_REGS|ZEND_JIT_DEBUG_IR_FINAL|ZEND_JIT_DEBUG_IR_CODEGEN)) {
+		if (jit->name) {
+			str = zend_string_copy(jit->name);
+		} else {
+			str = zend_jit_func_name(jit->op_array);
+		}
+	}
+
+	if (jit->op_array) {
+		/* Only for function JIT */
+		_zend_jit_fix_merges(jit);
+#if defined(IR_TARGET_AARCH64)
+	} else if (jit->trace) {
+		jit->ctx.deoptimization_exits = jit->trace->exit_count;
+		jit->ctx.get_exit_addr = zend_jit_trace_get_exit_addr;
+#endif
+	}
+
+	entry = zend_jit_ir_compile(&jit->ctx, &size, str ? ZSTR_VAL(str) : NULL);
+	if (entry) {
+		if (JIT_G(debug) & (ZEND_JIT_DEBUG_ASM|ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) {
+#if HAVE_CAPSTONE
+			if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) {
+				if (str) {
+					ir_disasm_add_symbol(ZSTR_VAL(str), (uintptr_t)entry, size);
+				}
+				ir_disasm(str ?
ZSTR_VAL(str) : "unknown", + entry, size, + (JIT_G(debug) & ZEND_JIT_DEBUG_ASM_ADDR) != 0, + &jit->ctx, stderr); + } +#endif +#ifndef _WIN32 + if (str) { + if (JIT_G(debug) & ZEND_JIT_DEBUG_GDB) { + uintptr_t sp_offset = 0; + +// ir_mem_unprotect(entry, size); + if (!(jit->ctx.flags & IR_FUNCTION) + && zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { +#if !ZEND_WIN32 && !defined(IR_TARGET_AARCH64) + sp_offset = zend_jit_hybrid_vm_sp_adj; +#else + sp_offset = sizeof(void*); +#endif + } else { + sp_offset = sizeof(void*); + } + ir_gdb_register(ZSTR_VAL(str), entry, size, sp_offset, 0); +// ir_mem_protect(entry, size); + } + + if (JIT_G(debug) & (ZEND_JIT_DEBUG_PERF|ZEND_JIT_DEBUG_PERF_DUMP)) { + ir_perf_map_register(ZSTR_VAL(str), entry, size); + if (JIT_G(debug) & ZEND_JIT_DEBUG_PERF_DUMP) { + ir_perf_jitdump_register(ZSTR_VAL(str), entry, size); + } + } + } +#endif + } + + if (jit->op_array) { + /* Only for function JIT */ + const zend_op_array *op_array = jit->op_array; + zend_op *opline = (zend_op*)op_array->opcodes; + + if (!(op_array->fn_flags & ZEND_ACC_HAS_TYPE_HINTS)) { + while (opline->opcode == ZEND_RECV) { + opline++; + } + } + opline->handler = entry; + + if (jit->ctx.entries_count) { + /* For all entries */ + int i = jit->ctx.entries_count; + do { + ir_insn *insn = &jit->ctx.ir_base[jit->ctx.entries[--i]]; + op_array->opcodes[insn->op2].handler = (char*)entry + insn->op3; + } while (i != 0); + } + } else { + /* Only for tracing JIT */ + zend_jit_trace_info *t = jit->trace; + zend_jit_trace_stack *stack; + uint32_t i; + + if (t) { + for (i = 0; i < t->stack_map_size; i++) { + stack = t->stack_map + i; + if (stack->flags & ZREG_SPILL_SLOT) { + stack->reg = (jit->ctx.flags & IR_USE_FRAME_POINTER) ? IR_REG_FP : IR_REG_SP; + stack->ref = ir_get_spill_slot_offset(&jit->ctx, stack->ref); + } + } + } + + zend_jit_trace_add_code(entry, size); + +#if ZEND_JIT_SUPPORT_CLDEMOTE + if (cpu_support_cldemote) { + shared_cacheline_demote((uintptr_t)entry, size); + } +#endif + } + } + + if (str) { + zend_string_release(str); + } + + return entry; +} + +static const void *zend_jit_trace_allocate_exit_group(uint32_t n) +{ + const void *entry; + size_t size; + + entry = ir_emit_exitgroup(n, ZEND_JIT_EXIT_POINTS_PER_GROUP, zend_jit_stub_handlers[jit_stub_trace_exit], + *dasm_ptr, (char*)dasm_end - (char*)*dasm_ptr, &size); + + if (entry) { + *dasm_ptr = (char*)entry + ZEND_MM_ALIGNED_SIZE_EX(size, 16); +#ifdef HAVE_CAPSTONE + if (JIT_G(debug) & ZEND_JIT_DEBUG_ASM) { + uint32_t i; + char name[32]; + + for (i = 0; i < ZEND_JIT_EXIT_POINTS_PER_GROUP; i++) { + sprintf(name, "jit$$trace_exit_%d", n + i); + ir_disasm_add_symbol(name, (uintptr_t)entry + (i * ZEND_JIT_EXIT_POINTS_SPACING), ZEND_JIT_EXIT_POINTS_SPACING); + } + } +#endif + } + + return entry; +} + +static int zend_jit_type_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var, uint8_t type) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_EQ(jit_Z_TYPE(jit, addr), ir_CONST_U8(type)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_scalar_type_guard(zend_jit_ctx *jit, const zend_op *opline, uint32_t var) +{ + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + zend_jit_addr addr = ZEND_ADDR_MEM_ZVAL(ZREG_FP, var); + + if (!exit_addr) { + return 0; + } 
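+	/* All scalar zval type tags (IS_UNDEF, IS_NULL, IS_FALSE, IS_TRUE,
+	 * IS_LONG, IS_DOUBLE) are numerically below IS_STRING, so a single
+	 * unsigned compare guards "op1 holds a non-refcounted scalar". */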
+ ir_GUARD(ir_LT(jit_Z_TYPE(jit, addr), ir_CONST_U8(IS_STRING)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static bool zend_jit_noref_guard(zend_jit_ctx *jit, const zend_op *opline, zend_jit_addr var_addr) +{ + uint32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + ir_GUARD(ir_NE(jit_Z_TYPE(jit, var_addr), ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + + return 1; +} + +static int zend_jit_trace_opline_guard(zend_jit_ctx *jit, const zend_op *opline) +{ + uint32_t exit_point = zend_jit_trace_get_exit_point(NULL, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + ir_GUARD(jit_CMP_IP(jit, IR_EQ, opline), ir_CONST_ADDR(exit_addr)); + zend_jit_set_last_valid_opline(jit, opline); + + return 1; +} + +static bool zend_jit_guard_reference(zend_jit_ctx *jit, + const zend_op *opline, + zend_jit_addr *var_addr_ptr, + zend_jit_addr *ref_addr_ptr, + bool add_ref_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + const void *exit_addr = NULL; + ir_ref ref; + + if (add_ref_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + } + + ref = jit_Z_PTR(jit, var_addr); + *ref_addr_ptr = ZEND_ADDR_REF_ZVAL(ref); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + + return 1; +} + +static bool zend_jit_fetch_reference(zend_jit_ctx *jit, + const zend_op *opline, + uint8_t var_type, + uint32_t *var_info_ptr, + zend_jit_addr *var_addr_ptr, + bool add_ref_guard, + bool add_type_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + uint32_t var_info = *var_info_ptr; + const void *exit_addr = NULL; + ir_ref ref; + + if (add_ref_guard || add_type_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + if (!exit_addr) { + return 0; + } + } + + if (add_ref_guard) { + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(IS_REFERENCE)), ir_CONST_ADDR(exit_addr)); + } + if (opline->opcode == ZEND_INIT_METHOD_CALL && opline->op1_type == IS_VAR) { + /* Hack: Convert reference to regular value to simplify JIT code for INIT_METHOD_CALL */ + ir_CALL_1(IR_VOID, ir_CONST_FC_FUNC(zend_jit_unref_helper), + jit_ZVAL_ADDR(jit, var_addr)); + *var_addr_ptr = var_addr; + } else { + ref = jit_Z_PTR(jit, var_addr); + ref = ir_ADD_OFFSET(ref, offsetof(zend_reference, val)); + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + } + + if (var_type != IS_UNKNOWN) { + var_type &= ~(IS_TRACE_REFERENCE|IS_TRACE_INDIRECT|IS_TRACE_PACKED); + } + if (add_type_guard + && var_type != IS_UNKNOWN + && (var_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (1 << var_type)) { + ref = jit_Z_TYPE(jit, var_addr); + ir_GUARD(ir_EQ(ref, ir_CONST_U8(var_type)), ir_CONST_ADDR(exit_addr)); + + ZEND_ASSERT(var_info & (1 << var_type)); + if (var_type < IS_STRING) { + var_info = (1 << var_type); + } else if (var_type != IS_ARRAY) { + var_info = (1 << var_type) | (var_info & (MAY_BE_RC1|MAY_BE_RCN)); + } else { + var_info = MAY_BE_ARRAY | (var_info & (MAY_BE_ARRAY_OF_ANY|MAY_BE_ARRAY_OF_REF|MAY_BE_ARRAY_KEY_ANY|MAY_BE_RC1|MAY_BE_RCN)); + } + + *var_info_ptr = var_info; + } else { + 
var_info &= ~MAY_BE_REF; + *var_info_ptr = var_info; + } + *var_info_ptr |= MAY_BE_GUARD; /* prevent generation of specialized zval dtor */ + + return 1; +} + +static bool zend_jit_fetch_indirect_var(zend_jit_ctx *jit, const zend_op *opline, uint8_t var_type, uint32_t *var_info_ptr, zend_jit_addr *var_addr_ptr, bool add_indirect_guard) +{ + zend_jit_addr var_addr = *var_addr_ptr; + uint32_t var_info = *var_info_ptr; + int32_t exit_point; + const void *exit_addr; + ir_ref ref = IR_UNUSED; + + if (add_indirect_guard) { + int32_t exit_point = zend_jit_trace_get_exit_point(opline, 0); + const void *exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + jit_guard_Z_TYPE(jit, var_addr, IS_INDIRECT, exit_addr); + ref = jit_Z_PTR(jit, var_addr); + } else { + /* This LOAD of INDIRECT VAR, stored by the previous FETCH_(DIM/OBJ)_W, + * is eliminated by store forwarding (S2L) */ + ref = jit_Z_PTR(jit, var_addr); + } + *var_info_ptr &= ~MAY_BE_INDIRECT; + var_addr = ZEND_ADDR_REF_ZVAL(ref); + *var_addr_ptr = var_addr; + + if (var_type != IS_UNKNOWN) { + var_type &= ~(IS_TRACE_INDIRECT|IS_TRACE_PACKED); + } + if (!(var_type & IS_TRACE_REFERENCE) + && var_type != IS_UNKNOWN + && (var_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (1 << var_type)) { + exit_point = zend_jit_trace_get_exit_point(opline, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (!exit_addr) { + return 0; + } + + jit_guard_Z_TYPE(jit, var_addr, var_type, exit_addr); + + //var_info = zend_jit_trace_type_to_info_ex(var_type, var_info); + ZEND_ASSERT(var_info & (1 << var_type)); + if (var_type < IS_STRING) { + var_info = (1 << var_type); + } else if (var_type != IS_ARRAY) { + var_info = (1 << var_type) | (var_info & (MAY_BE_RC1|MAY_BE_RCN)); + } else { + var_info = MAY_BE_ARRAY | (var_info & (MAY_BE_ARRAY_OF_ANY|MAY_BE_ARRAY_OF_REF|MAY_BE_ARRAY_KEY_ANY|MAY_BE_RC1|MAY_BE_RCN)); + } + + *var_info_ptr = var_info; + } + + return 1; +} + +static int zend_jit_trace_handler(zend_jit_ctx *jit, const zend_op_array *op_array, const zend_op *opline, int may_throw, zend_jit_trace_rec *trace) +{ + zend_jit_op_array_trace_extension *jit_extension = + (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); + size_t offset = jit_extension->offset; + const void *handler = + (zend_vm_opcode_handler_t)ZEND_OP_TRACE_INFO(opline, offset)->call_handler; + ir_ref ref; + + zend_jit_set_ip(jit, opline); + if (GCC_GLOBAL_REGS) { + ir_CALL(IR_VOID, ir_CONST_FUNC(handler)); + } else { + ref = jit_FP(jit); + ref = ir_CALL_1(IR_I32, ir_CONST_FC_FUNC(handler), ref); + } + if (may_throw + && opline->opcode != ZEND_RETURN + && opline->opcode != ZEND_RETURN_BY_REF) { + zend_jit_check_exception(jit); + } + + while (trace->op != ZEND_JIT_TRACE_VM && trace->op != ZEND_JIT_TRACE_END) { + trace++; + } + + if (!GCC_GLOBAL_REGS + && (trace->op != ZEND_JIT_TRACE_END || trace->stop != ZEND_JIT_TRACE_STOP_RETURN)) { + if (opline->opcode == ZEND_RETURN || + opline->opcode == ZEND_RETURN_BY_REF || + opline->opcode == ZEND_DO_UCALL || + opline->opcode == ZEND_DO_FCALL_BY_NAME || + opline->opcode == ZEND_DO_FCALL || + opline->opcode == ZEND_GENERATOR_CREATE) { + + ir_ref addr = jit_EG(current_execute_data); + + jit_STORE_FP(jit, ir_LOAD_A(addr)); + } + } + + if (zend_jit_trace_may_exit(op_array, opline)) { + if (opline->opcode == ZEND_RETURN || + opline->opcode == ZEND_RETURN_BY_REF || + opline->opcode == ZEND_GENERATOR_CREATE) { + + if (zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) { + if (trace->op != ZEND_JIT_TRACE_END || + 
(trace->stop != ZEND_JIT_TRACE_STOP_RETURN && + trace->stop != ZEND_JIT_TRACE_STOP_INTERPRETER)) { + /* this check may be handled by the following OPLINE guard or jmp [IP] */ + ir_GUARD(ir_NE(jit_IP(jit), ir_CONST_ADDR(zend_jit_halt_op)), + jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } + } else if (GCC_GLOBAL_REGS) { + ir_GUARD(jit_IP(jit), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } else { + ir_GUARD(ir_GE(ref, ir_CONST_I32(0)), jit_STUB_ADDR(jit, jit_stub_trace_halt)); + } + } else if (opline->opcode == ZEND_EXIT || + opline->opcode == ZEND_GENERATOR_RETURN || + opline->opcode == ZEND_YIELD || + opline->opcode == ZEND_YIELD_FROM) { + ir_IJMP(jit_STUB_ADDR(jit, jit_stub_trace_halt)); + ir_BEGIN(IR_UNUSED); /* unreachable block */ + } + if (trace->op != ZEND_JIT_TRACE_END || + (trace->stop != ZEND_JIT_TRACE_STOP_RETURN && + trace->stop != ZEND_JIT_TRACE_STOP_INTERPRETER)) { + + const zend_op *next_opline = trace->opline; + const zend_op *exit_opline = NULL; + uint32_t exit_point; + const void *exit_addr; + uint32_t old_info = 0; + uint32_t old_res_info = 0; + zend_jit_trace_stack *stack = JIT_G(current_frame)->stack; + + if (zend_is_smart_branch(opline)) { + bool exit_if_true = 0; + exit_opline = zend_jit_trace_get_exit_opline(trace, opline + 1, &exit_if_true); + } else { + switch (opline->opcode) { + case ZEND_JMPZ: + case ZEND_JMPNZ: + case ZEND_JMPZ_EX: + case ZEND_JMPNZ_EX: + case ZEND_JMP_SET: + case ZEND_COALESCE: + case ZEND_JMP_NULL: + case ZEND_FE_RESET_R: + case ZEND_FE_RESET_RW: + exit_opline = (trace->opline == opline + 1) ? + OP_JMP_ADDR(opline, opline->op2) : + opline + 1; + break; + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + exit_opline = (trace->opline == opline + 1) ? + ZEND_OFFSET_TO_OPLINE(opline, opline->extended_value) : + opline + 1; + break; + + } + } + + switch (opline->opcode) { + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + if (opline->op2_type != IS_UNUSED) { + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op2.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op2.var), IS_UNKNOWN, 1); + } + break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + if (opline->op1_type == IS_CV) { + old_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), IS_UNKNOWN, 1); + } + break; + } + if (opline->result_type == IS_VAR || opline->result_type == IS_TMP_VAR) { + old_res_info = STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var)); + SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_UNKNOWN, 1); + } + exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); + exit_addr = zend_jit_trace_get_exit_addr(exit_point); + + if (opline->result_type == IS_VAR || opline->result_type == IS_TMP_VAR) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->result.var), old_res_info); + } + switch (opline->opcode) { + case ZEND_FE_FETCH_R: + case ZEND_FE_FETCH_RW: + if (opline->op2_type != IS_UNUSED) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op2.var), old_info); + } + break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + if (opline->op1_type == IS_CV) { + SET_STACK_INFO(stack, EX_VAR_TO_NUM(opline->op1.var), old_info); + } + break; + } + + if (!exit_addr) { + return 0; + } + ir_GUARD(jit_CMP_IP(jit, IR_EQ, next_opline), ir_CONST_ADDR(exit_addr)); + } + } + + zend_jit_set_last_valid_opline(jit, trace->opline); + + return 1; +} + +static int zend_jit_deoptimizer_start(zend_jit_ctx *jit, + zend_string *name, + uint32_t trace_num, + uint32_t exit_num) +{ + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 
0 : IR_START_BR_TARGET); + + jit->ctx.spill_base = ZREG_FP; + + jit->op_array = NULL; + jit->ssa = NULL; + jit->name = zend_string_copy(name); + + jit->ctx.flags |= IR_SKIP_PROLOGUE; + + return 1; +} + +static int zend_jit_trace_start(zend_jit_ctx *jit, + const zend_op_array *op_array, + zend_ssa *ssa, + zend_string *name, + uint32_t trace_num, + zend_jit_trace_info *parent, + uint32_t exit_num) +{ + zend_jit_init_ctx(jit, (zend_jit_vm_kind == ZEND_VM_KIND_CALL) ? 0 : IR_START_BR_TARGET); + + jit->ctx.spill_base = ZREG_FP; + + jit->op_array = NULL; + jit->current_op_array = op_array; + jit->ssa = ssa; + jit->name = zend_string_copy(name); + + if (!GCC_GLOBAL_REGS) { + if (!parent) { + ir_ref ref = ir_PARAM(IR_ADDR, "execute_data", 1); + jit_STORE_FP(jit, ref); + jit->ctx.flags |= IR_FASTCALL_FUNC; + } + } + + if (parent) { + jit->ctx.flags |= IR_SKIP_PROLOGUE; + } + + if (parent) { + int i; + int parent_vars_count = parent->exit_info[exit_num].stack_size; + zend_jit_trace_stack *parent_stack = + parent->stack_map + + parent->exit_info[exit_num].stack_offset; + + /* prevent clobbering of registers used for deoptimization */ + for (i = 0; i < parent_vars_count; i++) { + if (STACK_FLAGS(parent_stack, i) != ZREG_CONST + && STACK_REG(parent_stack, i) != ZREG_NONE) { + int32_t reg = STACK_REG(parent_stack, i); + ir_type type; + + if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_COPY) { + type = IR_ADDR; + } else if (STACK_TYPE(parent_stack, i) == IS_LONG) { + type = IR_LONG; + } else if (STACK_TYPE(parent_stack, i) == IS_DOUBLE) { + type = IR_DOUBLE; + } else { + ZEND_UNREACHABLE(); + } + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + ir_ref ref = ir_RLOAD(type, reg); + + if (STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) { + /* op3 is used as a flag that the value is already stored in memory. + * In case the IR framework decides to spill the result of IR_LOAD, + * it doesn't have to store the value once again. + * + * See: insn->op3 check in ir_emit_rload() + */ + ir_set_op(&jit->ctx, ref, 3, EX_NUM_TO_VAR(i)); + } + } + } + } + } + + if (parent && parent->exit_info[exit_num].flags & ZEND_JIT_EXIT_METHOD_CALL) { + ZEND_ASSERT(parent->exit_info[exit_num].poly_func_reg >= 0 && parent->exit_info[exit_num].poly_this_reg >= 0); + ir_RLOAD_A(parent->exit_info[exit_num].poly_func_reg); + ir_RLOAD_A(parent->exit_info[exit_num].poly_this_reg); + } + + ir_STORE(jit_EG(jit_trace_num), ir_CONST_U32(trace_num)); + + return 1; +} + +static int zend_jit_trace_begin_loop(zend_jit_ctx *jit) +{ + return ir_LOOP_BEGIN(ir_END()); +} + +static void zend_jit_trace_gen_phi(zend_jit_ctx *jit, zend_ssa_phi *phi) +{ + int dst_var = phi->ssa_var; + int src_var = phi->sources[0]; + ir_ref ref; + + ZEND_ASSERT(!(jit->ra[dst_var].flags & ZREG_LOAD)); + ZEND_ASSERT(jit->ra[src_var].ref != IR_UNUSED && jit->ra[src_var].ref != IR_NULL); + + ref = ir_PHI_2( + (jit->ssa->var_info[src_var].type & MAY_BE_LONG) ? 
IR_LONG : IR_DOUBLE, + zend_jit_use_reg(jit, ZEND_ADDR_REG(src_var)), IR_UNUSED); + + src_var = phi->sources[1]; + ZEND_ASSERT(jit->ra[src_var].ref == IR_NULL); + jit->ra[src_var].flags |= ZREG_FORWARD; + + zend_jit_def_reg(jit, ZEND_ADDR_REG(dst_var), ref); +} + +static int zend_jit_trace_end_loop(zend_jit_ctx *jit, int loop_ref, const void *timeout_exit_addr) +{ + if (timeout_exit_addr) { + zend_jit_check_timeout(jit, NULL, timeout_exit_addr); + } + ZEND_ASSERT(jit->ctx.ir_base[loop_ref].op2 == IR_UNUSED); + ir_MERGE_SET_OP(loop_ref, 2, ir_LOOP_END()); + return 1; +} + +static int zend_jit_trace_return(zend_jit_ctx *jit, bool original_handler, const zend_op *opline) +{ + if (GCC_GLOBAL_REGS) { + if (!original_handler) { + ir_TAILCALL(IR_VOID, ir_LOAD_A(jit_IP(jit))); + } else { + ir_TAILCALL(IR_VOID, zend_jit_orig_opline_handler(jit)); + } + } else { + if (original_handler) { + ir_ref ref; + ir_ref addr = zend_jit_orig_opline_handler(jit); + +#if defined(IR_TARGET_X86) + addr = ir_CAST_FC_FUNC(addr); +#endif + ref = ir_CALL_1(IR_I32, addr, jit_FP(jit)); + if (opline && + (opline->opcode == ZEND_RETURN + || opline->opcode == ZEND_RETURN_BY_REF + || opline->opcode == ZEND_GENERATOR_RETURN + || opline->opcode == ZEND_GENERATOR_CREATE + || opline->opcode == ZEND_YIELD + || opline->opcode == ZEND_YIELD_FROM)) { + ir_RETURN(ref); + } + } + ir_RETURN(ir_CONST_I32(2)); // ZEND_VM_LEAVE + } + return 1; +} + +static int zend_jit_link_side_trace(const void *code, size_t size, uint32_t jmp_table_size, uint32_t exit_num, const void *addr) +{ + return ir_patch(code, size, jmp_table_size, zend_jit_trace_get_exit_addr(exit_num), addr); +} + +static int zend_jit_trace_link_to_root(zend_jit_ctx *jit, zend_jit_trace_info *t, const void *timeout_exit_addr) +{ + const void *link_addr; + + /* Skip prologue. 
*/ + ZEND_ASSERT(zend_jit_trace_prologue_size != (size_t)-1); + link_addr = (const void*)((const char*)t->code_start + zend_jit_trace_prologue_size); + + if (timeout_exit_addr) { + zend_jit_check_timeout(jit, NULL, timeout_exit_addr); + } + ir_IJMP(ir_CONST_ADDR(link_addr)); + + return 1; +} + +static bool zend_jit_opline_supports_reg(const zend_op_array *op_array, zend_ssa *ssa, const zend_op *opline, const zend_ssa_op *ssa_op, zend_jit_trace_rec *trace) +{ + uint32_t op1_info, op2_info; + + switch (opline->opcode) { + case ZEND_SEND_VAR: + case ZEND_SEND_VAL: + case ZEND_SEND_VAL_EX: + return (opline->op2_type != IS_CONST) && (opline->opcode != ZEND_SEND_VAL_EX || opline->op2.num <= MAX_ARG_FLAG_NUM); + case ZEND_QM_ASSIGN: + case ZEND_IS_SMALLER: + case ZEND_IS_SMALLER_OR_EQUAL: + case ZEND_IS_EQUAL: + case ZEND_IS_NOT_EQUAL: + case ZEND_IS_IDENTICAL: + case ZEND_IS_NOT_IDENTICAL: + case ZEND_CASE: + return 1; + case ZEND_RETURN: + return (op_array->type != ZEND_EVAL_CODE && op_array->function_name); + case ZEND_ASSIGN: + return (opline->op1_type == IS_CV); + case ZEND_ADD: + case ZEND_SUB: + case ZEND_MUL: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + return !(op1_info & MAY_BE_UNDEF) + && !(op2_info & MAY_BE_UNDEF) + && (op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) + && (op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE)); + case ZEND_BW_OR: + case ZEND_BW_AND: + case ZEND_BW_XOR: + case ZEND_SL: + case ZEND_SR: + case ZEND_MOD: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + return (op1_info & MAY_BE_LONG) + && (op2_info & MAY_BE_LONG); + case ZEND_PRE_INC: + case ZEND_PRE_DEC: + case ZEND_POST_INC: + case ZEND_POST_DEC: + op1_info = OP1_INFO(); + return opline->op1_type == IS_CV + && (op1_info & MAY_BE_LONG) + && !(op1_info & MAY_BE_REF); + case ZEND_STRLEN: + op1_info = OP1_INFO(); + return (opline->op1_type & (IS_CV|IS_CONST)) + && (op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == MAY_BE_STRING; + case ZEND_COUNT: + op1_info = OP1_INFO(); + return (opline->op1_type & (IS_CV|IS_CONST)) + && (op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) == MAY_BE_ARRAY; + case ZEND_JMPZ: + case ZEND_JMPNZ: + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE) { + if (!ssa->cfg.map) { + return 0; + } + if (opline > op_array->opcodes + ssa->cfg.blocks[ssa->cfg.map[opline-op_array->opcodes]].start && + ((opline-1)->result_type & (IS_SMART_BRANCH_JMPZ|IS_SMART_BRANCH_JMPNZ)) != 0) { + return 0; + } + } + ZEND_FALLTHROUGH; + case ZEND_BOOL: + case ZEND_BOOL_NOT: + case ZEND_JMPZ_EX: + case ZEND_JMPNZ_EX: + return 1; + case ZEND_FETCH_CONSTANT: + return 1; + case ZEND_FETCH_DIM_R: + op1_info = OP1_INFO(); + op2_info = OP2_INFO(); + if (trace + && trace->op1_type != IS_UNKNOWN + && (trace->op1_type & ~(IS_TRACE_REFERENCE|IS_TRACE_INDIRECT|IS_TRACE_PACKED)) == IS_ARRAY) { + op1_info &= ~((MAY_BE_ANY|MAY_BE_UNDEF) - MAY_BE_ARRAY); + } + return ((op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) && + (!(opline->op1_type & (IS_TMP_VAR|IS_VAR)) || !(op1_info & MAY_BE_RC1)) && + (((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_LONG) || + (((op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_STRING) && + (!(opline->op2_type & (IS_TMP_VAR|IS_VAR)) || !(op2_info & MAY_BE_RC1)))); + } + return 0; +} + +static bool zend_jit_var_supports_reg(zend_ssa *ssa, int var) +{ + if (ssa->vars[var].no_val) { + /* we don't need the value */ + return 0; + } + + if (!(JIT_G(opt_flags) & ZEND_JIT_REG_ALLOC_GLOBAL)) { + /* Disable global register allocation, + * register allocation for SSA variables connected through Phi functions + */ 
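+		/* Without global register allocation a value cannot be kept in a
+		 * register across basic blocks, so reject variables defined by a Phi
+		 * and variables whose value flows into a Phi whose result is used. */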
+ if (ssa->vars[var].definition_phi) { + return 0; + } + if (ssa->vars[var].phi_use_chain) { + zend_ssa_phi *phi = ssa->vars[var].phi_use_chain; + do { + if (!ssa->vars[phi->ssa_var].no_val) { + return 0; + } + phi = zend_ssa_next_use_phi(ssa, var, phi); + } while (phi); + } + } + + if (((ssa->var_info[var].type & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) != MAY_BE_DOUBLE) && + ((ssa->var_info[var].type & (MAY_BE_ANY|MAY_BE_UNDEF|MAY_BE_REF)) != MAY_BE_LONG)) { + /* bad type */ + return 0; + } + + return 1; +} + +static bool zend_jit_may_be_in_reg(const zend_op_array *op_array, zend_ssa *ssa, int var) +{ + if (!zend_jit_var_supports_reg(ssa, var)) { + return 0; + } + + if (ssa->vars[var].definition >= 0) { + uint32_t def = ssa->vars[var].definition; + if (!zend_jit_opline_supports_reg(op_array, ssa, op_array->opcodes + def, ssa->ops + def, NULL)) { + return 0; + } + } + + if (ssa->vars[var].use_chain >= 0) { + int use = ssa->vars[var].use_chain; + + do { + if (!zend_ssa_is_no_val_use(op_array->opcodes + use, ssa->ops + use, var) && + !zend_jit_opline_supports_reg(op_array, ssa, op_array->opcodes + use, ssa->ops + use, NULL)) { + return 0; + } + /* Quick workaround to disable register allocation for unsupported operand */ + // TODO: Find a general solution ??? + if (op_array->opcodes[use].opcode == ZEND_FETCH_DIM_R) { + return 0; + } + use = zend_ssa_next_use(ssa->ops, var, use); + } while (use >= 0); + } + + if (JIT_G(trigger) != ZEND_JIT_ON_HOT_TRACE) { + int def_block, use_block, b, use, j; + zend_basic_block *bb; + zend_ssa_phi *p; + bool ret = 1; + zend_worklist worklist; + ALLOCA_FLAG(use_heap) + + /* Check if live range is split by ENTRY block */ + if (ssa->vars[var].definition >= 0) { + def_block =ssa->cfg.map[ssa->vars[var].definition]; + } else { + ZEND_ASSERT(ssa->vars[var].definition_phi); + def_block = ssa->vars[var].definition_phi->block; + } + + ZEND_WORKLIST_ALLOCA(&worklist, ssa->cfg.blocks_count, use_heap); + + if (ssa->vars[var].use_chain >= 0) { + use = ssa->vars[var].use_chain; + do { + use_block = ssa->cfg.map[use]; + if (use_block != def_block) { + zend_worklist_push(&worklist, use_block); + } + use = zend_ssa_next_use(ssa->ops, var, use); + } while (use >= 0); + } + + p = ssa->vars[var].phi_use_chain; + while (p) { + use_block = p->block; + if (use_block != def_block) { + bb = &ssa->cfg.blocks[use_block]; + for (j = 0; j < bb->predecessors_count; j++) { + if (p->sources[j] == var) { + use_block = ssa->cfg.predecessors[bb->predecessor_offset + j]; + if (use_block != def_block) { + zend_worklist_push(&worklist, use_block); + } + } + } + } + p = zend_ssa_next_use_phi(ssa, var, p); + } + + while (zend_worklist_len(&worklist) != 0) { + b = zend_worklist_pop(&worklist); + bb = &ssa->cfg.blocks[b]; + if (bb->flags & (ZEND_BB_ENTRY|ZEND_BB_RECV_ENTRY)) { + ret = 0; + break; + } + for (j = 0; j < bb->predecessors_count; j++) { + b = ssa->cfg.predecessors[bb->predecessor_offset + j]; + if (b != def_block) { + zend_worklist_push(&worklist, b); + } + } + } + + ZEND_WORKLIST_FREE_ALLOCA(&worklist, use_heap); + + return ret; + } + + return 1; +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + */ diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index c3ac9fb4a74e2..85fcb657ffe71 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -86,6 +86,7 @@ static int zend_jit_trace_startup(bool reattached) return SUCCESS; } +#ifndef ZEND_JIT_IR static const void 
*zend_jit_trace_allocate_exit_group(uint32_t n) { dasm_State* dasm_state = NULL; @@ -115,6 +116,7 @@ static const void *zend_jit_trace_allocate_exit_group(uint32_t n) return entry; } +#endif static const void *zend_jit_trace_allocate_exit_point(uint32_t n) { @@ -147,7 +149,22 @@ static const void *zend_jit_trace_get_exit_addr(uint32_t n) ((n % ZEND_JIT_EXIT_POINTS_PER_GROUP) * ZEND_JIT_EXIT_POINTS_SPACING)); } -#if ZEND_JIT_TARGET_ARM64 +#ifdef ZEND_JIT_IR +static uint32_t zend_jit_exit_point_by_addr(void *addr) +{ + uint32_t n = (ZEND_JIT_EXIT_NUM + (ZEND_JIT_EXIT_POINTS_PER_GROUP - 1)) / ZEND_JIT_EXIT_POINTS_PER_GROUP; + uint32_t i; + + for (i = 0; i < n; i++) { + if ((char*)addr >= (char*)zend_jit_exit_groups[i] + && (char*)addr <= (char*)zend_jit_exit_groups[i] + ((ZEND_JIT_EXIT_POINTS_PER_GROUP - 1) * ZEND_JIT_EXIT_POINTS_SPACING)) { + return (i * ZEND_JIT_EXIT_POINTS_PER_GROUP) + + (((char*)addr - (char*)zend_jit_exit_groups[i]) / ZEND_JIT_EXIT_POINTS_SPACING); + } + } + return (uint32_t)-1; +} +#elif ZEND_JIT_TARGET_ARM64 static zend_jit_trace_info *zend_jit_get_current_trace_info(void) { return &zend_jit_traces[ZEND_JIT_TRACE_NUM]; @@ -191,7 +208,12 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t do { if (STACK_TYPE(stack, stack_size-1) != IS_UNKNOWN || STACK_MEM_TYPE(stack, stack_size-1) != IS_UNKNOWN - || STACK_REG(stack, stack_size-1) != ZREG_NONE) { +#ifndef ZEND_JIT_IR + || STACK_REG(stack, stack_size-1) != ZREG_NONE +#else + || STACK_REF(stack, stack_size-1) != IR_UNUSED +#endif + ) { break; } stack_size--; @@ -203,7 +225,11 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t } /* Try to reuse exit points */ - if (to_opline != NULL && t->exit_count > 0) { + if (to_opline != NULL +#ifdef ZEND_JIT_IR + && !(flags & ZEND_JIT_EXIT_METHOD_CALL) +#endif + && t->exit_count > 0) { uint32_t i = t->exit_count; do { @@ -211,7 +237,9 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t if (stack_size == 0 || (t->exit_info[i].stack_size >= stack_size && memcmp(t->stack_map + t->exit_info[i].stack_offset, stack, stack_size * sizeof(zend_jit_trace_stack)) == 0)) { +#ifndef ZEND_JIT_IR stack_offset = t->exit_info[i].stack_offset; +#endif if (t->exit_info[i].opline == to_opline && t->exit_info[i].flags == flags && t->exit_info[i].stack_size == stack_size) { @@ -236,6 +264,12 @@ static uint32_t zend_jit_trace_get_exit_point(const zend_op *to_opline, uint32_t t->exit_info[exit_point].flags = flags; t->exit_info[exit_point].stack_size = stack_size; t->exit_info[exit_point].stack_offset = stack_offset; +#ifdef ZEND_JIT_IR + t->exit_info[exit_point].poly_func_ref = 0; + t->exit_info[exit_point].poly_this_ref = 0; + t->exit_info[exit_point].poly_func_reg = ZREG_NONE; + t->exit_info[exit_point].poly_this_reg = ZREG_NONE; +#endif } return exit_point; @@ -432,7 +466,7 @@ static zend_always_inline void zend_jit_trace_add_op_guard(zend_ssa #define CHECK_OP_TRACE_TYPE(_var, _ssa_var, op_info, op_type) do { \ if (op_type != IS_UNKNOWN) { \ if ((op_info & MAY_BE_GUARD) != 0) { \ - if (!zend_jit_type_guard(&dasm_state, opline, _var, op_type)) { \ + if (!zend_jit_type_guard(&ctx, opline, _var, op_type)) { \ goto jit_failure; \ } \ if (ssa->vars[_ssa_var].alias != NO_ALIAS) { \ @@ -821,11 +855,88 @@ static int zend_jit_trace_add_ret_phis(zend_jit_trace_rec *trace_buffer, uint32_ static int zend_jit_trace_copy_ssa_var_info(const zend_op_array *op_array, const zend_ssa *ssa, const zend_op **tssa_opcodes, zend_ssa 
*tssa, int ssa_var) { - int var, use; + int var, use, def, src; zend_ssa_op *op; - zend_ssa_var_info *info; - unsigned int no_val; - zend_ssa_alias_kind alias; + uint32_t n; + + if (tssa->vars[ssa_var].definition_phi) { + uint32_t b = ssa->cfg.map[tssa_opcodes[0] - op_array->opcodes]; + zend_basic_block *bb = ssa->cfg.blocks + b; + + if (bb->flags & ZEND_BB_LOOP_HEADER) { + zend_ssa_phi *phi = ssa->blocks[b].phis; + zend_ssa_phi *pi = NULL; + + var = tssa->vars[ssa_var].var; + while (phi) { + if (ssa->vars[phi->ssa_var].var == var) { + if (phi->pi >= 0) { + pi = phi; + } else { + src = phi->ssa_var; + goto copy_info; + } + } + phi = phi->next; + } + if (pi) { + src = pi->ssa_var; + goto copy_info; + } + + while (bb->idom >= 0) { + b = bb->idom; + bb = ssa->cfg.blocks + b; + + for (n = bb->len, op = ssa->ops + bb->start + n; n > 0; n--) { + op--; + if (op->result_def >= 0 && ssa->vars[op->result_def].var == var) { + src = op->result_def; + goto copy_info; + } else if (op->op2_def >= 0 && ssa->vars[op->op2_def].var == var) { + src = op->op2_def; + goto copy_info; + } else if (op->op1_def >= 0 && ssa->vars[op->op1_def].var == var) { + src = op->op1_def; + goto copy_info; + } + } + + phi = ssa->blocks[b].phis; + zend_ssa_phi *pi = NULL; + while (phi) { + if (ssa->vars[phi->ssa_var].var == var) { + if (phi->pi >= 0) { + pi = phi; + } else { + src = phi->ssa_var; + goto copy_info; + } + } + phi = phi->next; + } + if (pi) { + src = pi->ssa_var; + goto copy_info; + } + } + } + } else if (tssa->vars[ssa_var].definition >= 0) { + def = tssa->vars[ssa_var].definition; + ZEND_ASSERT((tssa_opcodes[def] - op_array->opcodes) < op_array->last); + op = ssa->ops + (tssa_opcodes[def] - op_array->opcodes); + if (tssa->ops[def].op1_def == ssa_var) { + src = op->op1_def; + } else if (tssa->ops[def].op2_def == ssa_var) { + src = op->op2_def; + } else if (tssa->ops[def].result_def == ssa_var) { + src = op->result_def; + } else { + assert(0); + return 0; + } + goto copy_info; + } if (tssa->vars[ssa_var].phi_use_chain) { // TODO: this may be incorrect ??? 
@@ -838,27 +949,24 @@ static int zend_jit_trace_copy_ssa_var_info(const zend_op_array *op_array, const ZEND_ASSERT((tssa_opcodes[use] - op_array->opcodes) < op_array->last); op = ssa->ops + (tssa_opcodes[use] - op_array->opcodes); if (tssa->ops[use].op1_use == var) { - no_val = ssa->vars[op->op1_use].no_val; - alias = ssa->vars[op->op1_use].alias; - info = ssa->var_info + op->op1_use; + src = op->op1_use; } else if (tssa->ops[use].op2_use == var) { - no_val = ssa->vars[op->op2_use].no_val; - alias = ssa->vars[op->op2_use].alias; - info = ssa->var_info + op->op2_use; + src = op->op2_use; } else if (tssa->ops[use].result_use == var) { - no_val = ssa->vars[op->result_use].no_val; - alias = ssa->vars[op->result_use].alias; - info = ssa->var_info + op->result_use; + src = op->result_use; } else { assert(0); return 0; } - tssa->vars[ssa_var].no_val = no_val; - tssa->vars[ssa_var].alias = alias; - memcpy(&tssa->var_info[ssa_var], info, sizeof(zend_ssa_var_info)); - return 1; + goto copy_info; } return 0; + +copy_info: + tssa->vars[ssa_var].no_val = ssa->vars[src].no_val; + tssa->vars[ssa_var].alias = ssa->vars[src].alias; + memcpy(&tssa->var_info[ssa_var], &ssa->var_info[src], sizeof(zend_ssa_var_info)); + return 1; } static void zend_jit_trace_propagate_range(const zend_op_array *op_array, const zend_op **tssa_opcodes, zend_ssa *tssa, int ssa_var) @@ -1636,9 +1744,6 @@ static zend_ssa *zend_jit_trace_build_tssa(zend_jit_trace_rec *trace_buffer, uin TRACE_FRAME_INIT(frame, op_array, 0, 0); TRACE_FRAME_SET_RETURN_SSA_VAR(frame, -1); frame->used_stack = 0; - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(frame->stack, i, IS_UNKNOWN, 1); - } memset(&return_value_info, 0, sizeof(return_value_info)); if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) { @@ -2325,9 +2430,6 @@ static zend_ssa *zend_jit_trace_build_tssa(zend_jit_trace_rec *trace_buffer, uin TRACE_FRAME_INIT(call, op_array, 0, 0); call->used_stack = 0; top = zend_jit_trace_call_frame(top, op_array); - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(call->stack, i, IS_UNKNOWN, 1); - } } else { ZEND_ASSERT(&call->func->op_array == op_array); } @@ -2454,9 +2556,6 @@ static zend_ssa *zend_jit_trace_build_tssa(zend_jit_trace_rec *trace_buffer, uin TRACE_FRAME_INIT(frame, op_array, 0, 0); TRACE_FRAME_SET_RETURN_SSA_VAR(frame, -1); frame->used_stack = 0; - for (i = 0; i < op_array->last_var + op_array->T; i++) { - SET_STACK_TYPE(frame->stack, i, IS_UNKNOWN, 1); - } } } else if (p->op == ZEND_JIT_TRACE_INIT_CALL) { @@ -2661,25 +2760,53 @@ static zend_ssa *zend_jit_trace_build_tssa(zend_jit_trace_rec *trace_buffer, uin return tssa; } +#ifndef ZEND_JIT_IR +# define RA_HAS_IVAL(var) (start[var] >= 0) +# define RA_IVAL_FLAGS(var) flags[var] +# define RA_IVAL_START(var, line) do {start[var] = (line);} while (0) +# define RA_IVAL_END(var, line) do {end[var] = (line);} while (0) +# define RA_IVAL_CLOSE(var, line) zend_jit_close_var(stack, var, start, end, flags, line) +# define RA_IVAL_DEL(var) do {start[var] = end[var] = -1;} while (0) +# define RA_HAS_REG(var) (ra[var] != NULL) +# define RA_REG_FLAGS(var) ra[var]->flags +# define RA_REG_DEL(var) do {ra[var] = NULL;} while (0) + static void zend_jit_close_var(zend_jit_trace_stack *stack, uint32_t n, int *start, int *end, uint8_t *flags, int line) { int32_t var = STACK_VAR(stack, n); - if (var >= 0 && start[var] >= 0 && !(flags[var] & ZREG_LAST_USE)) { + if (var >= 0 && RA_HAS_IVAL(var) && !(RA_IVAL_FLAGS(var) & ZREG_LAST_USE)) { // TODO: shrink 
interval to last side exit ???? - end[var] = line; + RA_IVAL_END(var, line); } } +#else +# define RA_HAS_IVAL(var) (ra[var].ref != 0) +# define RA_IVAL_FLAGS(var) ra[var].flags +# define RA_IVAL_START(var, line) do {ra[var].ref = IR_NULL;} while (0) +# define RA_IVAL_END(var, line) +# define RA_IVAL_CLOSE(var, line) +# define RA_IVAL_DEL(var) do {ra[var].ref = IR_UNUSED;} while (0) +# define RA_HAS_REG(var) (ra[var].ref != 0) +# define RA_REG_FLAGS(var) ra[var].flags +# define RA_REG_START(var, line) do {ra[var].ref = IR_NULL;} while (0) +# define RA_REG_DEL(var) do {ra[var].ref = IR_UNUSED;} while (0) +#endif + +#ifndef ZEND_JIT_IR static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, int *start, int *end, uint8_t *flags, const zend_ssa *ssa, const zend_op **ssa_opcodes, const zend_op_array *op_array, const zend_ssa *op_array_ssa) +#else +static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, zend_jit_reg_var *ra, const zend_ssa *ssa, const zend_op **ssa_opcodes, const zend_op_array *op_array, const zend_ssa *op_array_ssa) +#endif { - ZEND_ASSERT(start[var] >= 0); - ZEND_ASSERT(!(flags[var] & ZREG_LAST_USE)); - end[var] = line; + ZEND_ASSERT(RA_HAS_IVAL(var)); + ZEND_ASSERT(!(RA_IVAL_FLAGS(var) & ZREG_LAST_USE)); + RA_IVAL_END(var, line); if (def >= 0) { - flags[var] |= ZREG_LAST_USE; - } else if (use_chain < 0 && (flags[var] & (ZREG_LOAD|ZREG_STORE))) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; + } else if (use_chain < 0 && (RA_IVAL_FLAGS(var) & (ZREG_LOAD|ZREG_STORE))) { + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } else if (use_chain >= 0 && !zend_ssa_is_no_val_use(ssa_opcodes[use_chain], ssa->ops + use_chain, var)) { /* pass */ } else if (op_array_ssa->vars) { @@ -2687,21 +2814,25 @@ static void zend_jit_trace_use_var(int line, int var, int def, int use_chain, in if (ssa->ops[line].op1_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].op1_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } else if (ssa->ops[line].op2_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].op2_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } else if (ssa->ops[line].result_use == var) { if (zend_ssa_is_last_use(op_array, op_array_ssa, op_array_ssa->ops[use].result_use, use)) { - flags[var] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(var) |= ZREG_LAST_USE; } } } } +#ifndef ZEND_JIT_IR static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace_rec *trace_buffer, zend_ssa *ssa, uint32_t parent_trace, uint32_t exit_num) +#else +static zend_jit_reg_var* zend_jit_trace_allocate_registers(zend_jit_trace_rec *trace_buffer, zend_ssa *ssa, uint32_t parent_trace, uint32_t exit_num) +#endif { const zend_op **ssa_opcodes = ((zend_tssa*)ssa)->tssa_opcodes; zend_jit_trace_rec *p; @@ -2710,11 +2841,15 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace const zend_ssa *op_array_ssa; const zend_ssa_op *ssa_op; int i, j, idx, count, level; +#ifndef ZEND_JIT_IR int last_idx = -1; int *start, *end; uint8_t *flags; + zend_lifetime_interval **ra, *list, *ival; +#else + zend_jit_reg_var *ra; +#endif const zend_op_array **vars_op_array; - zend_lifetime_interval **intervals, *list, *ival; void *checkpoint; zend_jit_trace_stack_frame *frame; zend_jit_trace_stack *stack; @@ -2723,6 +2858,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace 
zend_jit_trace_stack *parent_stack = parent_trace ? zend_jit_traces[parent_trace].stack_map + zend_jit_traces[parent_trace].exit_info[exit_num].stack_offset : NULL; +#ifndef ZEND_JIT_IR ALLOCA_FLAG(use_heap); ZEND_ASSERT(ssa->var_info != NULL); @@ -2741,6 +2877,12 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace memset(start, -1, sizeof(int) * ssa->vars_count * 2); memset(flags, 0, sizeof(uint8_t) * ssa->vars_count); memset(ZEND_VOIDP(vars_op_array), 0, sizeof(zend_op_array*) * ssa->vars_count); +#else + checkpoint = zend_arena_checkpoint(CG(arena)); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_jit_reg_var)); + vars_op_array = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_op_array*)); + memset(ZEND_VOIDP(vars_op_array), 0, sizeof(zend_op_array*) * ssa->vars_count); +#endif op_array = trace_buffer->op_array; jit_extension = @@ -2766,15 +2908,20 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace && !zend_ssa_is_no_val_use(ssa_opcodes[ssa->vars[i].use_chain], ssa->ops + ssa->vars[i].use_chain, i) && ssa->vars[i].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, i)) { - start[i] = 0; + RA_IVAL_START(i, 0); if (i < parent_vars_count && STACK_REG(parent_stack, i) != ZREG_NONE - && STACK_REG(parent_stack, i) < ZREG_NUM) { +#ifndef ZEND_JIT_IR + && STACK_REG(parent_stack, i) < ZREG_NUM +#else + && STACK_FLAGS(parent_stack, i) != ZREG_ZVAL_COPY +#endif + ) { /* We will try to reuse register from parent trace */ - flags[i] = STACK_FLAGS(parent_stack, i); + RA_IVAL_FLAGS(i) = STACK_FLAGS(parent_stack, i); count += 2; } else { - flags[i] = ZREG_LOAD; + RA_IVAL_FLAGS(i) = ZREG_LOAD; count++; } } @@ -2800,7 +2947,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace if (ssa->vars[phi->ssa_var].use_chain >= 0 && ssa->vars[phi->ssa_var].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, phi->ssa_var)) { - start[phi->ssa_var] = 0; + RA_IVAL_START(phi->ssa_var, 0); count++; } phi = phi->next; @@ -2830,10 +2977,16 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } if (ssa_op->op1_use >= 0 - && start[ssa_op->op1_use] >= 0 + && RA_HAS_IVAL(ssa_op->op1_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op1_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op1_type != IS_CV) { if (opline->opcode == ZEND_CASE || opline->opcode == ZEND_CASE_STRICT @@ -2849,56 +3002,71 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace || opline->opcode == ZEND_BIND_LEXICAL || opline->opcode == ZEND_ROPE_ADD) { /* The value is kept alive and may be used outside of the trace */ - flags[ssa_op->op1_use] |= ZREG_STORE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_STORE; } else { - flags[ssa_op->op1_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_LAST_USE; } } } else { - start[ssa_op->op1_use] = -1; - end[ssa_op->op1_use] = -1; + RA_IVAL_DEL(ssa_op->op1_use); count--; } } if (ssa_op->op2_use >= 0 && ssa_op->op2_use != ssa_op->op1_use - && start[ssa_op->op2_use] >= 0 + && RA_HAS_IVAL(ssa_op->op2_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op2_use)) { +#ifndef ZEND_JIT_IR if 
(support_opline) { - zend_jit_trace_use_var(idx, ssa_op->op2_use, ssa_op->op2_def, ssa_op->op2_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); +#else + /* Quick workaround to disable register allocation for unsupported operand */ + // TODO: Find a general solution ??? + if (support_opline && opline->opcode != ZEND_FETCH_DIM_R) { +#endif + zend_jit_trace_use_var(idx, ssa_op->op2_use, ssa_op->op2_def, ssa_op->op2_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op2_type != IS_CV) { - flags[ssa_op->op2_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op2_use) |= ZREG_LAST_USE; } } else { - start[ssa_op->op2_use] = -1; - end[ssa_op->op2_use] = -1; + RA_IVAL_DEL(ssa_op->op2_use); count--; } } if (ssa_op->result_use >= 0 && ssa_op->result_use != ssa_op->op1_use && ssa_op->result_use != ssa_op->op2_use - && start[ssa_op->result_use] >= 0 + && RA_HAS_IVAL(ssa_op->result_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->result_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->result_use, ssa_op->result_def, ssa_op->res_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->result_use, ssa_op->result_def, ssa_op->res_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); } else { - start[ssa_op->result_use] = -1; - end[ssa_op->result_use] = -1; + RA_IVAL_DEL(ssa_op->result_use); count--; } } if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); } if (ssa_op->op2_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op2.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op2.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op2.var), ssa_op->op2_def); } if (ssa_op->result_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->result.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->result.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->result.var), ssa_op->result_def); } @@ -2920,8 +3088,8 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace || opline->opcode == ZEND_FETCH_CONSTANT) { if (!(ssa->var_info[ssa_op->result_def].type & MAY_BE_DOUBLE) || (opline->opcode != ZEND_PRE_INC && opline->opcode != ZEND_PRE_DEC)) { - start[ssa_op->result_def] = idx; vars_op_array[ssa_op->result_def] = op_array; + RA_IVAL_START(ssa_op->result_def, idx); count++; } } @@ -2930,18 +3098,24 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace && (ssa->vars[ssa_op->op1_def].use_chain >= 0 || ssa->vars[ssa_op->op1_def].phi_use_chain) && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS - && zend_jit_var_supports_reg(ssa, ssa_op->op1_def)) { - start[ssa_op->op1_def] = idx; + && zend_jit_var_supports_reg(ssa, ssa_op->op1_def) + && (!(ssa->var_info[ssa_op->op1_def].type & MAY_BE_GUARD) + || opline->opcode == ZEND_PRE_INC + || opline->opcode == ZEND_PRE_DEC + || opline->opcode == ZEND_POST_INC + || opline->opcode == ZEND_POST_DEC)) { vars_op_array[ssa_op->op1_def] = op_array; + RA_IVAL_START(ssa_op->op1_def, idx); count++; } if (ssa_op->op2_def >= 0 && (ssa->vars[ssa_op->op2_def].use_chain >= 0 || ssa->vars[ssa_op->op2_def].phi_use_chain) && ssa->vars[ssa_op->op2_def].alias == 
NO_ALIAS - && zend_jit_var_supports_reg(ssa, ssa_op->op2_def)) { - start[ssa_op->op2_def] = idx; + && zend_jit_var_supports_reg(ssa, ssa_op->op2_def) + && !(ssa->var_info[ssa_op->op2_def].type & MAY_BE_GUARD)) { vars_op_array[ssa_op->op2_def] = op_array; + RA_IVAL_START(ssa_op->op2_def, idx); count++; } } @@ -2960,29 +3134,34 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace ssa_op++; opline++; if (ssa_op->op1_use >= 0 - && start[ssa_op->op1_use] >= 0 + && RA_HAS_IVAL(ssa_op->op1_use) && !zend_ssa_is_no_val_use(opline, ssa_op, ssa_op->op1_use)) { if (support_opline) { - zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, start, end, flags, ssa, ssa_opcodes, op_array, op_array_ssa); + zend_jit_trace_use_var(idx, ssa_op->op1_use, ssa_op->op1_def, ssa_op->op1_use_chain, +#ifndef ZEND_JIT_IR + start, end, flags, +#else + ra, +#endif + ssa, ssa_opcodes, op_array, op_array_ssa); if (opline->op1_type != IS_CV) { - flags[ssa_op->op1_use] |= ZREG_LAST_USE; + RA_IVAL_FLAGS(ssa_op->op1_use) |= ZREG_LAST_USE; } } else { - start[ssa_op->op1_use] = -1; - end[ssa_op->op1_use] = -1; + RA_IVAL_DEL(ssa_op->op1_use); count--; } } if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); if (support_opline && (ssa->vars[ssa_op->op1_def].use_chain >= 0 || ssa->vars[ssa_op->op1_def].phi_use_chain) && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS && zend_jit_var_supports_reg(ssa, ssa_op->op1_def)) { - start[ssa_op->op1_def] = idx; vars_op_array[ssa_op->op1_def] = op_array; + RA_IVAL_START(ssa_op->op1_def, idx); count++; } } @@ -2996,8 +3175,14 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace idx++; while (opline->opcode == ZEND_RECV_INIT) { /* RECV_INIT doesn't support registers */ +#ifdef ZEND_JIT_IR + if (ssa_op->result_use >= 0 && RA_HAS_IVAL(ssa_op->result_use)) { + RA_IVAL_DEL(ssa_op->result_use); + count--; + } +#endif if (ssa_op->result_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->result.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->result.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->result.var), ssa_op->result_def); } ssa_op++; @@ -3012,7 +3197,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace while (opline->opcode == ZEND_BIND_GLOBAL) { /* BIND_GLOBAL doesn't support registers */ if (ssa_op->op1_def >= 0) { - zend_jit_close_var(stack, EX_VAR_TO_NUM(opline->op1.var), start, end, flags, idx); + RA_IVAL_CLOSE(EX_VAR_TO_NUM(opline->op1.var), idx); SET_STACK_VAR(stack, EX_VAR_TO_NUM(opline->op1.var), ssa_op->op1_def); } ssa_op++; @@ -3029,6 +3214,17 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace /* New call frames */ zend_jit_trace_stack_frame *prev_frame = frame; +#ifdef ZEND_JIT_IR + /* Clear allocated registers */ + for (i = 0; i < op_array->last_var + op_array->T; i++) { + j = STACK_VAR(stack, i); + if (j >= 0 && RA_HAS_IVAL(j) && !(RA_IVAL_FLAGS(j) & ZREG_LAST_USE)) { + RA_IVAL_DEL(j); + count--; + } + } +#endif + frame = zend_jit_trace_call_frame(frame, op_array); frame->prev = prev_frame; frame->func = (const zend_function*)p->op_array; @@ -3044,8 +3240,8 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace if (ssa->vars[j].use_chain >= 0 && ssa->vars[j].alias == NO_ALIAS && 
zend_jit_var_supports_reg(ssa, j)) { - start[j] = idx; - flags[j] = ZREG_LOAD; + RA_IVAL_START(j, idx); + RA_IVAL_FLAGS(j) = ZREG_LOAD; count++; } j++; @@ -3057,7 +3253,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } else if (p->op == ZEND_JIT_TRACE_BACK) { /* Close exiting call frames */ for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx-1); + RA_IVAL_CLOSE(i, idx-1); } op_array = p->op_array; jit_extension = @@ -3075,9 +3271,10 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace vars_op_array[j] = op_array; if (ssa->vars[j].use_chain >= 0 && ssa->vars[j].alias == NO_ALIAS - && zend_jit_var_supports_reg(ssa, j)) { - start[j] = idx; - flags[j] = ZREG_LOAD; + && zend_jit_var_supports_reg(ssa, j) + && !(ssa->var_info[j].type & MAY_BE_GUARD)) { + RA_IVAL_START(j, idx); + RA_IVAL_FLAGS(j) = ZREG_LOAD; count++; } j++; @@ -3097,46 +3294,49 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace while (phi) { i = phi->sources[1]; - if (start[i] >= 0 && !ssa->vars[phi->ssa_var].no_val) { - end[i] = idx; - flags[i] &= ~ZREG_LAST_USE; + if (RA_HAS_IVAL(i) && !ssa->vars[phi->ssa_var].no_val) { + RA_IVAL_END(i, idx); + RA_IVAL_FLAGS(i) &= ~ZREG_LAST_USE; } phi = phi->next; } if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) { for (i = 0; i < op_array->last_var; i++) { - if (start[i] >= 0 && !ssa->vars[i].phi_use_chain) { - end[i] = idx; - flags[i] &= ~ZREG_LAST_USE; + if (RA_HAS_IVAL(i) && !ssa->vars[i].phi_use_chain) { + RA_IVAL_END(i, idx); + RA_IVAL_FLAGS(i) &= ~ZREG_LAST_USE; } else { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } } } +#ifndef ZEND_JIT_IR } else { last_idx = idx; for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } while (frame->prev) { frame = frame->prev; op_array = &frame->func->op_array; stack = frame->stack; for (i = 0; i < op_array->last_var; i++) { - zend_jit_close_var(stack, i, start, end, flags, idx); + RA_IVAL_CLOSE(i, idx); } } +#endif } +#ifndef ZEND_JIT_IR if (!count) { free_alloca(start, use_heap); zend_arena_release(&CG(arena), checkpoint); return NULL; } - intervals = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_lifetime_interval)); - memset(intervals, 0, sizeof(zend_lifetime_interval*) * ssa->vars_count); + ra = zend_arena_calloc(&CG(arena), ssa->vars_count, sizeof(zend_lifetime_interval)); + memset(ra, 0, sizeof(zend_lifetime_interval*) * ssa->vars_count); list = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval) * count); j = 0; for (i = 0; i < ssa->vars_count; i++) { @@ -3148,7 +3348,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace /* skip life range with single use */ continue; } - intervals[i] = &list[j]; + ra[i] = &list[j]; list[j].ssa_var = i; list[j].reg = ZREG_NONE; list[j].flags = flags[i]; @@ -3181,7 +3381,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } while (i > 0) { i--; - if (intervals[i] + if (RA_HAS_REG(i) && STACK_REG(parent_stack, i) != ZREG_NONE && STACK_REG(parent_stack, i) < ZREG_NUM) { list[j].ssa_var = - 1; @@ -3193,7 +3393,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace list[j].hint = NULL; list[j].used_as_hint = NULL; list[j].list_next = NULL; - intervals[i]->hint = &list[j]; + ra[i]->hint = &list[j]; j++; } } @@ -3205,11 +3405,11 @@ static 
zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_ssa_phi *phi = ssa->blocks[1].phis; while (phi) { - if (intervals[phi->ssa_var]) { - if (intervals[phi->sources[1]] + if (RA_HAS_REG(phi->ssa_var)) { + if (RA_HAS_REG(phi->sources[1]) && (ssa->var_info[phi->sources[1]].type & MAY_BE_ANY) == (ssa->var_info[phi->ssa_var].type & MAY_BE_ANY)) { - intervals[phi->sources[1]]->hint = intervals[phi->ssa_var]; + ra[phi->sources[1]]->hint = ra[phi->ssa_var]; } } phi = phi->next; @@ -3217,7 +3417,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } for (i = 0; i < ssa->vars_count; i++) { - if (intervals[i] && !intervals[i]->hint) { + if (RA_HAS_REG(i) && !ra[i]->hint) { if (ssa->vars[i].definition >= 0) { uint32_t line = ssa->vars[i].definition; @@ -3228,12 +3428,12 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_POST_INC: case ZEND_POST_DEC: if (ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use] && + RA_HAS_REG(ssa->ops[line].op1_use) && (i == ssa->ops[line].op1_def || (i == ssa->ops[line].result_def && (ssa->ops[line].op1_def < 0 || - !intervals[ssa->ops[line].op1_def])))) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + !RA_HAS_REG(ssa->ops[line].op1_def))))) { + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } break; case ZEND_SEND_VAR: @@ -3245,23 +3445,23 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_PRE_DEC: if (i == ssa->ops[line].op1_def && ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use]) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + RA_HAS_REG(ssa->ops[line].op1_use)) { + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } break; case ZEND_ASSIGN: if (ssa->ops[line].op2_use >= 0 && - intervals[ssa->ops[line].op2_use] && + RA_HAS_REG(ssa->ops[line].op2_use) && (i == ssa->ops[line].op2_def || (i == ssa->ops[line].op1_def && (ssa->ops[line].op2_def < 0 || - !intervals[ssa->ops[line].op2_def])) || + !RA_HAS_REG(ssa->ops[line].op2_def))) || (i == ssa->ops[line].result_def && (ssa->ops[line].op2_def < 0 || - !intervals[ssa->ops[line].op2_def]) && + !RA_HAS_REG(ssa->ops[line].op2_def)) && (ssa->ops[line].op1_def < 0 || - !intervals[ssa->ops[line].op1_def])))) { - zend_jit_add_hint(intervals, i, ssa->ops[line].op2_use); + !RA_HAS_REG(ssa->ops[line].op1_def))))) { + zend_jit_add_hint(ra, i, ssa->ops[line].op2_use); } break; case ZEND_SUB: @@ -3272,7 +3472,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace case ZEND_BW_XOR: if (i == ssa->ops[line].result_def) { if (ssa->ops[line].op1_use >= 0 && - intervals[ssa->ops[line].op1_use] && + RA_HAS_REG(ssa->ops[line].op1_use) && ssa->ops[line].op1_use_chain < 0 && !ssa->vars[ssa->ops[line].op1_use].phi_use_chain && (ssa->var_info[i].type & MAY_BE_ANY) == @@ -3280,14 +3480,14 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace zend_ssa_phi *phi = ssa->vars[ssa->ops[line].op1_use].definition_phi; if (phi && - intervals[phi->sources[1]] && - intervals[phi->sources[1]]->hint == intervals[ssa->ops[line].op1_use]) { + RA_HAS_REG(phi->sources[1]) && + ra[phi->sources[1]]->hint == ra[ssa->ops[line].op1_use]) { break; } - zend_jit_add_hint(intervals, i, ssa->ops[line].op1_use); + zend_jit_add_hint(ra, i, ssa->ops[line].op1_use); } else if (opline->opcode != ZEND_SUB && ssa->ops[line].op2_use >= 0 && - intervals[ssa->ops[line].op2_use] && + RA_HAS_REG(ssa->ops[line].op2_use) && 
 					    ssa->ops[line].op2_use_chain < 0 &&
 					    !ssa->vars[ssa->ops[line].op2_use].phi_use_chain &&
 					    (ssa->var_info[i].type & MAY_BE_ANY) ==
@@ -3295,11 +3495,11 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace
 						zend_ssa_phi *phi = ssa->vars[ssa->ops[line].op2_use].definition_phi;
 						if (phi &&
-						    intervals[phi->sources[1]] &&
-						    intervals[phi->sources[1]]->hint == intervals[ssa->ops[line].op2_use]) {
+						    RA_HAS_REG(phi->sources[1]) &&
+						    ra[phi->sources[1]]->hint == ra[ssa->ops[line].op2_use]) {
 							break;
 						}
-						zend_jit_add_hint(intervals, i, ssa->ops[line].op2_use);
+						zend_jit_add_hint(ra, i, ssa->ops[line].op2_use);
 					}
 				}
 				break;
@@ -3308,7 +3508,7 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace
 		}
 	}
-	list = zend_jit_sort_intervals(intervals, ssa->vars_count);
+	list = zend_jit_sort_intervals(ra, ssa->vars_count);
 	if (list) {
 		ival = list;
@@ -3337,15 +3537,15 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace
 	if (list) {
 		zend_lifetime_interval *ival, *next;
-		memset(intervals, 0, ssa->vars_count * sizeof(zend_lifetime_interval*));
+		memset(ra, 0, ssa->vars_count * sizeof(zend_lifetime_interval*));
 		ival = list;
 		count = 0;
 		while (ival != NULL) {
 			ZEND_ASSERT(ival->reg != ZREG_NONE);
 			count++;
 			next = ival->list_next;
-			ival->list_next = intervals[ival->ssa_var];
-			intervals[ival->ssa_var] = ival;
+			ival->list_next = ra[ival->ssa_var];
+			ra[ival->ssa_var] = ival;
 			ival = next;
 		}
@@ -3365,11 +3565,28 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace
 			}
 			while (i > 0) {
 				i--;
-				if (intervals[i] && intervals[i]->reg != STACK_REG(parent_stack, i)) {
-					intervals[i]->flags |= ZREG_LOAD;
+				if (RA_HAS_REG(i) && ra[i]->reg != STACK_REG(parent_stack, i)) {
+					RA_REG_FLAGS(i) |= ZREG_LOAD;
+				}
+			}
+		}
+#else /* ZEND_JIT_IR */
+	if (count) {
+		for (i = 0; i < ssa->vars_count; i++) {
+			if (RA_HAS_REG(i)) {
+				if ((RA_REG_FLAGS(i) & ZREG_LOAD) &&
+				    (RA_REG_FLAGS(i) & ZREG_LAST_USE) &&
+				    (i >= parent_vars_count || STACK_REG(parent_stack, i) == ZREG_NONE) &&
+				    zend_ssa_next_use(ssa->ops, i, ssa->vars[i].use_chain) < 0) {
+					/* skip live range with single use */
+					RA_REG_DEL(i);
+					count--;
 				}
 			}
 		}
+	}
+	if (count) {
+#endif
 		/* SSA resolution */
 		if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP
@@ -3381,58 +3598,95 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace
 				int def = phi->ssa_var;
 				int use = phi->sources[1];
-				if (intervals[def]) {
-					if (!intervals[use]) {
-						intervals[def]->flags |= ZREG_LOAD;
-						if ((intervals[def]->flags & ZREG_LAST_USE)
+				if (RA_HAS_REG(def)) {
+					if (!RA_HAS_REG(use)) {
+						RA_REG_FLAGS(def) |= ZREG_LOAD;
+						if ((RA_REG_FLAGS(def) & ZREG_LAST_USE)
 						 && ssa->vars[def].use_chain >= 0
-						 && ssa->vars[def].use_chain == intervals[def]->range.end) {
+						 && !ssa->vars[def].phi_use_chain
+#ifndef ZEND_JIT_IR
+						 && ssa->vars[def].use_chain == ra[def]->range.end
+#else
+						 && zend_ssa_next_use(ssa->ops, def, ssa->vars[def].use_chain) < 0
+#endif
+						) {
 							/* remove interval used once */
-							intervals[def] = NULL;
+							RA_REG_DEL(def);
 							count--;
 						}
-					} else if (intervals[def]->reg != intervals[use]->reg) {
-						intervals[def]->flags |= ZREG_LOAD;
+#ifndef ZEND_JIT_IR
+					} else if (ra[def]->reg != ra[use]->reg) {
+						RA_REG_FLAGS(def) |= ZREG_LOAD;
 						if (ssa->vars[use].use_chain >= 0) {
-							intervals[use]->flags |= ZREG_STORE;
+							RA_REG_FLAGS(use) |= ZREG_STORE;
 						} else {
-							intervals[use] = NULL;
+							RA_REG_DEL(use);
 							count--;
 						}
 					} else {
 						use = phi->sources[0];
-						ZEND_ASSERT(!intervals[use]);
-
intervals[use] = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval)); - intervals[use]->ssa_var = phi->sources[0]; - intervals[use]->reg = intervals[def]->reg; - intervals[use]->flags = ZREG_LOAD; - intervals[use]->range.start = 0; - intervals[use]->range.end = 0; - intervals[use]->range.next = NULL; - intervals[use]->hint = NULL; - intervals[use]->used_as_hint = NULL; - intervals[use]->list_next = NULL; - } - } else if (intervals[use] + ZEND_ASSERT(!RA_HAS_REG(use)); + ra[use] = zend_arena_alloc(&CG(arena), sizeof(zend_lifetime_interval)); + ra[use]->ssa_var = phi->sources[0]; + ra[use]->reg = ra[def]->reg; + ra[use]->flags = ZREG_LOAD; + ra[use]->range.start = 0; + ra[use]->range.end = 0; + ra[use]->range.next = NULL; + ra[use]->hint = NULL; + ra[use]->used_as_hint = NULL; + ra[use]->list_next = NULL; +#else + } else if ((ssa->var_info[def].type & MAY_BE_ANY) != (ssa->var_info[use].type & MAY_BE_ANY)) { + RA_REG_FLAGS(def) |= ZREG_LOAD; + RA_REG_FLAGS(use) |= ZREG_STORE; + } else { + use = phi->sources[0]; + if (zend_jit_var_supports_reg(ssa, use)) { + ZEND_ASSERT(!RA_HAS_REG(use)); + RA_REG_START(use, 0); + RA_REG_FLAGS(use) = ZREG_LOAD; + count++; + } else { + RA_REG_FLAGS(def) |= ZREG_LOAD; + } +#endif + } + } else if (RA_HAS_REG(use) && (!ssa->vars[def].no_val - || ssa->var_info[def].type != ssa->var_info[use].type)) { +#ifndef ZEND_JIT_IR + || ssa->var_info[def].type != ssa->var_info[use].type +#endif + )) { if (ssa->vars[use].use_chain >= 0) { - intervals[use]->flags |= ZREG_STORE; + RA_REG_FLAGS(use) |= ZREG_STORE; // TODO: ext/opcache/tests/jit/reg_alloc_00[67].phpt ??? } else { - intervals[use] = NULL; + RA_REG_DEL(use); count--; } } phi = phi->next; } +#ifndef ZEND_JIT_IR } else { for (i = 0; i < ssa->vars_count; i++) { - if (intervals[i] - && intervals[i]->range.end == last_idx - && !(intervals[i]->flags & (ZREG_LOAD|ZREG_STORE))) { - intervals[i]->flags |= ZREG_STORE; + if (RA_HAS_REG(i) + && ra[i]->range.end == last_idx + && !(RA_REG_FLAGS(i) & (ZREG_LOAD|ZREG_STORE))) { + RA_REG_FLAGS(i) |= ZREG_STORE; + } + } +#else + } else if (p->stop == ZEND_JIT_TRACE_STOP_LINK + || p->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) { + for (i = 0; i < op_array->last_var + op_array->T; i++) { + int var = STACK_VAR(stack, i); + if (var >= 0 && RA_HAS_REG(var) + && !(RA_REG_FLAGS(var) & (ZREG_LOAD|ZREG_STORE|ZREG_LAST_USE))) { + RA_REG_FLAGS(var) |= ZREG_STORE; } } +#endif } if (!count) { @@ -3441,24 +3695,47 @@ static zend_lifetime_interval** zend_jit_trace_allocate_registers(zend_jit_trace } if (JIT_G(debug) & ZEND_JIT_DEBUG_REG_ALLOC) { +#ifndef ZEND_JIT_IR fprintf(stderr, "---- TRACE %d Allocated Live Ranges\n", ZEND_JIT_TRACE_NUM); for (i = 0; i < ssa->vars_count; i++) { - ival = intervals[i]; + ival = ra[i]; while (ival) { zend_jit_dump_lifetime_interval(vars_op_array[ival->ssa_var], ssa, ival); ival = ival->list_next; } } +#else + fprintf(stderr, "---- TRACE %d Live Ranges \"%s\"\n", ZEND_JIT_TRACE_NUM, op_array->function_name ? ZSTR_VAL(op_array->function_name) : "[main]"); + for (i = 0; i < ssa->vars_count; i++) { + if (RA_HAS_REG(i)) { + fprintf(stderr, "#%d.", i); + uint32_t var_num = ssa->vars[i].var; + zend_dump_var(vars_op_array[i], (var_num < vars_op_array[i]->last_var ? 
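/*
 * Aside: the branch above resolves each loop-header phi. If the def got a
 * register but the back-edge source did not, the def is reloaded from its VM
 * stack slot (ZREG_LOAD); if both live in different registers, the source is
 * written back (ZREG_STORE) so the load picks it up; values with no further
 * uses are dropped and counted out. A toy decision function with the same
 * shape (reg < 0 meaning "in memory only"; this helper is illustrative, not
 * php-src API):
 */
#include <stdio.h>

static const char *resolve_phi(int def_reg, int src_reg, int src_has_uses) {
    if (def_reg >= 0 && src_reg < 0) {
        return "LOAD def at loop entry";              /* value lives in memory */
    }
    if (def_reg >= 0 && src_reg >= 0 && def_reg != src_reg) {
        return src_has_uses ? "LOAD def + STORE src"  /* move via the slot */
                            : "LOAD def, drop src";   /* src is never read */
    }
    if (def_reg < 0 && src_reg >= 0) {
        return src_has_uses ? "STORE src on back edge" : "drop src";
    }
    return "nothing to do";   /* same register, or both already in memory */
}

int main(void) {
    puts(resolve_phi(0, -1, 1));   /* prints "LOAD def at loop entry" */
    return 0;
}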
IS_CV : 0), var_num); + if (RA_REG_FLAGS(i) & ZREG_LAST_USE) { + fprintf(stderr, " last_use"); + } + if (RA_REG_FLAGS(i) & ZREG_LOAD) { + fprintf(stderr, " load"); + } + if (RA_REG_FLAGS(i) & ZREG_STORE) { + fprintf(stderr, " store"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, "\n"); +#endif } - return intervals; + return ra; } - zend_arena_release(&CG(arena), checkpoint); //??? + zend_arena_release(&CG(arena), checkpoint); return NULL; } -static void zend_jit_trace_clenup_stack(zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, zend_lifetime_interval **ra) +#ifndef ZEND_JIT_IR +static void zend_jit_trace_cleanup_stack(zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, zend_lifetime_interval **ra) { uint32_t line = ssa_op - ssa->ops; @@ -3478,6 +3755,32 @@ static void zend_jit_trace_clenup_stack(zend_jit_trace_stack *stack, const zend_ SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE); } } +#else +static void zend_jit_trace_cleanup_stack(zend_jit_ctx *jit, zend_jit_trace_stack *stack, const zend_op *opline, const zend_ssa_op *ssa_op, const zend_ssa *ssa, const zend_op **ssa_opcodes) +{ + if (ssa_op->op1_use >= 0 + && jit->ra[ssa_op->op1_use].ref + && (jit->ra[ssa_op->op1_use].flags & ZREG_LAST_USE) + && (ssa_op->op1_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->op1_use_chain], ssa->ops + ssa_op->op1_use_chain, ssa_op->op1_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->op1.var)); + } + if (ssa_op->op2_use >= 0 + && jit->ra[ssa_op->op2_use].ref + && (jit->ra[ssa_op->op2_use].flags & ZREG_LAST_USE) + && (ssa_op->op2_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->op2_use_chain], ssa->ops + ssa_op->op2_use_chain, ssa_op->op2_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->op2.var)); + } + if (ssa_op->result_use >= 0 + && jit->ra[ssa_op->result_use].ref + && (jit->ra[ssa_op->result_use].flags & ZREG_LAST_USE) + && (ssa_op->res_use_chain == -1 + || zend_ssa_is_no_val_use(ssa_opcodes[ssa_op->res_use_chain], ssa->ops + ssa_op->res_use_chain, ssa_op->result_use))) { + CLEAR_STACK_REF(stack, EX_VAR_TO_NUM(opline->result.var)); + } +} +#endif static void zend_jit_trace_setup_ret_counter(const zend_op *opline, size_t offset) { @@ -3568,10 +3871,18 @@ static int zend_jit_trace_stack_needs_deoptimization(zend_jit_trace_stack *stack uint32_t i; for (i = 0; i < stack_size; i++) { +#ifndef ZEND_JIT_IR if (STACK_REG(stack, i) != ZREG_NONE && !(STACK_FLAGS(stack, i) & (ZREG_LOAD|ZREG_STORE))) { return 1; } +#else + if (STACK_FLAGS(stack, i) & ~(ZREG_LOAD|ZREG_STORE|ZREG_LAST_USE)) { + return 1; + } else if (STACK_REG(stack, i) != ZREG_NONE) { + return 1; + } +#endif } return 0; } @@ -3592,24 +3903,39 @@ static int zend_jit_trace_exit_needs_deoptimization(uint32_t trace_num, uint32_t return zend_jit_trace_stack_needs_deoptimization(stack, stack_size); } -static int zend_jit_trace_deoptimization(dasm_State **Dst, +static int zend_jit_trace_deoptimization( +#ifndef ZEND_JIT_IR + dasm_State **jit, +#else + zend_jit_ctx *jit, +#endif uint32_t flags, const zend_op *opline, zend_jit_trace_stack *parent_stack, int parent_vars_count, zend_ssa *ssa, zend_jit_trace_stack *stack, +#ifndef ZEND_JIT_IR zend_lifetime_interval **ra, - bool polymorphic_side_trace) +#else + zend_jit_exit_const *constants, + int8_t func_reg, +#endif + bool polymorphic_side_trace) { int i; +#ifndef ZEND_JIT_IR bool has_constants = 0; bool has_unsaved_vars = 0; +#else + 
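/*
 * Aside: both variants of zend_jit_trace_cleanup_stack() above do the same
 * job: once an operand reaches its ZREG_LAST_USE point and no later use
 * follows in the trace, its register (old JIT) or IR reference (new JIT) is
 * dropped from the shadow stack, so a later side exit will not try to spill
 * a dead value. A toy shadow stack with the same invariant; the names here
 * are invented:
 */
#include <assert.h>

#define REF_NONE 0

typedef struct { int ref; int last_use; } shadow_slot;

static void cleanup_slot(shadow_slot *stack, int var, int no_later_use) {
    if (stack[var].ref != REF_NONE && stack[var].last_use && no_later_use) {
        stack[var].ref = REF_NONE;   /* dead: exclude from deopt writeback */
    }
}

int main(void) {
    shadow_slot stack[4] = { { 7, 1 } };
    cleanup_slot(stack, 0, 1);
    assert(stack[0].ref == REF_NONE);
    return 0;
}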
int check2 = -1; +#endif // TODO: Merge this loop with the following register LOAD loop to implement parallel move ??? for (i = 0; i < parent_vars_count; i++) { int8_t reg = STACK_REG(parent_stack, i); +#ifndef ZEND_JIT_IR if (reg != ZREG_NONE) { if (reg < ZREG_NUM) { if (ssa && ssa->vars[i].no_val) { @@ -3624,7 +3950,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, uint8_t type = STACK_TYPE(parent_stack, i); if (!(STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) - && !zend_jit_store_var(Dst, 1 << type, i, reg, + && !zend_jit_store_var(jit, 1 << type, i, reg, STACK_MEM_TYPE(parent_stack, i) != type)) { return 0; } @@ -3637,8 +3963,106 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, has_constants = 1; } } +#else + if (STACK_FLAGS(parent_stack, i) == ZREG_CONST) { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (type == IS_LONG) { + if (!zend_jit_store_const_long(jit, i, + (zend_long)constants[STACK_REF(parent_stack, i)].i)) { + return 0; + } + } else if (type == IS_DOUBLE) { + if (!zend_jit_store_const_double(jit, i, + constants[STACK_REF(parent_stack, i)].d)) { + return 0; + } + } else { + ZEND_UNREACHABLE(); + } + if (stack) { + SET_STACK_TYPE(stack, i, type, 1); + if (jit->ra && jit->ra[i].ref) { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_TYPE_ONLY) { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_type(jit, i, type)) { + return 0; + } + if (stack) { + SET_STACK_TYPE(stack, i, type, 1); + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_THIS) { + if (polymorphic_side_trace) { + ssa->var_info[i].delayed_fetch_this = 1; + if (stack) { + SET_STACK_REG_EX(stack, i, ZREG_NONE, ZREG_THIS); + } + } else if (!zend_jit_load_this(jit, EX_NUM_TO_VAR(i))) { + return 0; + } + } else if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_ADDREF) { + zend_jit_addr dst = ZEND_ADDR_MEM_ZVAL(ZREG_FP, EX_NUM_TO_VAR(i)); + zend_jit_zval_try_addref(jit, dst); + } else if (STACK_FLAGS(parent_stack, i) == ZREG_ZVAL_COPY) { + ZEND_ASSERT(reg != ZREG_NONE); + ZEND_ASSERT(check2 == -1); + check2 = i; + } else if (STACK_FLAGS(parent_stack, i) & ZREG_SPILL_SLOT) { + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_spill_slot(jit, 1 << type, i, reg, STACK_REF(parent_stack, i), + STACK_MEM_TYPE(parent_stack, i) != type)) { + return 0; + } + if (stack) { + if (jit->ra && jit->ra[i].ref) { + SET_STACK_TYPE(stack, i, type, 0); + if ((STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0) { + SET_STACK_REF_EX(stack, i, jit->ra[i].ref, ZREG_LOAD); + } else { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } else { + SET_STACK_TYPE(stack, i, type, 1); + } + } + } + } else if (reg != ZREG_NONE) { + if (ssa && ssa->vars[i].no_val) { + /* pass */ + } else { + uint8_t type = STACK_TYPE(parent_stack, i); + + if (!zend_jit_store_reg(jit, 1 << type, i, reg, + (STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0, + STACK_MEM_TYPE(parent_stack, i) != type)) { + return 0; + } + if (stack) { + if (jit->ra && jit->ra[i].ref) { + SET_STACK_TYPE(stack, i, type, 0); + if ((STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) != 0) { + SET_STACK_REF_EX(stack, i, jit->ra[i].ref, ZREG_LOAD); + } else { + SET_STACK_REF(stack, i, jit->ra[i].ref); + } + } else { + SET_STACK_TYPE(stack, i, type, 1); + } + } + } + } +#endif } +#ifndef ZEND_JIT_IR if (has_unsaved_vars && (has_constants || (flags & 
(ZEND_JIT_EXIT_RESTORE_CALL|ZEND_JIT_EXIT_FREE_OP1|ZEND_JIT_EXIT_FREE_OP2)))) { @@ -3656,7 +4080,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, SET_STACK_TYPE(stack, i, type, 1); } if (!(STACK_FLAGS(parent_stack, i) & (ZREG_LOAD|ZREG_STORE)) - && !zend_jit_store_var(Dst, 1 << type, i, reg, + && !zend_jit_store_var(jit, 1 << type, i, reg, STACK_MEM_TYPE(parent_stack, i) != type)) { return 0; } @@ -3679,24 +4103,38 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (stack) { SET_STACK_REG(stack, i, ZREG_THIS); } - } else if (!zend_jit_load_this(Dst, EX_NUM_TO_VAR(i))) { + } else if (!zend_jit_load_this(jit, EX_NUM_TO_VAR(i))) { return 0; } } else { if (reg == ZREG_ZVAL_COPY_GPR0 - &&!zend_jit_escape_if_undef_r0(Dst, i, flags, opline)) { + &&!zend_jit_escape_if_undef_r0(jit, i, flags, opline)) { return 0; } - if (!zend_jit_store_const(Dst, i, reg)) { + if (!zend_jit_store_const(jit, i, reg)) { return 0; } } } } } +#else + if (check2 != -1) { + int8_t reg = STACK_REG(parent_stack, check2); + + ZEND_ASSERT(STACK_FLAGS(parent_stack, check2) == ZREG_ZVAL_COPY); + ZEND_ASSERT(reg != ZREG_NONE); + if (!zend_jit_escape_if_undef(jit, check2, flags, opline, reg)) { + return 0; + } + if (!zend_jit_restore_zval(jit, EX_NUM_TO_VAR(check2), reg)) { + return 0; + } + } +#endif if (flags & ZEND_JIT_EXIT_RESTORE_CALL) { - if (!zend_jit_save_call_chain(Dst, -1)) { + if (!zend_jit_save_call_chain(jit, -1)) { return 0; } } @@ -3704,7 +4142,7 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (flags & ZEND_JIT_EXIT_FREE_OP2) { const zend_op *op = opline - 1; - if (!zend_jit_free_op(Dst, op, -1, op->op2.var)) { + if (!zend_jit_free_op(jit, op, -1, op->op2.var)) { return 0; } } @@ -3712,21 +4150,31 @@ static int zend_jit_trace_deoptimization(dasm_State **Dst, if (flags & ZEND_JIT_EXIT_FREE_OP1) { const zend_op *op = opline - 1; - if (!zend_jit_free_op(Dst, op, -1, op->op1.var)) { + if (!zend_jit_free_op(jit, op, -1, op->op1.var)) { return 0; } } if (flags & (ZEND_JIT_EXIT_FREE_OP1|ZEND_JIT_EXIT_FREE_OP2)) { - if (!zend_jit_check_exception(Dst)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_check_exception(jit)) { return 0; } +#else + zend_jit_check_exception(jit); +#endif } if ((flags & ZEND_JIT_EXIT_METHOD_CALL) && !polymorphic_side_trace) { - if (!zend_jit_free_trampoline(Dst)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_free_trampoline(jit)) { + return 0; + } +#else + if (!zend_jit_free_trampoline(jit, func_reg)) { return 0; } +#endif } return 1; @@ -3984,9 +4432,15 @@ static bool zend_jit_trace_next_is_send_result(const zend_op *oplin static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t parent_trace, uint32_t exit_num) { const void *handler = NULL; - dasm_State* dasm_state = NULL; - zend_script *script = NULL; +#ifndef ZEND_JIT_IR + dasm_State* ctx = NULL; zend_lifetime_interval **ra = NULL; +#else + zend_jit_ctx ctx; + zend_jit_ctx *jit = &ctx; + zend_jit_reg_var *ra = NULL; +#endif + zend_script *script = NULL; zend_string *name = NULL; void *checkpoint; const zend_op_array *op_array; @@ -4033,10 +4487,23 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par ssa_opcodes = ((zend_tssa*)ssa)->tssa_opcodes; +#ifdef ZEND_JIT_IR + op_array = trace_buffer->op_array; + opline = trace_buffer[1].opline; + name = zend_jit_trace_name(op_array, opline->lineno); + zend_jit_trace_start(&ctx, op_array, ssa, name, ZEND_JIT_TRACE_NUM, + parent_trace ? 
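/*
 * Aside: zend_jit_trace_deoptimization() above replays the parent trace's
 * shadow stack into the VM frame: recorded constants and bare type tags are
 * materialized, registers and spill slots are stored back, and the (at most
 * one) ZREG_ZVAL_COPY slot is deferred to the end (check2), where it may
 * first need an escape-to-the-interpreter check for undefined values. A
 * compressed sketch of that dispatch; the flag values are invented:
 */
enum { SLOT_NONE, SLOT_CONST, SLOT_TYPE_ONLY, SLOT_REG, SLOT_ZVAL_COPY };

static int deopt_writeback(const int *slots, int n) {
    int copy_slot = -1;
    for (int i = 0; i < n; i++) {
        switch (slots[i]) {
            case SLOT_CONST:     /* store the recorded constant value */ break;
            case SLOT_TYPE_ONLY: /* store only the type tag */           break;
            case SLOT_REG:       /* spill the CPU register */            break;
            case SLOT_ZVAL_COPY: copy_slot = i; /* defer to the end */   break;
            default:             break;
        }
    }
    if (copy_slot >= 0) {
        /* last: may need the escape check before copying the zval back */
    }
    return 1;  /* then: restore call chain, free operands, check exceptions */
}

int main(void) {
    const int slots[] = { SLOT_CONST, SLOT_REG, SLOT_ZVAL_COPY };
    return !deopt_writeback(slots, 3);
}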
&zend_jit_traces[parent_trace] : NULL, exit_num); + ctx.trace = &zend_jit_traces[ZEND_JIT_TRACE_NUM]; +#endif + /* Register allocation */ if ((JIT_G(opt_flags) & (ZEND_JIT_REG_ALLOC_LOCAL|ZEND_JIT_REG_ALLOC_GLOBAL)) && JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { +#ifndef ZEND_JIT_IR ra = zend_jit_trace_allocate_registers(trace_buffer, ssa, parent_trace, exit_num); +#else + ctx.ra = ra = zend_jit_trace_allocate_registers(trace_buffer, ssa, parent_trace, exit_num); +#endif } p = trace_buffer; @@ -4051,26 +4518,27 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par SET_STACK_TYPE(stack, i, IS_UNKNOWN, 1); } +#ifndef ZEND_JIT_IR opline = p[1].opline; name = zend_jit_trace_name(op_array, opline->lineno); p += ZEND_JIT_TRACE_START_REC_SIZE; - dasm_init(&dasm_state, DASM_MAXSECTION); - dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX); - dasm_setup(&dasm_state, dasm_actions); + dasm_init(&ctx, DASM_MAXSECTION); + dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX); + dasm_setup(&ctx, dasm_actions); jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); op_array_ssa = &jit_extension->func_info.ssa; - dasm_growpc(&dasm_state, 2); /* =>0: loop header */ + dasm_growpc(&ctx, 2); /* =>0: loop header */ /* =>1: end of code */ - zend_jit_align_func(&dasm_state); + zend_jit_align_func(&ctx); if (!parent_trace) { - zend_jit_prologue(&dasm_state); + zend_jit_prologue(&ctx); } - zend_jit_trace_begin(&dasm_state, ZEND_JIT_TRACE_NUM, + zend_jit_trace_begin(&ctx, ZEND_JIT_TRACE_NUM, parent_trace ? &zend_jit_traces[parent_trace] : NULL, exit_num); if (!parent_trace) { @@ -4078,11 +4546,30 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par zend_jit_track_last_valid_opline(); } else { if (zend_jit_traces[parent_trace].exit_info[exit_num].opline == NULL) { - zend_jit_trace_opline_guard(&dasm_state, opline); + zend_jit_trace_opline_guard(&ctx, opline); } else { zend_jit_reset_last_valid_opline(); } } +#else + opline = p[1].opline; + p += ZEND_JIT_TRACE_START_REC_SIZE; + + jit_extension = + (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); + op_array_ssa = &jit_extension->func_info.ssa; + + if (!parent_trace) { + zend_jit_set_last_valid_opline(&ctx, opline); + zend_jit_track_last_valid_opline(&ctx); + } else { + if (zend_jit_traces[parent_trace].exit_info[exit_num].opline == NULL) { + zend_jit_trace_opline_guard(&ctx, opline); + } else { + zend_jit_reset_last_valid_opline(&ctx); + } + } +#endif if (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { int last_var; @@ -4092,7 +4579,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (used_stack > 0) { peek_checked_stack = used_stack; - if (!zend_jit_stack_check(&dasm_state, opline, used_stack)) { + if (!zend_jit_stack_check(&ctx, opline, used_stack)) { goto jit_failure; } } @@ -4146,7 +4633,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par || (ssa->vars[i].phi_use_chain && !(ssa->var_info[ssa->vars[i].phi_use_chain->ssa_var].type & MAY_BE_GUARD)))) { /* Check loop-invariant variable type */ - if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), concrete_type(info))) { + if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), concrete_type(info))) { goto jit_failure; } info &= ~MAY_BE_GUARD; @@ -4167,7 +4654,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && (ssa->vars[i].use_chain != -1 || (ssa->vars[i].phi_use_chain && 
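/*
 * Aside: from here on one function body drives both backends. The trick
 * above is to give the DynASM assembler state and the IR builder context the
 * same local name (ctx), so the hundreds of emitter calls below need no
 * per-backend duplication, only isolated #ifndef ZEND_JIT_IR islands. A
 * reduced illustration of the pattern; jit_ctx and emit_nop are invented
 * stand-ins, not the real types:
 */
#ifndef ZEND_JIT_IR
typedef struct dasm_State jit_ctx;          /* opaque DynASM assembler state */
#else
typedef struct { int next_ref; } jit_ctx;   /* toy IR-builder context */
#endif

static int emit_nop(jit_ctx *ctx) {
    (void)ctx;   /* each backend supplies its own lowering */
    return 1;    /* emitters return 0 on failure, hence the goto jit_failure */
}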
 					    !(ssa->var_info[ssa->vars[i].phi_use_chain->ssa_var].type & MAY_BE_PACKED_GUARD)))) {
-					if (!zend_jit_packed_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), info)) {
+					if (!zend_jit_packed_guard(&ctx, opline, EX_NUM_TO_VAR(i), info)) {
 						goto jit_failure;
 					}
 					info &= ~MAY_BE_PACKED_GUARD;
@@ -4177,10 +4664,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 		if (parent_trace) {
 			/* Deoptimization */
-			if (!zend_jit_trace_deoptimization(&dasm_state,
+			if (!zend_jit_trace_deoptimization(&ctx,
 					zend_jit_traces[parent_trace].exit_info[exit_num].flags,
 					zend_jit_traces[parent_trace].exit_info[exit_num].opline,
-					parent_stack, parent_vars_count, ssa, stack, ra,
+					parent_stack, parent_vars_count, ssa, stack,
+#ifndef ZEND_JIT_IR
+					ra,
+#else
+					zend_jit_traces[parent_trace].constants,
+					zend_jit_traces[parent_trace].exit_info[exit_num].poly_func_reg,
+#endif
 					polymorphic_side_trace)) {
 				goto jit_failure;
 			}
@@ -4190,25 +4683,41 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 		 && trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_CALL
 		 && trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) {
 			for (i = 0; i < last_var; i++) {
-				if (ra[i]
-				 && (ra[i]->flags & ZREG_LOAD) != 0
-				 && ra[i]->reg != stack[i].reg) {
+				if (RA_HAS_REG(i)
+				 && (RA_REG_FLAGS(i) & ZREG_LOAD) != 0
+#ifndef ZEND_JIT_IR
+				 && ra[i]->reg != stack[i].reg
+#else
+				 && ra[i].ref != STACK_REF(stack, i)
+#endif
+				) {
 					if ((ssa->var_info[i].type & MAY_BE_GUARD) != 0) {
 						uint8_t op_type;
 						ssa->var_info[i].type &= ~MAY_BE_GUARD;
 						op_type = concrete_type(ssa->var_info[i].type);
-						if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), op_type)) {
+						if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), op_type)) {
 							goto jit_failure;
 						}
 						SET_STACK_TYPE(stack, i, op_type, 1);
 					}
-					SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD);
-					if (!zend_jit_load_var(&dasm_state, ssa->var_info[i].type, i, ra[i]->reg)) {
+#ifndef ZEND_JIT_IR
+					if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, ra[i]->reg)) {
 						goto jit_failure;
 					}
+					SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD);
+#else
+					if (trace_buffer->stop == ZEND_JIT_TRACE_STOP_LOOP) {
+						if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, i)) {
+							goto jit_failure;
+						}
+						SET_STACK_REF_EX(stack, i, ra[i].ref, ZREG_LOAD);
+					} else {
+						SET_STACK_REF_EX(stack, i, IR_NULL, ZREG_LOAD);
+					}
+#endif
 				}
 			}
 		}
@@ -4218,7 +4727,8 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 		 || trace_buffer->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL
 		 || trace_buffer->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_RET) {
-		zend_jit_label(&dasm_state, 0); /* start of of trace loop */
+#ifndef ZEND_JIT_IR
+		zend_jit_label(&ctx, 0); /* start of trace loop */
 		if (ra) {
 			zend_ssa_phi *phi = ssa->blocks[1].phis;
@@ -4232,7 +4742,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 					ZEND_ASSERT(ival->reg != ZREG_NONE);
 					if (info & MAY_BE_GUARD) {
-						if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) {
+						if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) {
 							goto jit_failure;
 						}
 						info &= ~MAY_BE_GUARD;
@@ -4240,14 +4750,14 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 						SET_STACK_TYPE(stack, phi->var, concrete_type(info), 1);
 					}
 					SET_STACK_REG_EX(stack, phi->var, ival->reg, ZREG_LOAD);
-					if (!zend_jit_load_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) {
+					if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg)) {
 						goto jit_failure;
 					}
 				} else if (ival->flags & ZREG_STORE) {
 					ZEND_ASSERT(ival->reg != ZREG_NONE);
 					SET_STACK_REG_EX(stack, phi->var, ival->reg, ZREG_STORE);
-					if (!zend_jit_store_var(&dasm_state, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg,
+					if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, ival->reg,
 							STACK_MEM_TYPE(stack, phi->var) != ssa->var_info[phi->ssa_var].type)) {
 						goto jit_failure;
 					}
@@ -4259,9 +4769,59 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 				phi = phi->next;
 			}
 		}
+#else
+		jit->trace_loop_ref = zend_jit_trace_begin_loop(&ctx); /* start of trace loop */
+
+		if (ra) {
+			zend_ssa_phi *phi = ssa->blocks[1].phis;
+
+			/* First try to insert IR Phi */
+			while (phi) {
+				if (RA_HAS_REG(phi->ssa_var)
+				 && !(RA_REG_FLAGS(phi->ssa_var) & ZREG_LOAD)) {
+					zend_jit_trace_gen_phi(&ctx, phi);
+					SET_STACK_REF(stack, phi->var, ra[phi->ssa_var].ref);
+				}
+				phi = phi->next;
+			}
+
+			phi = ssa->blocks[1].phis;
+			while (phi) {
+				if (RA_HAS_REG(phi->ssa_var)) {
+					if (RA_REG_FLAGS(phi->ssa_var) & ZREG_LOAD) {
+						uint32_t info = ssa->var_info[phi->ssa_var].type;
+
+						if (info & MAY_BE_GUARD) {
+							if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(phi->var), concrete_type(info))) {
+								goto jit_failure;
+							}
+							info &= ~MAY_BE_GUARD;
+							ssa->var_info[phi->ssa_var].type = info;
+							SET_STACK_TYPE(stack, phi->var, concrete_type(info), 1);
+						}
+						if (!zend_jit_load_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var)) {
+							goto jit_failure;
+						}
+						SET_STACK_REF_EX(stack, phi->var, ra[phi->ssa_var].ref, ZREG_LOAD);
+					} else if (RA_REG_FLAGS(phi->ssa_var) & ZREG_STORE) {
+
+						if (!zend_jit_store_var(&ctx, ssa->var_info[phi->ssa_var].type, ssa->vars[phi->ssa_var].var, phi->ssa_var,
+								STACK_MEM_TYPE(stack, phi->var) != ssa->var_info[phi->ssa_var].type)) {
+							goto jit_failure;
+						}
+						SET_STACK_REF_EX(stack, phi->var, ra[phi->ssa_var].ref, ZREG_STORE);
+					} else {
+						/* Register has to be written back on side exit */
+						SET_STACK_REF(stack, phi->var, ra[phi->ssa_var].ref);
+					}
+				}
+				phi = phi->next;
+			}
+		}
+#endif

//		if (trace_buffer->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) {
-//		if (ra && zend_jit_trace_stack_needs_deoptimization(stack, op_array->last_var + op_array->T)) {
//			uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM);
//
//			timeout_exit_addr = zend_jit_trace_get_exit_addr(exit_point);
@@ -4271,6 +4831,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
//		}
//	}
+#ifndef ZEND_JIT_IR
 		if (ra && trace_buffer->stop != ZEND_JIT_TRACE_STOP_LOOP) {
 			int last_var = op_array->last_var;
@@ -4278,14 +4839,15 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
 				last_var += op_array->T;
 			}
 			for (i = 0; i < last_var; i++) {
-				if (ra && ra[i] && (ra[i]->flags & ZREG_LOAD) != 0) {
+				if (ra && RA_HAS_REG(i) && (RA_REG_FLAGS(i) & ZREG_LOAD) != 0) {
 					SET_STACK_REG_EX(stack, i, ra[i]->reg, ZREG_LOAD);
-					if (!zend_jit_load_var(&dasm_state, ssa->var_info[i].type, i, ra[i]->reg)) {
+					if (!zend_jit_load_var(&ctx, ssa->var_info[i].type, i, ra[i]->reg)) {
 						goto jit_failure;
 					}
 				}
 			}
 		}
+#endif
 	}

 	ssa_op = (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) ?
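/*
 * Aside: the IR path above lowers loop-header phis in two passes. Pass one
 * creates real IR phi nodes for values that stay in SSA form across the back
 * edge, so their refs exist before anything references them; pass two falls
 * back to explicit loads/stores for values that must touch their VM stack
 * slots. A toy version of that two-pass shape; ir_phi/ir_load stand in for
 * the real IR builder calls and are not its actual API:
 */
typedef struct { int in_reg; int needs_load; int needs_store; int ref; } phi_slot;

static void lower_loop_phis(phi_slot *phis, int n) {
    for (int i = 0; i < n; i++) {               /* pass 1: IR phi nodes */
        if (phis[i].in_reg && !phis[i].needs_load) {
            phis[i].ref = i + 1;                /* ref = ir_phi(...) */
        }
    }
    for (int i = 0; i < n; i++) {               /* pass 2: memory traffic */
        if (phis[i].in_reg && phis[i].needs_load) {
            phis[i].ref = -(i + 1);             /* ref = ir_load(slot) */
        } else if (phis[i].needs_store) {
            /* ir_store(slot, ref): value must be visible on side exits */
        }
    }
}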
ssa->ops : NULL; @@ -4344,7 +4906,6 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (JIT_G(opt_level) >= ZEND_JIT_LEVEL_INLINE) { - gen_handler = 0; switch (opline->opcode) { case ZEND_PRE_INC: case ZEND_PRE_DEC: @@ -4360,8 +4921,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (opline->result_type != IS_UNUSED) { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } res_info = RES_INFO(); res_addr = RES_REG_ADDR(); } else { @@ -4374,7 +4937,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && !has_concrete_type(op1_def_info)) { op1_def_info &= ~MAY_BE_GUARD; } - if (!zend_jit_inc_dec(&dasm_state, opline, + if (!zend_jit_inc_dec(&ctx, opline, op1_info, OP1_REG_ADDR(), op1_def_info, OP1_DEF_REG_ADDR(), res_use_info, res_info, @@ -4389,11 +4952,25 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (opline->result_type != IS_UNUSED) { ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; } +#ifdef ZEND_JIT_IR + } else if ((op1_def_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD) + && !(op1_info & MAY_BE_STRING)) { + ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD; + if (opline->result_type != IS_UNUSED) { + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; + } +#endif } if (opline->result_type != IS_UNUSED && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_LONG|MAY_BE_GUARD) && !(op1_info & MAY_BE_STRING)) { ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; +#ifdef ZEND_JIT_IR + } else if (opline->result_type != IS_UNUSED + && (res_info & (MAY_BE_ANY|MAY_BE_GUARD)) == (MAY_BE_DOUBLE|MAY_BE_GUARD) + && !(res_info & MAY_BE_STRING)) { + ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD; +#endif } goto done; case ZEND_BW_OR: @@ -4406,9 +4983,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP1_TRACE_TYPE(); op2_info = OP2_INFO(); CHECK_OP2_TRACE_TYPE(); +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } +#endif if (!(op1_info & MAY_BE_LONG) || !(op2_info & MAY_BE_LONG)) { break; @@ -4419,16 +4998,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par send_result = 1; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } } res_info = RES_INFO(); - if (!zend_jit_long_math(&dasm_state, opline, + if (!zend_jit_long_math(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), res_use_info, res_info, res_addr, @@ -4444,12 +5025,27 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par 
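/*
 * Aside: the early "break" paths added above bail out to the generic VM
 * handler whenever the recorded trace types do not guarantee an int/float
 * fast path (undefined operands need the warning path; non-numeric operands
 * need full semantics). The gate is a plain bitmask test; the mask values
 * below are invented for illustration and differ from the real MAY_BE_* set:
 */
#include <stdint.h>

#define MAY_BE_UNDEF  (1u << 0)
#define MAY_BE_LONG   (1u << 4)
#define MAY_BE_DOUBLE (1u << 5)

static int can_jit_math(uint32_t op1_info, uint32_t op2_info) {
    if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) {
        return 0;   /* undefined variable: must raise a warning first */
    }
    return (op1_info & (MAY_BE_LONG | MAY_BE_DOUBLE))
        && (op2_info & (MAY_BE_LONG | MAY_BE_DOUBLE));
}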
op1_addr = OP1_REG_ADDR(); op2_info = OP2_INFO(); op2_addr = OP2_REG_ADDR(); +#ifdef ZEND_JIT_IR + if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { + break; + } + if (opline->opcode == ZEND_ADD && + (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && + (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { + /* pass */ + } else if (!(op1_info & (MAY_BE_LONG|MAY_BE_DOUBLE)) || + !(op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { + break; + } +#endif if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE) && opline->op1_type == IS_CV +#ifndef ZEND_JIT_IR && (Z_MODE(op2_addr) != IS_REG || Z_REG(op2_addr) != ZREG_FCARG1) +#endif && (orig_op2_type == IS_UNKNOWN || !(orig_op2_type & IS_TRACE_REFERENCE))) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4462,9 +5058,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op2_type != IS_UNKNOWN && (orig_op2_type & IS_TRACE_REFERENCE) && opline->op2_type == IS_CV +#ifndef ZEND_JIT_IR && (Z_MODE(op1_addr) != IS_REG || Z_REG(op1_addr) != ZREG_FCARG1) +#endif && (orig_op1_type == IS_UNKNOWN || !(orig_op1_type & IS_TRACE_REFERENCE))) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op2_type, &op2_info, &op2_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op2_type, &op2_info, &op2_addr, !ssa->var_info[ssa_op->op2_use].guarded_reference, 1)) { goto jit_failure; } @@ -4474,6 +5072,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { CHECK_OP2_TRACE_TYPE(); } +#ifndef ZEND_JIT_IR if ((op1_info & MAY_BE_UNDEF) || (op2_info & MAY_BE_UNDEF)) { break; } @@ -4485,33 +5084,50 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par !(op2_info & (MAY_BE_LONG|MAY_BE_DOUBLE))) { break; } +#endif res_addr = RES_REG_ADDR(); if (Z_MODE(res_addr) != IS_REG && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_use_info = -1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } else { res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } } res_info = RES_INFO(); if (opline->opcode == ZEND_ADD && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY && (op2_info & (MAY_BE_ANY|MAY_BE_UNDEF)) == MAY_BE_ARRAY) { - if (!zend_jit_add_arrays(&dasm_state, opline, op1_info, op1_addr, op2_info, op2_addr, res_addr)) { + if (!zend_jit_add_arrays(&ctx, opline, op1_info, op1_addr, op2_info, op2_addr, res_addr)) { goto jit_failure; } } else { - if (!zend_jit_math(&dasm_state, opline, + bool may_overflow = (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa); + +#ifdef ZEND_JIT_IR + if (ra + && may_overflow + && ((res_info & MAY_BE_GUARD) + && (res_info & MAY_BE_ANY) == MAY_BE_LONG) + && ((opline->opcode == ZEND_ADD + && Z_MODE(op2_addr) == IS_CONST_ZVAL && 
Z_LVAL_P(Z_ZV(op2_addr)) == 1) + || (opline->opcode == ZEND_SUB + && Z_MODE(op2_addr) == IS_CONST_ZVAL && Z_LVAL_P(Z_ZV(op2_addr)) == 1))) { + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); + } +#endif + if (!zend_jit_math(&ctx, opline, op1_info, op1_addr, op2_info, op2_addr, res_use_info, res_info, res_addr, - (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (res_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa), + may_overflow, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; } @@ -4540,11 +5156,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } - if (!zend_jit_concat(&dasm_state, opline, + if (!zend_jit_concat(&ctx, opline, op1_info, op2_info, res_addr, zend_may_throw(opline, ssa_op, op_array, ssa))) { goto jit_failure; @@ -4567,7 +5183,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && !has_concrete_type(op1_def_info)) { op1_def_info &= ~MAY_BE_GUARD; } - if (!zend_jit_assign_op(&dasm_state, opline, + if (!zend_jit_assign_op(&ctx, opline, op1_info, op1_def_info, OP1_RANGE(), op2_info, OP2_RANGE(), (op1_info & MAY_BE_LONG) && (op2_info & MAY_BE_LONG) && (op1_def_info & (MAY_BE_DOUBLE|MAY_BE_GUARD)) && zend_may_overflow(opline, ssa_op, op_array, ssa), @@ -4602,7 +5218,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (opline->op1_type == IS_VAR) { if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4612,7 +5228,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4628,7 +5244,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); op1_def_info = OP1_DEF_INFO(); - if (!zend_jit_assign_dim_op(&dasm_state, opline, + if (!zend_jit_assign_dim_op(&ctx, opline, op1_info, op1_def_info, op1_addr, op2_info, op1_data_info, OP1_DATA_RANGE(), val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { @@ -4667,7 +5283,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4675,7 +5291,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, 
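/*
 * Aside: may_overflow above marks long arithmetic whose result type was
 * speculated to stay long (the MAY_BE_GUARD on res_info). The emitted code
 * then performs the add/sub, tests the overflow flag, and leaves the trace
 * through a side exit instead of promoting the result to double. The same
 * speculation in portable C (GCC/Clang builtin; side_exit is a placeholder
 * for the recorded exit stub):
 */
static long speculative_add(long a, long b, void (*side_exit)(void)) {
    long r;
    if (__builtin_add_overflow(a, b, &r)) {
        side_exit();   /* deopt: in the VM the result becomes a double */
    }
    return r;          /* fast path: the value provably stays a PHP int */
}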
opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4709,7 +5325,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par on_this = op_array->opcodes[op_array_ssa->vars[op_array_ssa->ops[opline-op_array->opcodes].op1_use].definition].opcode == ZEND_FETCH_THIS; } } - if (!zend_jit_incdec_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_incdec_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type)) { @@ -4758,7 +5374,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4766,7 +5382,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4802,7 +5418,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_obj_op(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj_op(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_data_info, OP1_DATA_RANGE(), op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type)) { @@ -4838,7 +5454,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT)) { op1_indirect = 1; - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4846,7 +5462,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4882,7 +5498,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_obj(&dasm_state, opline, op_array, ssa, ssa_op, + if (!zend_jit_assign_obj(&ctx, opline, op_array, ssa, ssa_op, op1_info, op1_addr, op1_data_info, op1_indirect, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce, val_type, @@ -4908,7 +5524,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_INDIRECT) && opline->result_type == IS_UNUSED) { - if (!zend_jit_fetch_indirect_var(&dasm_state, opline, 
orig_op1_type, + if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) { goto jit_failure; } @@ -4918,7 +5534,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -4933,7 +5549,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP2_TRACE_TYPE(); op1_data_info = OP1_DATA_INFO(); CHECK_OP1_DATA_TRACE_TYPE(); - if (!zend_jit_assign_dim(&dasm_state, opline, + if (!zend_jit_assign_dim(&ctx, opline, op1_info, op1_addr, op2_info, op1_data_info, val_type, zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { goto jit_failure; @@ -4950,6 +5566,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op2_addr = OP2_REG_ADDR(); op2_info = OP2_INFO(); +#ifdef ZEND_JIT_IR + zend_jit_addr ref_addr = 0; +#endif + if (ra && ssa_op->op2_def >= 0 && (!ssa->vars[ssa_op->op2_def].no_val @@ -4965,7 +5585,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (op1_type != IS_UNKNOWN && (op1_info & MAY_BE_GUARD)) { if (op1_type < IS_STRING && (op1_info & (MAY_BE_ANY|MAY_BE_UNDEF)) != (op1_def_info & (MAY_BE_ANY|MAY_BE_UNDEF))) { - if (!zend_jit_scalar_type_guard(&dasm_state, opline, opline->op1.var)) { + if (!zend_jit_scalar_type_guard(&ctx, opline, opline->op1.var)) { goto jit_failure; } op1_info &= ~(MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF|MAY_BE_GUARD); @@ -4983,10 +5603,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } if (orig_op1_type != IS_UNKNOWN) { if (orig_op1_type & IS_TRACE_REFERENCE) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, +#ifndef ZEND_JIT_IR + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 0)) { goto jit_failure; } +#else + if (!zend_jit_guard_reference(&ctx, opline, &op1_addr, &ref_addr, + !ssa->var_info[ssa_op->op1_use].guarded_reference)) { + goto jit_failure; + } + op1_info &= ~MAY_BE_REF; +#endif if (opline->op1_type == IS_CV && ssa->vars[ssa_op->op1_def].alias == NO_ALIAS) { ssa->var_info[ssa_op->op1_def].guarded_reference = 1; @@ -4999,18 +5627,20 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign_to_typed_ref(&dasm_state, opline, opline->op2_type, op2_addr, res_addr, 1)) { +#ifndef ZEND_JIT_IR + if (!zend_jit_assign_to_typed_ref(&ctx, opline, opline->op2_type, op2_addr, res_addr, 1)) { goto jit_failure; } +#endif op1_def_addr = op1_addr; op1_def_info &= ~MAY_BE_REF; } else if (op1_info & MAY_BE_REF) { - if (!zend_jit_noref_guard(&dasm_state, opline, op1_addr)) { + if (!zend_jit_noref_guard(&ctx, opline, op1_addr)) { goto jit_failure; } op1_info &= ~MAY_BE_REF; @@ -5027,16 +5657,19 @@ static const void *zend_jit_trace(zend_jit_trace_rec 
*trace_buffer, uint32_t par && zend_jit_trace_next_is_send_result(opline, p, frame)) { send_result = 1; res_addr = ZEND_ADDR_MEM_ZVAL(ZREG_RX, (opline+1)->result.var); - if (!zend_jit_reuse_ip(&dasm_state)) { + if (!zend_jit_reuse_ip(&ctx)) { goto jit_failure; } } } - if (!zend_jit_assign(&dasm_state, opline, + if (!zend_jit_assign(&ctx, opline, op1_info, op1_addr, op1_def_info, op1_def_addr, op2_info, op2_addr, op2_def_addr, res_info, res_addr, +#ifdef ZEND_JIT_IR + ref_addr, +#endif zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info))) { goto jit_failure; } @@ -5064,8 +5697,10 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par CHECK_OP1_TRACE_TYPE(); res_info = RES_INFO(); res_use_info = zend_jit_trace_type_to_info( - STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))) - & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var))); + if (opline->result_type == IS_CV) { + res_use_info &= (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE); + } res_addr = RES_REG_ADDR(); if (Z_MODE(res_addr) != IS_REG && STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var)) != @@ -5073,7 +5708,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par /* type may be not set */ res_use_info |= MAY_BE_NULL; } - if (!zend_jit_qm_assign(&dasm_state, opline, + if (!zend_jit_qm_assign(&ctx, opline, op1_info, op1_addr, op1_def_addr, res_use_info, res_info, res_addr)) { goto jit_failure; @@ -5088,7 +5723,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_INIT_FCALL_BY_NAME: case ZEND_INIT_NS_FCALL_BY_NAME: frame_flags = TRACE_FRAME_MASK_NESTED; - if (!zend_jit_init_fcall(&dasm_state, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { + if (!zend_jit_init_fcall(&ctx, opline, op_array_ssa->cfg.map ? 
op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) { goto jit_failure; } goto done; @@ -5104,7 +5739,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_send_val(&dasm_state, opline, + if (!zend_jit_send_val(&ctx, opline, op1_info, OP1_REG_ADDR())) { goto jit_failure; } @@ -5125,7 +5760,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par break; } op1_info = OP1_INFO(); - if (!zend_jit_send_ref(&dasm_state, opline, op_array, + if (!zend_jit_send_ref(&ctx, opline, op_array, op1_info, 0)) { goto jit_failure; } @@ -5151,14 +5786,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (ra && ssa_op->op1_def >= 0 - && !ssa->vars[ssa_op->op1_def].no_val) { + && (!ssa->vars[ssa_op->op1_def].no_val + || STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var)) == IS_UNKNOWN + || STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var)) >= IS_STRING)) { op1_def_addr = OP1_DEF_REG_ADDR(); } else { op1_def_addr = op1_addr; } op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_send_var(&dasm_state, opline, op_array, + if (!zend_jit_send_var(&ctx, opline, op_array, op1_info, op1_addr, op1_def_addr)) { goto jit_failure; } @@ -5193,7 +5830,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par TRACE_FRAME_SET_LAST_SEND_UNKNOWN(JIT_G(current_frame)->call); break; } - if (!zend_jit_check_func_arg(&dasm_state, opline)) { + if (!zend_jit_check_func_arg(&ctx, opline)) { goto jit_failure; } goto done; @@ -5202,7 +5839,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par && JIT_G(current_frame)->call) { TRACE_FRAME_SET_UNKNOWN_NUM_ARGS(JIT_G(current_frame)->call); } - if (!zend_jit_check_undef_args(&dasm_state, opline)) { + if (!zend_jit_check_undef_args(&ctx, opline)) { goto jit_failure; } goto done; @@ -5210,7 +5847,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_DO_ICALL: case ZEND_DO_FCALL_BY_NAME: case ZEND_DO_FCALL: - if (!zend_jit_do_fcall(&dasm_state, opline, op_array, op_array_ssa, frame->call_level, -1, p + 1)) { + if (!zend_jit_do_fcall(&ctx, opline, op_array, op_array_ssa, frame->call_level, -1, p + 1)) { goto jit_failure; } goto done; @@ -5234,7 +5871,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5242,7 +5883,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par goto jit_failure; } smart_branch_opcode = exit_if_true ? 
ZEND_JMPNZ : ZEND_JMPZ; - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5254,7 +5895,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { smart_branch_opcode = 0; exit_addr = NULL; - if (!zend_jit_cmp(&dasm_state, opline, + if (!zend_jit_cmp(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5282,7 +5923,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5293,7 +5938,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par exit_if_true = !exit_if_true; } smart_branch_opcode = exit_if_true ? ZEND_JMPNZ : ZEND_JMPZ; - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5305,7 +5950,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { smart_branch_opcode = 0; exit_addr = NULL; - if (!zend_jit_identical(&dasm_state, opline, + if (!zend_jit_identical(&ctx, opline, op1_info, OP1_RANGE(), OP1_REG_ADDR(), op2_info, OP2_RANGE(), OP2_REG_ADDR(), RES_REG_ADDR(), @@ -5330,7 +5975,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_defined(&dasm_state, opline, smart_branch_opcode, -1, -1, exit_addr)) { + if (!zend_jit_defined(&ctx, opline, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; } goto done; @@ -5347,7 +5992,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par uint32_t exit_point; if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } exit_point = zend_jit_trace_get_exit_point(exit_opline, 0); exit_addr = zend_jit_trace_get_exit_addr(exit_point); @@ -5359,7 +6008,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_type_check(&dasm_state, opline, op1_info, smart_branch_opcode, -1, -1, exit_addr)) { + if (!zend_jit_type_check(&ctx, opline, op1_info, smart_branch_opcode, -1, -1, exit_addr)) { goto jit_failure; } goto done; @@ -5376,7 +6025,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par || !op_array->function_name // TODO: support for IS_UNDEF ??? 
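/*
 * Aside: for a comparison whose consumer is a conditional jump off the
 * trace, the code above fuses the two: it resolves the side exit up front
 * (exit_point/exit_addr), picks the branch sense as ZEND_JMPNZ or ZEND_JMPZ
 * (exit_if_true), and lets the compare branch straight to the exit stub.
 * Schematically, with the function pointer standing in for exit_addr:
 */
static long fused_cmp_branch(long a, long b, int exit_if_true,
                             void (*exit_addr)(void)) {
    int t = (a < b);   /* the ZEND_IS_SMALLER-style comparison */
    if (t == exit_if_true) {
        exit_addr();   /* leave the trace through the side exit */
    }
    return t;          /* on-trace path continues with the result */
}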
|| (op1_info & MAY_BE_UNDEF)) { - if (!zend_jit_trace_handler(&dasm_state, op_array, opline, zend_may_throw(opline, ssa_op, op_array, ssa), p + 1)) { + if (!zend_jit_trace_handler(&ctx, op_array, opline, zend_may_throw(opline, ssa_op, op_array, ssa), p + 1)) { goto jit_failure; } } else { @@ -5384,13 +6033,13 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par int may_throw = 0; bool left_frame = 0; - if (!zend_jit_return(&dasm_state, opline, op_array, + if (!zend_jit_return(&ctx, opline, op_array, op1_info, OP1_REG_ADDR())) { goto jit_failure; } if (op_array->last_var > 100) { /* To many CVs to unroll */ - if (!zend_jit_free_cvs(&dasm_state)) { + if (!zend_jit_free_cvs(&ctx)) { goto jit_failure; } left_frame = 1; @@ -5416,11 +6065,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (info & (MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF)) { if (!left_frame) { left_frame = 1; - if (!zend_jit_leave_frame(&dasm_state)) { + if (!zend_jit_leave_frame(&ctx)) { goto jit_failure; } } - if (!zend_jit_free_cv(&dasm_state, info, j)) { + if (!zend_jit_free_cv(&ctx, info, j)) { goto jit_failure; } if (info & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_ARRAY|MAY_BE_ARRAY_OF_RESOURCE)) { @@ -5431,7 +6080,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } } } - if (!zend_jit_leave_func(&dasm_state, op_array, opline, op1_info, left_frame, + if (!zend_jit_leave_func(&ctx, op_array, opline, op1_info, left_frame, p + 1, &zend_jit_traces[ZEND_JIT_TRACE_NUM], (op_array_ssa->cfg.flags & ZEND_FUNC_INDIRECT_VAR_ACCESS) != 0, may_throw)) { goto jit_failure; @@ -5442,7 +6091,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par case ZEND_BOOL_NOT: op1_info = OP1_INFO(); CHECK_OP1_TRACE_TYPE(); - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, op1_info, OP1_REG_ADDR(), RES_REG_ADDR(), -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -5480,7 +6129,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par ZEND_UNREACHABLE(); } if (ra) { - zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra); +#ifndef ZEND_JIT_IR + zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra); +#else + zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes); +#endif } if (!(op1_info & MAY_BE_GUARD) && has_concrete_type(op1_info) @@ -5513,7 +6166,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par } else { res_addr = RES_REG_ADDR(); } - if (!zend_jit_bool_jmpznz(&dasm_state, opline, + if (!zend_jit_bool_jmpznz(&ctx, opline, op1_info, OP1_REG_ADDR(), res_addr, -1, -1, zend_may_throw(opline, ssa_op, op_array, ssa), @@ -5530,7 +6183,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par op1_addr = OP1_REG_ADDR(); if (orig_op1_type != IS_UNKNOWN && (orig_op1_type & IS_TRACE_REFERENCE)) { - if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr, + if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) { goto jit_failure; } @@ -5555,7 +6208,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par smart_branch_opcode = 0; exit_addr = NULL; } - if (!zend_jit_isset_isempty_cv(&dasm_state, opline, + if (!zend_jit_isset_isempty_cv(&ctx, opline, op1_info, 
@@ -5530,7 +6183,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_addr = OP1_REG_ADDR();
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5555,7 +6208,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
smart_branch_opcode = 0;
exit_addr = NULL;
}
- if (!zend_jit_isset_isempty_cv(&dasm_state, opline,
+ if (!zend_jit_isset_isempty_cv(&ctx, opline,
op1_info, op1_addr,
smart_branch_opcode, -1, -1, exit_addr)) {
goto jit_failure;
}
@@ -5585,7 +6238,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
smart_branch_opcode = 0;
exit_addr = NULL;
}
- if (!zend_jit_in_array(&dasm_state, opline,
+ if (!zend_jit_in_array(&ctx, opline,
op1_info, op1_addr,
smart_branch_opcode, -1, -1, exit_addr)) {
goto jit_failure;
@@ -5606,7 +6259,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_addr = OP1_REG_ADDR();
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5646,7 +6299,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
}
}
- if (!zend_jit_fetch_dim_read(&dasm_state, opline, ssa, ssa_op,
+ if (!zend_jit_fetch_dim_read(&ctx, opline, ssa, ssa_op,
op1_info, op1_addr, avoid_refcounting,
op2_info, res_info, RES_REG_ADDR(), val_type)) {
goto jit_failure;
@@ -5669,7 +6322,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (opline->op1_type == IS_VAR) {
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_INDIRECT)) {
- if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type,
+ if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type,
&op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) {
goto jit_failure;
}
@@ -5679,7 +6332,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5693,7 +6346,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op2_info = OP2_INFO();
CHECK_OP2_TRACE_TYPE();
op1_def_info = OP1_DEF_INFO();
- if (!zend_jit_fetch_dim(&dasm_state, opline,
+ if (!zend_jit_fetch_dim(&ctx, opline,
op1_info, op1_addr, op2_info, RES_REG_ADDR(), val_type)) {
goto jit_failure;
}
@@ -5713,7 +6366,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_addr = OP1_REG_ADDR();
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5732,7 +6385,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
uint32_t exit_point;
if (ra) {
- zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra);
+#ifndef ZEND_JIT_IR
+ zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra);
+#else
+ zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes);
+#endif
}
if (ssa_op->op1_use >= 0
&& ssa->var_info[ssa_op->op1_use].avoid_refcounting) {
@@ -5772,7 +6429,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_info &= ~MAY_BE_ARRAY_PACKED;
}
}
- if (!zend_jit_isset_isempty_dim(&dasm_state, opline,
+ if (!zend_jit_isset_isempty_dim(&ctx, opline,
op1_info, op1_addr, avoid_refcounting,
op2_info, val_type,
zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info),
@@ -5819,7 +6476,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_INDIRECT)) {
op1_indirect = 1;
- if (!zend_jit_fetch_indirect_var(&dasm_state, opline, orig_op1_type,
+ if (!zend_jit_fetch_indirect_var(&ctx, opline, orig_op1_type,
&op1_info, &op1_addr, !ssa->var_info[ssa_op->op1_use].indirect_reference)) {
goto jit_failure;
}
@@ -5827,7 +6484,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5865,7 +6522,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
on_this = op_array->opcodes[op_array_ssa->vars[op_array_ssa->ops[opline-op_array->opcodes].op1_use].definition].opcode == ZEND_FETCH_THIS;
}
}
- if (!zend_jit_fetch_obj(&dasm_state, opline, op_array, ssa, ssa_op,
+ if (!zend_jit_fetch_obj(&ctx, opline, op_array, ssa, ssa_op,
op1_info, op1_addr, op1_indirect, ce, ce_is_instanceof,
on_this, delayed_fetch_this, avoid_refcounting, op1_ce, val_type,
zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, MAY_BE_STRING))) {
@@ -5884,7 +6541,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (ssa->vars[ssa_op->op1_def].alias == NO_ALIAS) {
ssa->var_info[ssa_op->op1_def].guarded_reference = 1;
}
- if (!zend_jit_bind_global(&dasm_state, opline, op1_info)) {
+ if (!zend_jit_bind_global(&ctx, opline, op1_info)) {
goto jit_failure;
}
if ((opline+1)->opcode == ZEND_BIND_GLOBAL) {
@@ -5898,7 +6555,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
ssa_op = orig_ssa_op;
goto done;
case ZEND_RECV:
- if (!zend_jit_recv(&dasm_state, opline, op_array)) {
+ if (!zend_jit_recv(&ctx, opline, op_array)) {
goto jit_failure;
}
goto done;
@@ -5906,7 +6563,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
orig_opline = opline;
orig_ssa_op = ssa_op;
while (1) {
- if (!zend_jit_recv_init(&dasm_state, opline, op_array,
+ if (!zend_jit_recv_init(&ctx, opline, op_array,
(opline + 1)->opcode != ZEND_RECV_INIT,
zend_may_throw(opline, ssa_op, op_array, ssa))) {
goto jit_failure;
@@ -5924,7 +6581,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
case ZEND_FREE:
case ZEND_FE_FREE:
op1_info = OP1_INFO();
- if (!zend_jit_free(&dasm_state, opline, op1_info,
+ if (!zend_jit_free(&ctx, opline, op1_info,
zend_may_throw(opline, ssa_op, op_array, ssa))) {
goto jit_failure;
}
@@ -5935,7 +6592,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if ((op1_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) {
break;
}
- if (!zend_jit_echo(&dasm_state, opline, op1_info)) {
+ if (!zend_jit_echo(&ctx, opline, op1_info)) {
goto jit_failure;
}
goto done;
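
Several opcodes above are only specialized when type inference proves a single concrete type; ZEND_ECHO is the clearest case, requiring `op1_info` to be exactly MAY_BE_STRING with no undef or reference bits set. A self-contained sketch of that bitmask gating (the MAY_BE_* values here are illustrative, not the real type lattice):

#include <stdio.h>
#include <stdint.h>

/* Invented subset of the MAY_BE_* flags used in the checks above. */
#define MAY_BE_STRING (1u << 0)
#define MAY_BE_LONG   (1u << 1)
#define MAY_BE_REF    (1u << 2)
#define MAY_BE_UNDEF  (1u << 3)
#define MAY_BE_ANY    (MAY_BE_STRING | MAY_BE_LONG)

static int can_specialize_echo(uint32_t op1_info)
{
	/* JIT the fast path only when inference proves "exactly a string":
	 * no refs, no undef, no other scalar may flow into this operand. */
	return (op1_info & (MAY_BE_UNDEF | MAY_BE_ANY | MAY_BE_REF)) == MAY_BE_STRING;
}

int main(void)
{
	printf("%d\n", can_specialize_echo(MAY_BE_STRING));              /* 1 */
	printf("%d\n", can_specialize_echo(MAY_BE_STRING | MAY_BE_REF)); /* 0 */
	return 0;
}
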
@@ -5943,7 +6600,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_info = OP1_INFO();
op1_addr = OP1_REG_ADDR();
if (orig_op1_type == (IS_TRACE_REFERENCE|IS_STRING)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5957,7 +6614,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
break;
}
}
- if (!zend_jit_strlen(&dasm_state, opline, op1_info, op1_addr, RES_REG_ADDR())) {
+ if (!zend_jit_strlen(&ctx, opline, op1_info, op1_addr, RES_REG_ADDR())) {
goto jit_failure;
}
goto done;
@@ -5965,7 +6622,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_info = OP1_INFO();
op1_addr = OP1_REG_ADDR();
if (orig_op1_type == (IS_TRACE_REFERENCE|IS_ARRAY)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -5979,7 +6636,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
break;
}
}
- if (!zend_jit_count(&dasm_state, opline, op1_info, op1_addr, RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) {
+ if (!zend_jit_count(&ctx, opline, op1_info, op1_addr, RES_REG_ADDR(), zend_may_throw(opline, ssa_op, op_array, ssa))) {
goto jit_failure;
}
goto done;
@@ -5991,14 +6648,14 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
delayed_fetch_this = 1;
}
}
- if (!zend_jit_fetch_this(&dasm_state, opline, op_array, delayed_fetch_this)) {
+ if (!zend_jit_fetch_this(&ctx, opline, op_array, delayed_fetch_this)) {
goto jit_failure;
}
goto done;
case ZEND_SWITCH_LONG:
case ZEND_SWITCH_STRING:
case ZEND_MATCH:
- if (!zend_jit_switch(&dasm_state, opline, op_array, op_array_ssa, p+1, &zend_jit_traces[ZEND_JIT_TRACE_NUM])) {
+ if (!zend_jit_switch(&ctx, opline, op_array, op_array_ssa, p+1, &zend_jit_traces[ZEND_JIT_TRACE_NUM])) {
goto jit_failure;
}
goto done;
@@ -6021,7 +6678,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
/* TODO May need reference unwrapping. */
break;
}
- if (!zend_jit_verify_return_type(&dasm_state, opline, op_array, op1_info)) {
+ if (!zend_jit_verify_return_type(&ctx, opline, op_array, op1_info)) {
goto jit_failure;
}
goto done;
@@ -6031,7 +6688,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if ((op1_info & (MAY_BE_ANY|MAY_BE_REF|MAY_BE_UNDEF)) != MAY_BE_ARRAY) {
break;
}
- if (!zend_jit_fe_reset(&dasm_state, opline, op1_info)) {
+ if (!zend_jit_fe_reset(&ctx, opline, op1_info)) {
goto jit_failure;
}
goto done;
@@ -6063,13 +6720,13 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
} else {
ZEND_UNREACHABLE();
}
- if (!zend_jit_fe_fetch(&dasm_state, opline, op1_info, OP2_INFO(),
+ if (!zend_jit_fe_fetch(&ctx, opline, op1_info, OP2_INFO(),
-1, smart_branch_opcode, exit_addr)) {
goto jit_failure;
}
goto done;
case ZEND_FETCH_CONSTANT:
- if (!zend_jit_fetch_constant(&dasm_state, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) {
+ if (!zend_jit_fetch_constant(&ctx, opline, op_array, ssa, ssa_op, RES_REG_ADDR())) {
goto jit_failure;
}
goto done;
@@ -6092,10 +6749,12 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op1_addr = OP1_REG_ADDR();
if (polymorphic_side_trace) {
op1_info = MAY_BE_OBJECT;
+#ifndef ZEND_JIT_IR
op1_addr = 0;
+#endif
} else if (orig_op1_type != IS_UNKNOWN
&& (orig_op1_type & IS_TRACE_REFERENCE)) {
- if (!zend_jit_fetch_reference(&dasm_state, opline, orig_op1_type, &op1_info, &op1_addr,
+ if (!zend_jit_fetch_reference(&ctx, opline, orig_op1_type, &op1_info, &op1_addr,
!ssa->var_info[ssa_op->op1_use].guarded_reference, 1)) {
goto jit_failure;
}
@@ -6130,11 +6789,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
}
frame_flags = TRACE_FRAME_MASK_NESTED;
- if (!zend_jit_init_method_call(&dasm_state, opline,
+ if (!zend_jit_init_method_call(&ctx, opline,
op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1,
op_array, ssa, ssa_op, frame->call_level,
op1_info, op1_addr, ce, ce_is_instanceof, on_this, delayed_fetch_this, op1_ce,
- p + 1, peek_checked_stack - checked_stack, polymorphic_side_trace)) {
+ p + 1, peek_checked_stack - checked_stack,
+#ifdef ZEND_JIT_IR
+ polymorphic_side_trace ? zend_jit_traces[parent_trace].exit_info[exit_num].poly_func_reg : -1,
+ polymorphic_side_trace ? zend_jit_traces[parent_trace].exit_info[exit_num].poly_this_reg : -1,
+#endif
+ polymorphic_side_trace)) {
goto jit_failure;
}
goto done;
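
For ZEND_INIT_METHOD_CALL the IR build threads two extra arguments through `zend_jit_init_method_call`: the registers in which the parent trace's polymorphic-call exit left the target function and `$this` (`poly_func_reg`/`poly_this_reg`), or -1 when the side trace is not polymorphic. A hypothetical, self-contained sketch of that hand-off (invented structure, not the php-src layout):

#include <stdio.h>

/* Stand-in for the exit-info fields referenced above: a polymorphic
 * method-call side exit records which machine registers held the target
 * function and $this, so the side trace can pick them up directly. */
typedef struct {
	int poly_func_reg; /* -1 when not a polymorphic method-call exit */
	int poly_this_reg;
} exit_info;

static void start_side_trace(const exit_info *e, int polymorphic)
{
	int func_reg = polymorphic ? e->poly_func_reg : -1;
	int this_reg = polymorphic ? e->poly_this_reg : -1;
	printf("func in r%d, $this in r%d\n", func_reg, this_reg);
}

int main(void)
{
	exit_info e = { 5, 6 };
	start_side_trace(&e, 1);
	return 0;
}
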
@@ -6145,7 +6809,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
op2_info = OP2_INFO();
CHECK_OP2_TRACE_TYPE();
frame_flags = TRACE_FRAME_MASK_NESTED;
- if (!zend_jit_init_closure_call(&dasm_state, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) {
+ if (!zend_jit_init_closure_call(&ctx, opline, op_array_ssa->cfg.map ? op_array_ssa->cfg.map[opline - op_array->opcodes] : -1, op_array, ssa, ssa_op, frame->call_level, p + 1, peek_checked_stack - checked_stack)) {
goto jit_failure;
}
goto done;
@@ -6164,7 +6828,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if ((op2_info & (MAY_BE_UNDEF|MAY_BE_ANY|MAY_BE_REF)) != MAY_BE_STRING) {
break;
}
- if (!zend_jit_rope(&dasm_state, opline, op2_info)) {
+ if (!zend_jit_rope(&ctx, opline, op2_info)) {
goto jit_failure;
}
goto done;
@@ -6183,17 +6847,17 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (op2_info & MAY_BE_GUARD) {
op2_info = MAY_BE_RC1 | MAY_BE_RCN | MAY_BE_REF | MAY_BE_ANY | MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF;
}
- if (!zend_jit_trace_handler(&dasm_state, op_array, opline,
+ if (!zend_jit_trace_handler(&ctx, op_array, opline,
zend_may_throw_ex(opline, ssa_op, op_array, ssa, op1_info, op2_info), p + 1)) {
goto jit_failure;
}
if ((p+1)->op == ZEND_JIT_TRACE_INIT_CALL && (p+1)->func) {
- if (opline->opcode == ZEND_NEW && ssa_op->result_def >= 0) {
+ if (opline->opcode == ZEND_NEW && opline->result_type != IS_UNUSED) {
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), IS_OBJECT, 1);
}
if (zend_jit_may_be_polymorphic_call(opline) || zend_jit_may_be_modified((p+1)->func, op_array)) {
- if (!zend_jit_init_fcall_guard(&dasm_state, 0, (p+1)->func, opline+1)) {
+ if (!zend_jit_init_fcall_guard(&ctx, 0, (p+1)->func, opline+1)) {
goto jit_failure;
}
}
@@ -6212,13 +6876,25 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
if (ra) {
- zend_jit_trace_clenup_stack(stack, opline, ssa_op, ssa, ra);
+#ifndef ZEND_JIT_IR
+ zend_jit_trace_cleanup_stack(stack, opline, ssa_op, ssa, ra);
+#else
+ zend_jit_trace_cleanup_stack(&ctx, stack, opline, ssa_op, ssa, ssa_opcodes);
+#endif
}
+#ifndef ZEND_JIT_IR
if ((opline->op1_type & (IS_VAR|IS_TMP_VAR))
&& STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var)) > ZREG_NUM) {
SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE);
}
+#else
+ if ((opline->op1_type & (IS_VAR|IS_TMP_VAR))
+ && STACK_FLAGS(stack, EX_VAR_TO_NUM(opline->op1.var)) & (ZREG_ZVAL_ADDREF|ZREG_THIS)) {
+ SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->op1.var), ZREG_NONE);
+ }
+
+#endif
if (opline->opcode == ZEND_ROPE_INIT) {
/* clear stack slots used by rope */
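
The stack-map maintenance above changes representation rather than behavior: the old JIT encoded special values (`$this`, deferred addref) as pseudo-registers above ZREG_NUM, while the IR JIT keeps them in a separate per-slot flags field queried with STACK_FLAGS. A minimal sketch of such a shadow stack map (simplified types, not the php-src layout):

#include <stdio.h>
#include <stdint.h>

/* Invented stand-ins for the per-slot flag bits consulted above. */
#define ZREG_ZVAL_ADDREF (1u << 0)
#define ZREG_THIS        (1u << 1)

typedef struct { uint8_t type; uint8_t flags; } stack_slot;

static void invalidate_if_special(stack_slot *stack, int slot)
{
	/* Mirrors the hunk above: once an opcode consumes a VAR/TMP, any
	 * "delayed" representation (borrowed $this, deferred addref) must
	 * be dropped so the slot is treated as plain memory again. */
	if (stack[slot].flags & (ZREG_ZVAL_ADDREF | ZREG_THIS)) {
		stack[slot].flags = 0;
		printf("slot %d back to plain memory\n", slot);
	}
}

int main(void)
{
	stack_slot stack[4] = { { 8, ZREG_THIS } };
	invalidate_if_special(stack, 0);
	return 0;
}
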
@@ -6275,21 +6951,33 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
} else {
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), type,
- (gen_handler || type == IS_UNKNOWN || !ra || !ra[ssa_op->result_def]));
+ (gen_handler || type == IS_UNKNOWN || !ra || !RA_HAS_REG(ssa_op->result_def)));
if (ssa->var_info[ssa_op->result_def].type & MAY_BE_INDIRECT) {
RESET_STACK_MEM_TYPE(stack, EX_VAR_TO_NUM(opline->result.var));
}
if (type != IS_UNKNOWN) {
ssa->var_info[ssa_op->result_def].type &= ~MAY_BE_GUARD;
+#ifndef ZEND_JIT_IR
if (opline->opcode == ZEND_FETCH_THIS
&& delayed_fetch_this) {
SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_THIS);
} else if (ssa->var_info[ssa_op->result_def].avoid_refcounting) {
SET_STACK_REG(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_ZVAL_TRY_ADDREF);
- } else if (ra && ra[ssa_op->result_def]) {
+ } else if (ra && RA_HAS_REG(ssa_op->result_def)) {
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def]->reg,
- ra[ssa_op->result_def]->flags & ZREG_STORE);
+ RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE);
+ }
+#else
+ if (opline->opcode == ZEND_FETCH_THIS
+ && delayed_fetch_this) {
+ SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_THIS);
+ } else if (ssa->var_info[ssa_op->result_def].avoid_refcounting) {
+ SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ZREG_NONE, ZREG_ZVAL_ADDREF);
+ } else if (ra && RA_HAS_REG(ssa_op->result_def)) {
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def].ref,
+ RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE);
}
+#endif
}
}
@@ -6331,12 +7019,12 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type,
(gen_handler || type == IS_UNKNOWN || !ra ||
- (!ra[ssa_op->op1_def] &&
+ (!RA_HAS_REG(ssa_op->op1_def) &&
(opline->opcode == ZEND_ASSIGN || !ssa->vars[ssa_op->op1_def].no_val))));
if (type != IS_UNKNOWN) {
ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD;
- if (ra && ra[ssa_op->op1_def]) {
- uint8_t flags = ra[ssa_op->op1_def]->flags & ZREG_STORE;
+ if (ra && RA_HAS_REG(ssa_op->op1_def)) {
+ uint8_t flags = RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE;
if (ssa_op->op1_use >= 0) {
if (opline->opcode == ZEND_SEND_VAR
@@ -6346,12 +7034,16 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
|| opline->opcode == ZEND_COALESCE
|| opline->opcode == ZEND_JMP_NULL
|| opline->opcode == ZEND_FE_RESET_R) {
- if (!ra[ssa_op->op1_use]) {
+ if (!RA_HAS_REG(ssa_op->op1_use)) {
flags |= ZREG_LOAD;
}
}
}
+#ifndef ZEND_JIT_IR
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg, flags);
+#else
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref, flags);
+#endif
}
}
if (type == IS_LONG
@@ -6378,21 +7070,28 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op2.var), type,
(gen_handler || type == IS_UNKNOWN || !ra ||
- (!ra[ssa_op->op2_def] /*&& !ssa->vars[ssa_op->op2_def].no_val*/)));
+ (!RA_HAS_REG(ssa_op->op2_def) /*&& !ssa->vars[ssa_op->op2_def].no_val*/)));
if (type != IS_UNKNOWN) {
ssa->var_info[ssa_op->op2_def].type &= ~MAY_BE_GUARD;
- if (ra && ra[ssa_op->op2_def]) {
- uint8_t flags = ra[ssa_op->op2_def]->flags & ZREG_STORE;
+ if (ra && RA_HAS_REG(ssa_op->op2_def)) {
+ uint8_t flags = RA_REG_FLAGS(ssa_op->op2_def) & ZREG_STORE;
if (ssa_op->op2_use >= 0) {
if (opline->opcode == ZEND_ASSIGN) {
- if (!ra[ssa_op->op2_use]
- || ra[ssa_op->op2_use]->reg != ra[ssa_op->op2_def]->reg) {
+ if (!RA_HAS_REG(ssa_op->op2_use)
+#ifndef ZEND_JIT_IR
+ || ra[ssa_op->op2_use]->reg != ra[ssa_op->op2_def]->reg
+#endif
+ ) {
flags |= ZREG_LOAD;
}
}
}
+#ifndef ZEND_JIT_IR
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op2.var), ra[ssa_op->op2_def]->reg, flags);
+#else
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op2.var), ra[ssa_op->op2_def].ref, flags);
+#endif
}
}
if (type == IS_LONG
@@ -6430,12 +7129,17 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
type = STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var));
}
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type,
- (gen_handler || type == IS_UNKNOWN || !ra || !ra[ssa_op->op1_def]));
+ (gen_handler || type == IS_UNKNOWN || !ra || !RA_HAS_REG(ssa_op->op1_def)));
if (type != IS_UNKNOWN) {
ssa->var_info[ssa_op->op1_def].type &= ~MAY_BE_GUARD;
- if (ra && ra[ssa_op->op1_def]) {
+ if (ra && RA_HAS_REG(ssa_op->op1_def)) {
+#ifndef ZEND_JIT_IR
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg,
- ra[ssa_op->op1_def]->flags & ZREG_STORE);
+ RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE);
+#else
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref,
+ RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE);
+#endif
}
}
if (type == IS_LONG
@@ -6461,10 +7165,15 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
type = concrete_type(ssa->var_info[ssa_op->result_def].type);
}
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->result.var), type,
- (gen_handler || !ra || !ra[ssa_op->result_def]));
- if (ra && ra[ssa_op->result_def]) {
+ (gen_handler || !ra || !RA_HAS_REG(ssa_op->result_def)));
+ if (ra && RA_HAS_REG(ssa_op->result_def)) {
+#ifndef ZEND_JIT_IR
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def]->reg,
- ra[ssa_op->result_def]->flags & ZREG_STORE);
+ RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE);
+#else
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->result.var), ra[ssa_op->result_def].ref,
+ RA_REG_FLAGS(ssa_op->result_def) & ZREG_STORE);
+#endif
}
}
ssa_op++;
@@ -6483,10 +7192,15 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
type = concrete_type(ssa->var_info[ssa_op->op1_def].type);
}
SET_STACK_TYPE(stack, EX_VAR_TO_NUM(opline->op1.var), type,
- (gen_handler || !ra || !ra[ssa_op->op1_def]));
- if (ra && ra[ssa_op->op1_def]) {
+ (gen_handler || !ra || !RA_HAS_REG(ssa_op->op1_def)));
+ if (ra && RA_HAS_REG(ssa_op->op1_def)) {
+#ifndef ZEND_JIT_IR
SET_STACK_REG_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def]->reg,
- ra[ssa_op->op1_def]->flags & ZREG_STORE);
+ RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE);
+#else
+ SET_STACK_REF_EX(stack, EX_VAR_TO_NUM(opline->op1.var), ra[ssa_op->op1_def].ref,
+ RA_REG_FLAGS(ssa_op->op1_def) & ZREG_STORE);
+#endif
}
}
ssa_op++;
@@ -6534,7 +7248,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
|| (p+1)->op == ZEND_JIT_TRACE_END)
&& (TRACE_FRAME_NUM_ARGS(call) < 0
|| TRACE_FRAME_NUM_ARGS(call) < p->op_array->num_args)
- && !zend_jit_trace_opline_guard(&dasm_state, (p+1)->opline)) {
+ && !zend_jit_trace_opline_guard(&ctx, (p+1)->opline)) {
goto jit_failure;
}
JIT_G(current_frame) = frame;
@@ -6553,6 +7267,9 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
TRACE_FRAME_SET_THIS_CHECKED(call);
}
op_array = (zend_op_array*)p->op_array;
+#ifdef ZEND_JIT_IR
+ ctx.current_op_array = op_array;
+#endif
jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array);
op_array_ssa = &jit_extension->func_info.ssa;
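
All of the `ra[...]->reg` versus `ra[...].ref` churn above is absorbed by two accessor macros, RA_HAS_REG() and RA_REG_FLAGS(), so the shared bookkeeping loops compile against either register-allocator layout. A compilable sketch of the idea, assuming deliberately simplified structures:

#include <stdio.h>

/* Sketch of the accessor macros that hide the allocator representation:
 * old backend keeps an array of pointers, the IR backend an array of
 * by-value entries holding an IR ref. */
#ifndef ZEND_JIT_IR
typedef struct { int reg; int flags; } reg_info;
static reg_info *ra[8];                    /* old: array of pointers */
# define RA_HAS_REG(i)   (ra[(i)] != NULL)
# define RA_REG_FLAGS(i) (ra[(i)]->flags)
#else
typedef struct { int ref; int flags; } reg_info;
static reg_info ra[8];                     /* new: IR refs by value */
# define RA_HAS_REG(i)   (ra[(i)].ref != 0)
# define RA_REG_FLAGS(i) (ra[(i)].flags)
#endif

int main(void)
{
#ifndef ZEND_JIT_IR
	static reg_info r0 = { 1, 2 };
	ra[0] = &r0;
#else
	ra[0].ref = 1;
	ra[0].flags = 2;
#endif
	for (int i = 0; i < 8; i++) {
		if (RA_HAS_REG(i)) {
			printf("%d: flags=%d\n", i, RA_REG_FLAGS(i));
		}
	}
	return 0;
}
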
@@ -6568,27 +7285,37 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (ra) {
int j = ZEND_JIT_TRACE_GET_FIRST_SSA_VAR(p->info);
- for (i = 0; i < op_array->last_var; i++,j++) {
- if (ra[j] && (ra[j]->flags & ZREG_LOAD) != 0) {
+ for (i = 0; i < op_array->last_var; i++, j++) {
+ if (RA_HAS_REG(j) && (RA_REG_FLAGS(j) & ZREG_LOAD) != 0) {
if ((ssa->var_info[j].type & MAY_BE_GUARD) != 0) {
uint8_t op_type;
ssa->var_info[j].type &= ~MAY_BE_GUARD;
op_type = concrete_type(ssa->var_info[j].type);
- if (!zend_jit_type_guard(&dasm_state, opline, EX_NUM_TO_VAR(i), op_type)) {
+ if (!zend_jit_type_guard(&ctx, opline, EX_NUM_TO_VAR(i), op_type)) {
goto jit_failure;
}
SET_STACK_TYPE(stack, i, op_type, 1);
}
+#ifndef ZEND_JIT_IR
+ if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, ra[j]->reg)) {
+ goto jit_failure;
+ }
SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD);
- if (!zend_jit_load_var(&dasm_state, ssa->var_info[j].type, i, ra[j]->reg)) {
+#else
+ if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, j)) {
goto jit_failure;
}
+ SET_STACK_REF_EX(stack, i, ra[j].ref, ZREG_LOAD);
+#endif
}
}
}
} else if (p->op == ZEND_JIT_TRACE_BACK) {
op_array = (zend_op_array*)p->op_array;
+#ifdef ZEND_JIT_IR
+ ctx.current_op_array = op_array;
+#endif
jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array);
op_array_ssa = &jit_extension->func_info.ssa;
@@ -6619,11 +7346,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
if (ra) {
j = ZEND_JIT_TRACE_GET_FIRST_SSA_VAR(p->info);
for (i = 0; i < op_array->last_var + op_array->T; i++, j++) {
- if (ra[j] && (ra[j]->flags & ZREG_LOAD) != 0) {
+ if (RA_HAS_REG(j) && (RA_REG_FLAGS(j) & ZREG_LOAD) != 0) {
+#ifndef ZEND_JIT_IR
+ if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, ra[j]->reg)) {
+ goto jit_failure;
+ }
SET_STACK_REG_EX(stack, i, ra[j]->reg, ZREG_LOAD);
- if (!zend_jit_load_var(&dasm_state, ssa->var_info[j].type, i, ra[j]->reg)) {
+#else
+ if (!zend_jit_load_var(&ctx, ssa->var_info[j].type, i, j)) {
goto jit_failure;
}
+ SET_STACK_REF_EX(stack, i, ra[j].ref, ZREG_LOAD);
+#endif
}
}
}
@@ -6761,7 +7495,7 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
opline = q->opline;
ZEND_ASSERT(opline != NULL);
}
- if (!zend_jit_init_fcall_guard(&dasm_state,
+ if (!zend_jit_init_fcall_guard(&ctx,
ZEND_JIT_TRACE_FAKE_LEVEL(p->info), p->func, opline)) {
goto jit_failure;
}
@@ -6814,9 +7548,15 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
t = &zend_jit_traces[ZEND_JIT_TRACE_NUM];
+#ifndef ZEND_JIT_IR
if (!parent_trace && zend_jit_trace_uses_initial_ip()) {
t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP;
}
+#else
+ if (!parent_trace && zend_jit_trace_uses_initial_ip(&ctx)) {
+ t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP;
+ }
+#endif
if (p->stop == ZEND_JIT_TRACE_STOP_LOOP
|| p->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL
@@ -6825,21 +7565,22 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
zend_ssa_phi *phi = ssa->blocks[1].phis;
while (phi) {
- if (ra[phi->ssa_var]
- && ra[phi->sources[1]]
+ if (RA_HAS_REG(phi->sources[1])
&& STACK_MEM_TYPE(stack, phi->var) != STACK_TYPE(stack, phi->var)
- && (ra[phi->ssa_var]->flags & (ZREG_LOAD|ZREG_STORE)) == 0
- && (ra[phi->sources[1]]->flags & (ZREG_LOAD|ZREG_STORE)) == 0) {
- /* Store actual type to memory to avoid deoptimization mistakes */
- /* TODO: Alternatively, we may try to update alredy generated deoptimization info */
- zend_jit_store_var_type(&dasm_state, phi->var, STACK_TYPE(stack, phi->var));
+ && (RA_REG_FLAGS(phi->sources[1]) & (ZREG_LOAD|ZREG_STORE)) == 0) {
+
+ if (!RA_HAS_REG(phi->ssa_var)
+ || (RA_REG_FLAGS(phi->ssa_var) & (ZREG_LOAD|ZREG_STORE)) == 0) {
+ /* Store actual type to memory to avoid deoptimization mistakes */
+ zend_jit_store_var_type(&ctx, phi->var, STACK_TYPE(stack, phi->var));
+ }
}
phi = phi->next;
}
}
if (p->stop != ZEND_JIT_TRACE_STOP_RECURSIVE_RET) {
if ((t->flags & ZEND_JIT_TRACE_USES_INITIAL_IP)
- && !zend_jit_set_ip(&dasm_state, p->opline)) {
+ && !zend_jit_set_ip(&ctx, p->opline)) {
goto jit_failure;
}
}
@@ -6864,18 +7605,44 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
goto jit_failure;
}
} else {
+#ifndef ZEND_JIT_IR
timeout_exit_addr = dasm_labels[zend_lbinterrupt_handler];
+#else
+ timeout_exit_addr = zend_jit_stub_handlers[jit_stub_interrupt_handler];
+#endif
}
}
- zend_jit_trace_end_loop(&dasm_state, 0, timeout_exit_addr); /* jump back to start of the trace loop */
+#ifndef ZEND_JIT_IR
+ zend_jit_trace_end_loop(&ctx, 0, timeout_exit_addr); /* jump back to start of the trace loop */
+#else
+ zend_jit_trace_end_loop(&ctx, jit->trace_loop_ref, timeout_exit_addr); /* jump back to start of the trace loop */
+#endif
}
} else if (p->stop == ZEND_JIT_TRACE_STOP_LINK
|| p->stop == ZEND_JIT_TRACE_STOP_INTERPRETER) {
- if (!zend_jit_trace_deoptimization(&dasm_state, 0, NULL,
+#ifndef ZEND_JIT_IR
+ if (!zend_jit_trace_deoptimization(&ctx, 0, NULL,
stack, op_array->last_var + op_array->T, NULL, NULL, NULL, 0)) {
goto jit_failure;
}
+#else
+ if (ra && (p-1)->op != ZEND_JIT_TRACE_ENTER) {
+ for (i = 0; i < op_array->last_var + op_array->T; i++) {
+ int32_t ref = STACK_REF(stack, i);
+
+ if (ref) {
+ uint8_t type = STACK_TYPE(stack, i);
+
+ if (!(STACK_FLAGS(stack, i) & (ZREG_LOAD|ZREG_STORE))
+ && !zend_jit_store_ref(jit, 1 << type, i, ref, STACK_MEM_TYPE(stack, i) != type)) {
+ goto jit_failure;
+ }
+ }
+ CLEAR_STACK_REF(stack, i);
+ }
+ }
+#endif
if (p->stop == ZEND_JIT_TRACE_STOP_LINK) {
const void *timeout_exit_addr = NULL;
@@ -6890,12 +7657,18 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
goto jit_failure;
}
if ((zend_jit_traces[t->link].flags & ZEND_JIT_TRACE_USES_INITIAL_IP)
- && !zend_jit_set_ip(&dasm_state, p->opline)) {
+ && !zend_jit_set_ip(&ctx, p->opline)) {
goto jit_failure;
}
+#ifndef ZEND_JIT_IR
if (!parent_trace && zend_jit_trace_uses_initial_ip()) {
t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP;
}
+#else
+ if (!parent_trace && zend_jit_trace_uses_initial_ip(&ctx)) {
+ t->flags |= ZEND_JIT_TRACE_USES_INITIAL_IP;
+ }
+#endif
if (parent_trace
&& (zend_jit_traces[t->link].flags & ZEND_JIT_TRACE_CHECK_INTERRUPT)
&& zend_jit_traces[parent_trace].root == t->link) {
@@ -6911,15 +7684,19 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
goto jit_failure;
}
} else {
+#ifndef ZEND_JIT_IR
timeout_exit_addr = dasm_labels[zend_lbinterrupt_handler];
+#else
+ timeout_exit_addr = zend_jit_stub_handlers[jit_stub_interrupt_handler];
+#endif
}
}
- zend_jit_trace_link_to_root(&dasm_state, &zend_jit_traces[t->link], timeout_exit_addr);
+ zend_jit_trace_link_to_root(&ctx, &zend_jit_traces[t->link], timeout_exit_addr);
} else {
- zend_jit_trace_return(&dasm_state, 0, NULL);
+ zend_jit_trace_return(&ctx, 0, NULL);
}
} else if (p->stop == ZEND_JIT_TRACE_STOP_RETURN) {
- zend_jit_trace_return(&dasm_state, 0, NULL);
+ zend_jit_trace_return(&ctx, 0, NULL);
} else {
// TODO: not implemented ???
ZEND_ASSERT(0 && p->stop);
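
Before a trace links to another trace or falls back to the interpreter, the IR path above walks the stack map and stores every still-register-resident value back to its VM slot, writing the type tag only when the in-memory tag is stale, then clears the map entry. A self-contained sketch of that flush loop (the slot layout is invented for the example):

#include <stdio.h>
#include <stdint.h>

#define ZREG_LOAD  (1u << 0)
#define ZREG_STORE (1u << 1)

typedef struct {
	int32_t ref;      /* non-zero: value still lives in a register/IR ref */
	uint8_t type;     /* type proven while tracing */
	uint8_t mem_type; /* type tag currently stored in memory */
	uint8_t flags;
} stack_slot;

/* Spill anything register-resident so the next consumer sees memory. */
static int flush_slots(stack_slot *stack, int n)
{
	for (int i = 0; i < n; i++) {
		if (stack[i].ref
		 && !(stack[i].flags & (ZREG_LOAD | ZREG_STORE))) {
			int store_type = stack[i].mem_type != stack[i].type;
			printf("store slot %d (ref %d)%s\n",
				i, stack[i].ref, store_type ? " +type" : "");
		}
		stack[i].ref = 0; /* CLEAR_STACK_REF() analogue */
	}
	return 1;
}

int main(void)
{
	stack_slot stack[2] = { { 42, 4, 10, 0 }, { 0, 0, 0, 0 } };
	return flush_slots(stack, 2) ? 0 : 1;
}
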
@@ -6929,13 +7706,17 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
goto jit_failure;
}
- if (!zend_jit_trace_end(&dasm_state, t)) {
+#ifndef ZEND_JIT_IR
+ if (!zend_jit_trace_end(&ctx, t)) {
goto jit_failure;
}
- handler = dasm_link_and_encode(&dasm_state, NULL, NULL, NULL, NULL, ZSTR_VAL(name), ZEND_JIT_TRACE_NUM,
+ handler = dasm_link_and_encode(&ctx, NULL, NULL, NULL, NULL, ZSTR_VAL(name), ZEND_JIT_TRACE_NUM,
parent_trace ? SP_ADJ_JIT : ((zend_jit_vm_kind == ZEND_VM_KIND_HYBRID) ? SP_ADJ_VM : SP_ADJ_RET),
parent_trace ? SP_ADJ_NONE : SP_ADJ_JIT);
+#else
+ handler = zend_jit_finish(&ctx);
+#endif
if (handler) {
if (p->stop == ZEND_JIT_TRACE_STOP_RECURSIVE_CALL) {
@@ -6991,7 +7772,11 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
}
jit_failure:
- dasm_free(&dasm_state);
+#ifndef ZEND_JIT_IR
+ dasm_free(&ctx);
+#else
+ zend_jit_free_ctx(&ctx);
+#endif
if (name) {
zend_string_release(name);
@@ -7020,57 +7805,117 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par
return handler;
}
+#ifdef ZEND_JIT_IR
+static zend_string *zend_jit_trace_escape_name(uint32_t trace_num, uint32_t exit_num)
+{
+ smart_str buf = {0};
+
+ smart_str_appends(&buf," ESCAPE-");
+ smart_str_append_long(&buf, (zend_long)trace_num);
+ smart_str_appendc(&buf, '-');
+ smart_str_append_long(&buf, (zend_long)exit_num);
+ smart_str_0(&buf);
+ return buf.s;
+}
+#endif
+
static const void *zend_jit_trace_exit_to_vm(uint32_t trace_num, uint32_t exit_num)
{
const void *handler = NULL;
- dasm_State* dasm_state = NULL;
- void *checkpoint;
+#ifndef ZEND_JIT_IR
+ dasm_State* ctx = NULL;
char name[32];
+#else
+ zend_jit_ctx ctx;
+ zend_string *name;
+#endif
+ void *checkpoint;
const zend_op *opline;
uint32_t stack_size;
zend_jit_trace_stack *stack;
bool original_handler = 0;
if (!zend_jit_trace_exit_needs_deoptimization(trace_num, exit_num)) {
+#ifndef ZEND_JIT_IR
return dasm_labels[zend_lbtrace_escape];
+#else
+ return zend_jit_stub_handlers[jit_stub_trace_escape];
+#endif
}
+#ifndef ZEND_JIT_IR
checkpoint = zend_arena_checkpoint(CG(arena));;
sprintf(name, "ESCAPE-%d-%d", trace_num, exit_num);
- dasm_init(&dasm_state, DASM_MAXSECTION);
- dasm_setupglobal(&dasm_state, dasm_labels, zend_lb_MAX);
- dasm_setup(&dasm_state, dasm_actions);
+ dasm_init(&ctx, DASM_MAXSECTION);
+ dasm_setupglobal(&ctx, dasm_labels, zend_lb_MAX);
+ dasm_setup(&ctx, dasm_actions);
- zend_jit_align_func(&dasm_state);
+ zend_jit_align_func(&ctx);
+#else
+ name = zend_jit_trace_escape_name(trace_num, exit_num);
+
+ if (!zend_jit_deoptimizer_start(&ctx, name, trace_num, exit_num)) {
+ zend_string_release(name);
+ return NULL;
+ }
+
+ checkpoint = zend_arena_checkpoint(CG(arena));;
+#endif
/* Deoptimization */
stack_size = zend_jit_traces[trace_num].exit_info[exit_num].stack_size;
stack = zend_jit_traces[trace_num].stack_map + zend_jit_traces[trace_num].exit_info[exit_num].stack_offset;
- if (!zend_jit_trace_deoptimization(&dasm_state,
+ if (!zend_jit_trace_deoptimization(&ctx,
zend_jit_traces[trace_num].exit_info[exit_num].flags,
zend_jit_traces[trace_num].exit_info[exit_num].opline,
- stack, stack_size, NULL, NULL, NULL, 0)) {
+ stack, stack_size, NULL, NULL,
+#ifndef ZEND_JIT_IR
+ NULL,
+#else
+ zend_jit_traces[trace_num].constants,
+ zend_jit_traces[trace_num].exit_info[exit_num].poly_func_reg,
+#endif
+ 0)) {
goto jit_failure;
}
opline = zend_jit_traces[trace_num].exit_info[exit_num].opline;
if (opline) {
if (opline == zend_jit_traces[zend_jit_traces[trace_num].root].opline) {
+#ifndef ZEND_JIT_IR
/* prevent endless loop */
original_handler = 1;
+#else
+ zend_jit_op_array_trace_extension *jit_extension =
+ (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(zend_jit_traces[zend_jit_traces[trace_num].root].op_array);
+
+ if (ZEND_OP_TRACE_INFO(opline, jit_extension->offset)->orig_handler != opline->handler) {
+ /* prevent endless loop */
+ original_handler = 1;
+ }
+#endif
}
- zend_jit_set_ip_ex(&dasm_state, opline, original_handler);
+ zend_jit_set_ip_ex(&ctx, opline, original_handler);
}
- zend_jit_trace_return(&dasm_state, original_handler, opline);
+ zend_jit_trace_return(&ctx, original_handler, opline);
- handler = dasm_link_and_encode(&dasm_state, NULL, NULL, NULL, NULL, name, ZEND_JIT_TRACE_NUM, SP_ADJ_JIT, SP_ADJ_NONE);
+#ifndef ZEND_JIT_IR
+ handler = dasm_link_and_encode(&ctx, NULL, NULL, NULL, NULL, name, ZEND_JIT_TRACE_NUM, SP_ADJ_JIT, SP_ADJ_NONE);
+#else
+ handler = zend_jit_finish(&ctx);
+#endif
jit_failure:
- dasm_free(&dasm_state);
+#ifndef ZEND_JIT_IR
+ dasm_free(&ctx);
+#else
+ zend_jit_free_ctx(&ctx);
+ zend_string_release(name);
+#endif
zend_arena_release(&CG(arena), checkpoint);
return handler;
}
@@ -7112,6 +7957,10 @@
t->opline = trace_buffer[1].opline;
t->exit_info = exit_info;
t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+ t->consts_count = 0;
+ t->constants = NULL;
+#endif
orig_trigger = JIT_G(trigger);
JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE;
@@ -7134,6 +7983,12 @@
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_NO_SHM;
goto exit;
}
@@ -7146,6 +8001,13 @@
zend_jit_trace_stack *shared_stack_map = (zend_jit_trace_stack*)zend_shared_alloc(t->stack_map_size * sizeof(zend_jit_trace_stack));
if (!shared_stack_map) {
efree(t->stack_map);
+ t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_NO_SHM;
goto exit;
}
@@ -7154,6 +8016,20 @@
t->stack_map = shared_stack_map;
}
+#ifdef ZEND_JIT_IR
+ if (t->consts_count) {
+ zend_jit_exit_const *constants = (zend_jit_exit_const*)zend_shared_alloc(t->consts_count * sizeof(zend_jit_exit_const));
+ if (!constants) {
+ efree(t->constants);
+ ret = ZEND_JIT_TRACE_STOP_NO_SHM;
+ goto exit;
+ }
+ memcpy(constants, t->constants, t->consts_count * sizeof(zend_jit_exit_const));
+ efree(t->constants);
+ t->constants = constants;
+ }
+#endif
+
t->exit_counters = ZEND_JIT_EXIT_COUNTERS;
ZEND_JIT_EXIT_COUNTERS += t->exit_count;
@@ -7169,12 +8045,24 @@
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS;
} else {
if (t->stack_map) {
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_COMPILER_ERROR;
}
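
The new per-trace constants table follows the same life cycle as the stack map above: built in process-local memory during compilation, published into shared memory on success, and released on every failure path so nothing leaks. A sketch of that publish step, with malloc/free standing in for the Zend allocators:

#include <stdlib.h>
#include <string.h>

typedef union { long i; double d; } exit_const;

/* Stand-in for zend_shared_alloc(): returns NULL when SHM is exhausted. */
static void *shared_alloc(size_t n) { return malloc(n); }

/* Move a heap-built table into "shared" memory, always dropping the
 * process-local copy; NULL maps to ZEND_JIT_TRACE_STOP_NO_SHM above. */
static exit_const *publish_constants(exit_const *tmp, size_t count)
{
	exit_const *shared = NULL;

	if (count) {
		shared = shared_alloc(count * sizeof(exit_const));
		if (shared) {
			memcpy(shared, tmp, count * sizeof(exit_const));
		}
		free(tmp); /* efree() analogue */
	}
	return shared;
}

int main(void)
{
	exit_const *tmp = malloc(2 * sizeof(exit_const));
	tmp[0].i = 42;
	tmp[1].d = 1.5;
	exit_const *shared = publish_constants(tmp, 2);
	free(shared);
	return 0;
}
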
@@ -7570,6 +8458,13 @@
}
if (t->exit_info[i].flags & (ZEND_JIT_EXIT_POLYMORPHISM|ZEND_JIT_EXIT_METHOD_CALL|ZEND_JIT_EXIT_CLOSURE_CALL)) {
fprintf(stderr, "/POLY");
+#ifdef ZEND_JIT_IR
+ if (t->exit_info[i].flags & ZEND_JIT_EXIT_METHOD_CALL) {
+ fprintf(stderr, "(%s, %s)",
+ t->exit_info[i].poly_func_reg != ZREG_NONE ? zend_reg_name(t->exit_info[i].poly_func_reg) : "?",
+ t->exit_info[i].poly_this_reg != ZREG_NONE ? zend_reg_name(t->exit_info[i].poly_this_reg) : "?");
+ }
+#endif
}
if (t->exit_info[i].flags & ZEND_JIT_EXIT_FREE_OP1) {
fprintf(stderr, "/FREE_OP1");
@@ -7588,6 +8483,7 @@
} else {
fprintf(stderr, "%s", zend_get_type_by_const(type));
}
+#ifndef ZEND_JIT_IR
if (STACK_REG(stack, j) != ZREG_NONE) {
if (STACK_REG(stack, j) < ZREG_NUM) {
fprintf(stderr, "(%s)", zend_reg_name[STACK_REG(stack, j)]);
@@ -7599,6 +8495,42 @@
fprintf(stderr, "(const_%d)", STACK_REG(stack, j) - ZREG_NUM);
}
}
+#else
+ if (STACK_FLAGS(stack, j) == ZREG_CONST) {
+ if (type == IS_LONG) {
+ fprintf(stderr, "(" ZEND_LONG_FMT ")", (zend_long)t->constants[STACK_REF(stack, j)].i);
+ } else if (type == IS_DOUBLE) {
+ fprintf(stderr, "(%g)", t->constants[STACK_REF(stack, j)].d);
+ } else {
+ ZEND_UNREACHABLE();
+ }
+ } else if (STACK_FLAGS(stack, j) == ZREG_TYPE_ONLY) {
+ fprintf(stderr, "(type_only)");
+ } else if (STACK_FLAGS(stack, j) == ZREG_THIS) {
+ fprintf(stderr, "(this)");
+ } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_ADDREF) {
+ fprintf(stderr, "(zval_try_addref)");
+ } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_COPY) {
+ fprintf(stderr, "zval_copy(%s)", zend_reg_name(STACK_REG(stack, j)));
+ } else if (STACK_FLAGS(stack, j) & ZREG_SPILL_SLOT) {
+ if (STACK_REG(stack, j) == ZREG_NONE) {
+ fprintf(stderr, "(spill=0x%x", STACK_REF(stack, j));
+ } else {
+ fprintf(stderr, "(spill=0x%x(%s)", STACK_REF(stack, j), zend_reg_name(STACK_REG(stack, j)));
+ }
+ if (STACK_FLAGS(stack, j) != 0) {
+ fprintf(stderr, ":%x", STACK_FLAGS(stack, j));
+ }
+ fprintf(stderr, ")");
+ } else if (STACK_REG(stack, j) != ZREG_NONE) {
+ fprintf(stderr, "(%s", zend_reg_name(STACK_REG(stack, j)));
+ if (STACK_FLAGS(stack, j) != 0) {
+ fprintf(stderr, ":%x", STACK_FLAGS(stack, j));
+ }
+ fprintf(stderr, ")");
+ }
+#endif
+#ifndef ZEND_JIT_IR
} else if (STACK_REG(stack, j) == ZREG_ZVAL_TRY_ADDREF) {
fprintf(stderr, " ");
zend_dump_var(op_array, (j < op_array->last_var) ? IS_CV : 0, j);
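
The dump code above decodes each exit stack-map entry by checking STACK_FLAGS first (constant, type-only, $this, deferred addref, zval copy, spill slot) before falling back to a plain register name. A small illustrative decoder in the same spirit (the enum and struct are invented for the example):

#include <stdio.h>
#include <stdint.h>

enum { SLOT_CONST, SLOT_TYPE_ONLY, SLOT_THIS, SLOT_SPILL, SLOT_REG, SLOT_NONE };

typedef struct { int kind; int reg; int32_t ref; } slot;

/* One branch per representation, most specific first. */
static void dump_slot(const slot *s)
{
	switch (s->kind) {
		case SLOT_CONST:     printf("(const_%d)\n", s->ref); break;
		case SLOT_TYPE_ONLY: printf("(type_only)\n"); break;
		case SLOT_THIS:      printf("(this)\n"); break;
		case SLOT_SPILL:     printf("(spill=0x%x(r%d))\n", s->ref, s->reg); break;
		case SLOT_REG:       printf("(r%d)\n", s->reg); break;
		default:             printf("(none)\n"); break;
	}
}

int main(void)
{
	slot s = { SLOT_SPILL, 4, 0x10 };
	dump_slot(&s);
	return 0;
}
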
@@ -7607,6 +8539,14 @@
fprintf(stderr, " ");
zend_dump_var(op_array, (j < op_array->last_var) ? IS_CV : 0, j);
fprintf(stderr, ":unknown(zval_copy(%s))", zend_reg_name[ZREG_COPY]);
+#else
+ } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_ADDREF) {
+ fprintf(stderr, ":unknown(zval_try_addref)");
+ } else if (STACK_FLAGS(stack, j) == ZREG_ZVAL_COPY) {
+ fprintf(stderr, " ");
+ zend_dump_var(op_array, (j < op_array->last_var) ? IS_CV : 0, j);
+ fprintf(stderr, ":unknown(zval_copy(%s))", zend_reg_name(STACK_REG(stack, j)));
+#endif
}
}
fprintf(stderr, "\n");
@@ -7856,6 +8796,10 @@
t->opline = NULL;
t->exit_info = exit_info;
t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+ t->consts_count = 0;
+ t->constants = NULL;
+#endif
orig_trigger = JIT_G(trigger);
JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE;
@@ -7878,6 +8822,12 @@
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_NO_SHM;
goto exit;
}
@@ -7890,6 +8840,13 @@
zend_jit_trace_stack *shared_stack_map = (zend_jit_trace_stack*)zend_shared_alloc(t->stack_map_size * sizeof(zend_jit_trace_stack));
if (!shared_stack_map) {
efree(t->stack_map);
+ t->stack_map = NULL;
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_NO_SHM;
goto exit;
}
@@ -7898,6 +8855,20 @@
t->stack_map = shared_stack_map;
}
+#ifdef ZEND_JIT_IR
+ if (t->consts_count) {
+ zend_jit_exit_const *constants = (zend_jit_exit_const*)zend_shared_alloc(t->consts_count * sizeof(zend_jit_exit_const));
+ if (!constants) {
+ efree(t->constants);
+ ret = ZEND_JIT_TRACE_STOP_NO_SHM;
+ goto exit;
+ }
+ memcpy(constants, t->constants, t->consts_count * sizeof(zend_jit_exit_const));
+ efree(t->constants);
+ t->constants = constants;
+ }
+#endif
+
zend_jit_link_side_trace(
zend_jit_traces[parent_num].code_start,
zend_jit_traces[parent_num].code_size,
@@ -7919,12 +8890,24 @@
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_TOO_MANY_EXITS;
} else {
if (t->stack_map) {
efree(t->stack_map);
t->stack_map = NULL;
}
+#ifdef ZEND_JIT_IR
+ if (t->constants) {
+ efree(t->constants);
+ t->constants = NULL;
+ }
+#endif
ret = ZEND_JIT_TRACE_STOP_COMPILER_ERROR;
}
@@ -8119,6 +9102,7 @@
}
for (i = 0; i < stack_size; i++) {
+#ifndef ZEND_JIT_IR
if (STACK_REG(stack, i) != ZREG_NONE) {
if (STACK_TYPE(stack, i) == IS_LONG) {
zend_long val;
@@ -8168,6 +9152,61 @@
ZEND_UNREACHABLE();
}
}
+#else
+ if (STACK_FLAGS(stack, i) == ZREG_CONST) {
+ if (STACK_TYPE(stack, i) == IS_LONG) {
+ ZVAL_LONG(EX_VAR_NUM(i), (zend_long)t->constants[STACK_REF(stack, i)].i);
+ } else if (STACK_TYPE(stack, i) == IS_DOUBLE) {
+ ZVAL_DOUBLE(EX_VAR_NUM(i), t->constants[STACK_REF(stack, i)].d);
+ } else {
+ ZEND_UNREACHABLE();
+ }
+ } else if (STACK_FLAGS(stack, i) == ZREG_TYPE_ONLY) {
+ uint32_t type = STACK_TYPE(stack, i);
+ if (type <= IS_DOUBLE) {
+ Z_TYPE_INFO_P(EX_VAR_NUM(i)) = type;
+ } else {
+ ZEND_UNREACHABLE();
+ }
+ } else if (STACK_FLAGS(stack, i) == ZREG_THIS) {
+ zend_object *obj = Z_OBJ(EX(This));
+
+ GC_ADDREF(obj);
+ ZVAL_OBJ(EX_VAR_NUM(i), obj);
+ } else if (STACK_FLAGS(stack, i) == ZREG_ZVAL_ADDREF) {
+ Z_TRY_ADDREF_P(EX_VAR_NUM(i));
+ } else if (STACK_FLAGS(stack, i) == ZREG_ZVAL_COPY) {
+ zval *val = (zval*)regs->gpr[STACK_REG(stack, i)];
+
+ if (UNEXPECTED(Z_TYPE_P(val) == IS_UNDEF)) {
+ /* Undefined array index or property */
+ repeat_last_opline = 1;
+ } else {
+ ZVAL_COPY(EX_VAR_NUM(i), val);
+ }
+ } else if (STACK_FLAGS(stack, i) & ZREG_SPILL_SLOT) {
+ ZEND_ASSERT(STACK_REG(stack, i) != ZREG_NONE);
+ uintptr_t ptr = (uintptr_t)regs->gpr[STACK_REG(stack, i)] + STACK_REF(stack, i);
+
+ if (STACK_TYPE(stack, i) == IS_LONG) {
+ ZVAL_LONG(EX_VAR_NUM(i), *(zend_long*)ptr);
+ } else if (STACK_TYPE(stack, i) == IS_DOUBLE) {
+ ZVAL_DOUBLE(EX_VAR_NUM(i), *(double*)ptr);
+ } else {
+ ZEND_UNREACHABLE();
+ }
+ } else if (STACK_REG(stack, i) != ZREG_NONE) {
+ if (STACK_TYPE(stack, i) == IS_LONG) {
+ zend_long val = regs->gpr[STACK_REG(stack, i)];
+ ZVAL_LONG(EX_VAR_NUM(i), val);
+ } else if (STACK_TYPE(stack, i) == IS_DOUBLE) {
+ double val = regs->fpr[STACK_REG(stack, i) - ZREG_FIRST_FPR];
+ ZVAL_DOUBLE(EX_VAR_NUM(i), val);
+ } else {
+ ZEND_UNREACHABLE();
+ }
+ }
+#endif
}
if (repeat_last_opline) {
@@ -8207,7 +9246,12 @@
}
}
if (t->exit_info[exit_num].flags & ZEND_JIT_EXIT_METHOD_CALL) {
+#ifndef ZEND_JIT_IR
zend_function *func = (zend_function*)regs->gpr[ZREG_COPY];
+#else
+ ZEND_ASSERT(t->exit_info[exit_num].poly_func_reg >= 0);
+ zend_function *func = (zend_function*)regs->gpr[t->exit_info[exit_num].poly_func_reg];
+#endif
if (UNEXPECTED(func->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE)) {
zend_string_release_ex(func->common.function_name, 0);
diff --git a/ext/opcache/jit/zend_jit_vm_helpers.c b/ext/opcache/jit/zend_jit_vm_helpers.c
index 3bed3c36f9680..c346835e597e6 100644
--- a/ext/opcache/jit/zend_jit_vm_helpers.c
+++ b/ext/opcache/jit/zend_jit_vm_helpers.c
@@ -28,11 +28,13 @@
 #include "Optimizer/zend_func_info.h"
 #include "Optimizer/zend_call_graph.h"
 #include "zend_jit.h"
+#ifndef ZEND_JIT_IR
 #if ZEND_JIT_TARGET_X86
 # include "zend_jit_x86.h"
 #elif ZEND_JIT_TARGET_ARM64
 # include "zend_jit_arm64.h"
 #endif
+#endif /* ZEND_JIT_IR */
 #include "zend_jit_internal.h"
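
Taken together, the zend_jit_trace_exit hunk above is the runtime half of deoptimization: the exit stub dumps the CPU registers into a buffer, and each VM slot is rebuilt from its stack-map entry, whether that is an immediate constant, a bare type tag, $this, a pointer to copy, a spill slot addressed as base register plus offset, or the register itself. A minimal sketch of one such restore (invented structures, not the php-src ABI):

#include <stdio.h>
#include <stdint.h>

typedef struct { uintptr_t gpr[16]; double fpr[16]; } regs_buf;

enum { K_CONST_LONG, K_SPILL_LONG, K_GPR_LONG };

typedef struct { int kind; int reg; int32_t off; long imm; } map_entry;

/* Materialize one value described by the stack map from the saved
 * register file, mirroring the branches in the hunk above. */
static long restore_long(const map_entry *e, const regs_buf *r)
{
	switch (e->kind) {
		case K_CONST_LONG: return e->imm;
		case K_SPILL_LONG: /* base register + byte offset */
			return *(long *)((uintptr_t)r->gpr[e->reg] + e->off);
		case K_GPR_LONG:   return (long)r->gpr[e->reg];
		default:           return 0;
	}
}

int main(void)
{
	long slot0 = 42;
	regs_buf r = {0};
	r.gpr[3] = (uintptr_t)&slot0;
	map_entry e = { K_SPILL_LONG, 3, 0, 0 };
	printf("%ld\n", restore_long(&e, &r)); /* 42 */
	return 0;
}
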