diff --git a/libsolidity/interface/OptimiserSettings.h b/libsolidity/interface/OptimiserSettings.h
index 1ca46ccbc260..d736eb63a178 100644
--- a/libsolidity/interface/OptimiserSettings.h
+++ b/libsolidity/interface/OptimiserSettings.h
@@ -47,6 +47,8 @@ struct OptimiserSettings
"xa[r]EscLM" // Turn into SSA and simplify
"Vcul [j]" // Reverse SSA
+ "k" // preprocessing after some simplifications
+
// should have good "compilability" property here.
"Trpeul" // Run functional expression inliner
diff --git a/libyul/CMakeLists.txt b/libyul/CMakeLists.txt
index c0082f5d69fd..e14e03cfabdc 100644
--- a/libyul/CMakeLists.txt
+++ b/libyul/CMakeLists.txt
@@ -93,6 +93,8 @@ add_library(yul
optimiser/ConditionalUnsimplifier.h
optimiser/ControlFlowSimplifier.cpp
optimiser/ControlFlowSimplifier.h
+ optimiser/ConstantFunctionEvaluator.cpp
+ optimiser/ConstantFunctionEvaluator.h
optimiser/DataFlowAnalyzer.cpp
optimiser/DataFlowAnalyzer.h
optimiser/DeadCodeEliminator.cpp
@@ -192,6 +194,10 @@ add_library(yul
optimiser/VarDeclInitializer.h
optimiser/VarNameCleaner.cpp
optimiser/VarNameCleaner.h
+ tools/interpreter/Interpreter.h
+ tools/interpreter/Interpreter.cpp
+ tools/interpreter/EVMInstructionInterpreter.h
+ tools/interpreter/EVMInstructionInterpreter.cpp
)
target_link_libraries(yul PUBLIC evmasm solutil langutil smtutil fmt::fmt-header-only)
diff --git a/libyul/optimiser/ConstantFunctionEvaluator.cpp b/libyul/optimiser/ConstantFunctionEvaluator.cpp
new file mode 100644
index 000000000000..02eaa747fc4d
--- /dev/null
+++ b/libyul/optimiser/ConstantFunctionEvaluator.cpp
@@ -0,0 +1,405 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Optimiser component that evaluates constant functions and replaces their bodies with the evaluated results.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+using namespace solidity;
+using namespace solidity::evmasm;
+using namespace solidity::yul;
+using namespace solidity::yul::tools::interpreter;
+
+using namespace std::literals::string_literals;
+
+void ConstantFunctionEvaluator::run(OptimiserStepContext& _context, Block& _ast) // static entry point of the optimiser step
+{
+ ConstantFunctionEvaluator(_context.dialect)(_ast); // build a temporary evaluator for the context's dialect and apply it to the AST
+}
+
+
+ConstantFunctionEvaluator::ConstantFunctionEvaluator(Dialect const& _dialect): // stores the dialect and sets up the scope bookkeeping
+ m_dialect(_dialect),
+ m_rootScope(), // value-initialised root scope
+ m_currentScope(&m_rootScope) // the traversal starts in the root scope
+{
+}
+
+void ConstantFunctionEvaluator::operator()(FunctionDefinition& _function) // tries to replace the body of a parameterless function by literal assignments
+{
+ ASTModifier::operator()(_function); // run the base-class visitor on the function first
+ if (_function.parameters.size() > 0) return ; // only functions without parameters are candidates
+
+ InterpreterState state; // a fresh interpreter state is used for every candidate function
+ // TODO make these configurable
+ state.maxExprNesting = 100;
+ state.maxSteps = 10000;
+ state.maxTraceSize = 0;
+ // This must be limited, because the stack is also used in this optimizer component
+ state.maxRecursionDepth = 64;
+
+ std::map returnVariables; // initial variable values handed to the interpreter
+ for (auto const& retVar: _function.returnVariables)
+ {
+ returnVariables[retVar.name] = 0; // every return variable starts out as zero
+ }
+
+ ArithmeticOnlyInterpreter interpreter(
+ state,
+ m_dialect,
+ *m_currentScope, // the scope that is current while this definition is visited
+ /* _callerRecursionDepth=*/ 0,
+ returnVariables
+ );
+ try
+ {
+ interpreter(_function.body); // interpret the function body
+ } catch (InterpreterTerminatedGeneric const&)
+ {
+ // the evaluation was aborted, so the original body is kept
+ return ;
+ }
+
+ Block newBody; // replacement body: one literal assignment per return variable
+ newBody.debugData = _function.body.debugData;
+
+ // Interpreting the body does not preserve any debug data. To still keep
+ // some useful information, the generated literals reuse the debug data
+ // of the function itself.
+ // This is helpful, for example, for a function with only one return
+ // variable, which is likely to have been generated from a Solidity
+ // constant.
+ langutil::DebugData::ConstPtr literalDebugData = _function.debugData;
+
+ for (auto const& retVar: _function.returnVariables)
+ {
+ Identifier ident; // left-hand side: the return variable
+ ident.name = retVar.name;
+
+ Literal val; // right-hand side: the value the interpreter holds for that variable
+ val.debugData = literalDebugData;
+ val.kind = LiteralKind::Number;
+ val.value = LiteralValue(interpreter.valueOfVariable(retVar.name));
+
+ Assignment assignment;
+ assignment.variableNames = { std::move(ident) };
+ assignment.value = { std::make_unique(std::move(val)) };
+
+ newBody.statements.push_back(std::move(assignment));
+ }
+ _function.body = std::move(newBody); // the old body is discarded here
+}
+
+void ConstantFunctionEvaluator::operator()(Block& _block) // visits a block while keeping the interpreter scope tree in sync
+{
+ enterScope(_block);
+
+ for (auto const& statement: _block.statements) // first pass: register all functions defined directly in this block before any statement is visited
+ if (std::holds_alternative(statement))
+ {
+ FunctionDefinition const& funDef = std::get(statement);
+ m_currentScope->names.emplace(funDef.name, &funDef); // the scope stores a pointer to the AST node, not a copy
+ }
+
+ for (auto& statement: _block.statements) // second pass: visit (and possibly rewrite) every statement
+ {
+ visit(statement);
+ }
+
+ leaveScope();
+}
+
+void ConstantFunctionEvaluator::enterScope(Block const& _block) // makes the sub-scope belonging to _block the current scope
+{
+ if (!m_currentScope->subScopes.count(&_block)) // sub-scopes are keyed by block address and created on first use
+ m_currentScope->subScopes[&_block] = std::make_unique(Scope{
+ {},
+ {},
+ m_currentScope, // NOTE(review): presumably the `parent` member that leaveScope() reads - confirm against the Scope definition
+ });
+ m_currentScope = m_currentScope->subScopes[&_block].get(); // descend into the sub-scope
+}
+
+void ConstantFunctionEvaluator::leaveScope() // returns to the parent of the current scope
+{
+ m_currentScope = m_currentScope->parent;
+ yulAssert(m_currentScope, ""); // the scope that was left must have had a non-null parent
+}
+
+u256 ArithmeticOnlyInterpreter::evaluate(Expression const& _expression) // evaluates an expression using the arithmetic-only expression evaluator
+{
+ ArithmeticOnlyExpressionEvaluator ev( // constructed from this interpreter's own state, scope and settings
+ m_state,
+ m_dialect,
+ *m_scope,
+ m_variables,
+ m_disableExternalCalls,
+ m_disableMemoryTrace,
+ m_recursionDepth
+ );
+ ev.visit(_expression);
+ return ev.value(); // result obtained through value()
+}
+
+std::vector ArithmeticOnlyInterpreter::evaluateMulti(Expression const& _expression) // like evaluate(), but returns what values() yields
+{
+ ArithmeticOnlyExpressionEvaluator ev( // constructed from this interpreter's own state, scope and settings
+ m_state,
+ m_dialect,
+ *m_scope,
+ m_variables,
+ m_disableExternalCalls,
+ m_disableMemoryTrace,
+ m_recursionDepth
+ );
+ ev.visit(_expression);
+ return ev.values(); // result obtained through values()
+}
+
+
+void ArithmeticOnlyExpressionEvaluator::operator()(FunctionCall const& _funCall) // rejects non-arithmetic builtins, otherwise defers to the base evaluator
+{
+ FunctionCallType fnCallType = determineFunctionCallType(_funCall);
+ if (fnCallType == FunctionCallType::BuiltinNonArithmetic)
+ {
+ BOOST_THROW_EXCEPTION(BuiltinNonArithmeticFunctionInvoked()); // thrown before the base evaluator gets to handle the call
+ }
+ ExpressionEvaluator::operator()(_funCall); // arithmetic builtins and all other calls are handled by the base class
+}
+
+ArithmeticOnlyExpressionEvaluator::FunctionCallType
+ArithmeticOnlyExpressionEvaluator::determineFunctionCallType(FunctionCall const& _funCall) // classifies the function called by _funCall
+{
+ if (EVMDialect const* dialect = dynamic_cast(&m_dialect)) // builtins are only classified when the dialect is an EVMDialect
+ {
+ if (BuiltinFunctionForEVM const* fun = dialect->builtin(_funCall.functionName.name)) // a null result skips to InvokeOther at the end
+ {
+ if (fun->instruction) // builtin that is backed by an EVM instruction
+ {
+ switch (*fun->instruction)
+ {
+ // --------------- arithmetic ---------------
+ case Instruction::ADD:
+ case Instruction::MUL:
+ case Instruction::SUB:
+ case Instruction::DIV:
+ case Instruction::SDIV:
+ case Instruction::MOD:
+ case Instruction::SMOD:
+ case Instruction::EXP:
+ case Instruction::NOT:
+ case Instruction::LT:
+ case Instruction::GT:
+ case Instruction::SLT:
+ case Instruction::SGT:
+ case Instruction::EQ:
+ case Instruction::ISZERO:
+ case Instruction::AND:
+ case Instruction::OR:
+ case Instruction::XOR:
+ case Instruction::BYTE:
+ case Instruction::SHL:
+ case Instruction::SHR:
+ case Instruction::SAR:
+ case Instruction::ADDMOD:
+ case Instruction::MULMOD:
+ case Instruction::SIGNEXTEND:
+ return FunctionCallType::BuiltinArithmetic;
+ // --------------- stop ---------------------------
+ case Instruction::STOP:
+ // --------------- blockchain stuff ---------------
+ case Instruction::KECCAK256:
+ case Instruction::ADDRESS:
+ case Instruction::BALANCE:
+ case Instruction::SELFBALANCE:
+ case Instruction::ORIGIN:
+ case Instruction::CALLER:
+ case Instruction::CALLVALUE:
+ case Instruction::CALLDATALOAD:
+ case Instruction::CALLDATASIZE:
+ case Instruction::CALLDATACOPY:
+ case Instruction::CODESIZE:
+ case Instruction::CODECOPY:
+ case Instruction::GASPRICE:
+ case Instruction::CHAINID:
+ case Instruction::BASEFEE:
+ case Instruction::BLOBHASH:
+ case Instruction::BLOBBASEFEE:
+ case Instruction::EXTCODESIZE:
+ case Instruction::EXTCODEHASH:
+ case Instruction::EXTCODECOPY:
+ case Instruction::RETURNDATASIZE:
+ case Instruction::RETURNDATACOPY:
+ case Instruction::MCOPY:
+ case Instruction::BLOCKHASH:
+ case Instruction::COINBASE:
+ case Instruction::TIMESTAMP:
+ case Instruction::NUMBER:
+ case Instruction::PREVRANDAO:
+ case Instruction::GASLIMIT:
+ // --------------- memory / storage / logs ---------------
+ case Instruction::MLOAD:
+ case Instruction::MSTORE:
+ case Instruction::MSTORE8:
+ case Instruction::SLOAD:
+ case Instruction::SSTORE:
+ case Instruction::PC:
+ case Instruction::MSIZE:
+ case Instruction::GAS:
+ case Instruction::LOG0:
+ case Instruction::LOG1:
+ case Instruction::LOG2:
+ case Instruction::LOG3:
+ case Instruction::LOG4:
+ case Instruction::TLOAD:
+ case Instruction::TSTORE:
+ // --------------- calls ---------------
+ case Instruction::CREATE:
+ case Instruction::CREATE2:
+ case Instruction::CALL:
+ case Instruction::CALLCODE:
+ case Instruction::DELEGATECALL:
+ case Instruction::STATICCALL:
+ case Instruction::RETURN:
+ case Instruction::REVERT:
+ case Instruction::INVALID:
+ case Instruction::SELFDESTRUCT:
+ return FunctionCallType::BuiltinNonArithmetic; // shared result of every case from STOP down to here
+
+ // --------------- pop only discards its value ------------------
+ case Instruction::POP:
+ return FunctionCallType::BuiltinArithmetic; // classified together with the arithmetic builtins
+
+ // --------------- invalid in strict assembly ---------------
+ case Instruction::JUMP:
+ case Instruction::JUMPI:
+ case Instruction::JUMPDEST:
+ case Instruction::PUSH0:
+ case Instruction::PUSH1:
+ case Instruction::PUSH2:
+ case Instruction::PUSH3:
+ case Instruction::PUSH4:
+ case Instruction::PUSH5:
+ case Instruction::PUSH6:
+ case Instruction::PUSH7:
+ case Instruction::PUSH8:
+ case Instruction::PUSH9:
+ case Instruction::PUSH10:
+ case Instruction::PUSH11:
+ case Instruction::PUSH12:
+ case Instruction::PUSH13:
+ case Instruction::PUSH14:
+ case Instruction::PUSH15:
+ case Instruction::PUSH16:
+ case Instruction::PUSH17:
+ case Instruction::PUSH18:
+ case Instruction::PUSH19:
+ case Instruction::PUSH20:
+ case Instruction::PUSH21:
+ case Instruction::PUSH22:
+ case Instruction::PUSH23:
+ case Instruction::PUSH24:
+ case Instruction::PUSH25:
+ case Instruction::PUSH26:
+ case Instruction::PUSH27:
+ case Instruction::PUSH28:
+ case Instruction::PUSH29:
+ case Instruction::PUSH30:
+ case Instruction::PUSH31:
+ case Instruction::PUSH32:
+ case Instruction::DUP1:
+ case Instruction::DUP2:
+ case Instruction::DUP3:
+ case Instruction::DUP4:
+ case Instruction::DUP5:
+ case Instruction::DUP6:
+ case Instruction::DUP7:
+ case Instruction::DUP8:
+ case Instruction::DUP9:
+ case Instruction::DUP10:
+ case Instruction::DUP11:
+ case Instruction::DUP12:
+ case Instruction::DUP13:
+ case Instruction::DUP14:
+ case Instruction::DUP15:
+ case Instruction::DUP16:
+ case Instruction::SWAP1:
+ case Instruction::SWAP2:
+ case Instruction::SWAP3:
+ case Instruction::SWAP4:
+ case Instruction::SWAP5:
+ case Instruction::SWAP6:
+ case Instruction::SWAP7:
+ case Instruction::SWAP8:
+ case Instruction::SWAP9:
+ case Instruction::SWAP10:
+ case Instruction::SWAP11:
+ case Instruction::SWAP12:
+ case Instruction::SWAP13:
+ case Instruction::SWAP14:
+ case Instruction::SWAP15:
+ case Instruction::SWAP16:
+ {
+ yulAssert(false, ""); // these instructions are treated as an internal error if they show up as a builtin call
+ }
+ }
+ }
+ else // builtin without an associated instruction: classified by name
+ {
+ static std::set const NON_INSTRUCTION_FUNC_NAME = {
+ "datasize",
+ "dataoffset",
+ "datacopy",
+ "memoryguard",
+ "loadimmutable",
+ "setimmutable",
+ "linkersymbol"
+ };
+ if (NON_INSTRUCTION_FUNC_NAME.count(fun->name.str()))
+ {
+ return FunctionCallType::BuiltinNonArithmetic;
+ }
+ if (boost::algorithm::starts_with(fun->name.str(), "verbatim")) // any builtin whose name starts with "verbatim"
+ {
+ return FunctionCallType::BuiltinNonArithmetic;
+ }
+ }
+
+ yulAssert(false, "Can not determine function call type for function " + fun->name.str()); // reached by any builtin that was not classified above
+ }
+ }
+
+ return FunctionCallType::InvokeOther; // the callee is not a builtin of an EVM dialect
+}
diff --git a/libyul/optimiser/ConstantFunctionEvaluator.h b/libyul/optimiser/ConstantFunctionEvaluator.h
new file mode 100644
index 000000000000..4b7f753fb72d
--- /dev/null
+++ b/libyul/optimiser/ConstantFunctionEvaluator.h
@@ -0,0 +1,174 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Optimiser component that evaluates constant functions and replaces their bodies
+ * with the evaluated results.
+ */
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace solidity::yul
+{
+
+
+/**
+ * Optimiser component that evaluates constant functions and replaces their bodies
+ * with the evaluated results.
+ *
+ * A function is _constant_ if it satisfies all of the following criteria:
+ * - It takes no arguments.
+ * - If executed, it only performs arithmetic operations and calls other
+ * functions (which in turn only perform arithmetic). This means that if
+ * any operation that reads from or writes to memory, logs, creates a contract, ...
+ * is encountered, the function is not constant.
+ *
+ * Non-constant functions are left unchanged by the transformation.
+ *
+ * Under the hood, this component uses the Yul interpreter to evaluate the function.
+ *
+ * For example, this component may change the following code:
+ *
+ * function foo() -> x
+ * {
+ * let u, v := bar()
+ * x := add(u, v)
+ * }
+ *
+ * function bar() -> u, v
+ * {
+ * switch iszero(0) case 1 { u := 6 v := 9 }
+ * default { u := 4 v := 20 }
+ * }
+ *
+ * into
+ *
+ * function foo() -> x
+ * { x := 15 }
+ *
+ * function bar() -> u, v
+ * { u := 6 v := 9 }
+ */
+class ConstantFunctionEvaluator: public ASTModifier
+{
+public:
+ static constexpr char const* name{"ConstantFunctionEvaluator"}; // step name; Suite.cpp maps it to the abbreviation 'k'
+ static void run(OptimiserStepContext& _context, Block& _ast); // applies the step to _ast using the context's dialect
+
+ void operator()(FunctionDefinition& _function) override; // evaluates one function and, on success, replaces its body
+ void operator()(Block& _block) override; // tracks scopes and registers the functions defined in the block
+
+private:
+ ConstantFunctionEvaluator(Dialect const& _dialect); // private: instances are only created by run()
+
+ void enterScope(Block const& _block); // switches to the (lazily created) sub-scope of _block
+ void leaveScope(); // switches back to the parent scope
+
+ Dialect const& m_dialect;
+ tools::interpreter::Scope m_rootScope; // root of the scope tree handed to the interpreter
+ tools::interpreter::Scope* m_currentScope; // scope of the block that is currently being visited
+};
+
+
+}
+
+namespace solidity::yul::tools::interpreter
+{
+
+class BuiltinNonArithmeticFunctionInvoked: public InterpreterTerminatedGeneric // thrown by ArithmeticOnlyExpressionEvaluator when a non-arithmetic builtin is called
+{
+};
+
+class UnlimitedLiteralEncountered: public InterpreterTerminatedGeneric // thrown by ArithmeticOnlyExpressionEvaluator::getValueForUnlimitedLiteral()
+{
+};
+
+class ArithmeticOnlyInterpreter : public Interpreter // interpreter whose expressions are evaluated by ArithmeticOnlyExpressionEvaluator
+{
+public:
+ ArithmeticOnlyInterpreter(
+ InterpreterState& _state,
+ Dialect const& _dialect,
+ Scope& _scope,
+ size_t _callerRecursionDepth,
+ std::map _variables = {} // initial variable values; empty by default
+ ): Interpreter(
+ _state,
+ _dialect,
+ _scope,
+ /* _disableExternalCalls=*/ false, // not needed: non-arithmetic builtins are rejected by an explicit check instead
+ /* _disableMemoryTracing=*/ true,
+ _callerRecursionDepth,
+ _variables
+ )
+ {
+ }
+
+protected:
+ virtual u256 evaluate(Expression const& _expression) override; // single-value evaluation through the arithmetic-only evaluator
+ virtual std::vector evaluateMulti(Expression const& _expression) override; // multi-value evaluation through the arithmetic-only evaluator
+};
+
+class ArithmeticOnlyExpressionEvaluator: public ExpressionEvaluator // expression evaluator that aborts on non-arithmetic builtins
+{
+public:
+ using ExpressionEvaluator::ExpressionEvaluator; // inherits the base-class constructors
+
+ void operator()(FunctionCall const& _funCall) override; // throws BuiltinNonArithmeticFunctionInvoked for non-arithmetic builtins
+
+protected:
+ enum class FunctionCallType
+ {
+ BuiltinArithmetic, // builtin that is allowed to be evaluated
+ BuiltinNonArithmetic, // builtin that aborts the evaluation
+ InvokeOther, // anything that is not a builtin of an EVM dialect
+ };
+
+ virtual std::unique_ptr makeInterpreterCopy(std::map _variables = {}) const override // creates an ArithmeticOnlyInterpreter on this evaluator's state and scope
+ {
+ return std::make_unique(
+ m_state,
+ m_dialect,
+ m_scope,
+ m_recursionDepth,
+ std::move(_variables)
+ );
+ }
+ virtual std::unique_ptr makeInterpreterNew(InterpreterState& _state, Scope& _scope) const override // creates an ArithmeticOnlyInterpreter on the given state and scope
+ {
+ return std::make_unique(
+ _state,
+ m_dialect,
+ _scope,
+ m_recursionDepth
+ );
+ }
+
+ u256 getValueForUnlimitedLiteral(Literal const&) override // unlimited literals always abort the evaluation
+ {
+ BOOST_THROW_EXCEPTION(UnlimitedLiteralEncountered());
+ }
+ FunctionCallType determineFunctionCallType(FunctionCall const& _funCall); // classifies the callee of _funCall
+};
+
+}
diff --git a/libyul/optimiser/Suite.cpp b/libyul/optimiser/Suite.cpp
index d7043e42f30f..948d40cd2113 100644
--- a/libyul/optimiser/Suite.cpp
+++ b/libyul/optimiser/Suite.cpp
@@ -29,6 +29,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -264,6 +265,7 @@ std::map> const& OptimiserSuite::all
ConditionalSimplifier,
ConditionalUnsimplifier,
ControlFlowSimplifier,
+ ConstantFunctionEvaluator,
DeadCodeEliminator,
EqualStoreEliminator,
EquivalentFunctionCombiner,
@@ -305,6 +307,7 @@ std::map const& OptimiserSuite::stepNameToAbbreviationMap()
{ConditionalSimplifier::name, 'C'},
{ConditionalUnsimplifier::name, 'U'},
{ControlFlowSimplifier::name, 'n'},
+ {ConstantFunctionEvaluator::name, 'k'},
{DeadCodeEliminator::name, 'D'},
{EqualStoreEliminator::name, 'E'},
{EquivalentFunctionCombiner::name, 'v'},
diff --git a/libyul/tools/interpreter/EVMInstructionInterpreter.cpp b/libyul/tools/interpreter/EVMInstructionInterpreter.cpp
new file mode 100644
index 000000000000..1813fe15b390
--- /dev/null
+++ b/libyul/tools/interpreter/EVMInstructionInterpreter.cpp
@@ -0,0 +1,706 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Yul interpreter module that evaluates EVM instructions.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+using namespace solidity;
+using namespace solidity::evmasm;
+using namespace solidity::yul;
+using namespace solidity::yul::tools::interpreter;
+
+using solidity::util::h160;
+using solidity::util::h256;
+using solidity::util::keccak256;
+
+namespace
+{
+
+/// Reads 32 bytes from @a _data at position @a _offset bytes while
+/// interpreting @a _data to be padded with an infinite number of zero
+/// bytes beyond its end.
+u256 readZeroExtended(bytes const& _data, u256 const& _offset)
+{
+ if (_offset >= _data.size()) // the read starts at or past the end: only padding is read
+ return 0;
+ else if (_offset + 32 <= _data.size()) // all 32 bytes lie inside the data: read them in one go
+ return *reinterpret_cast(_data.data() + static_cast(_offset));
+ else // the read straddles the end of the data
+ {
+ size_t off = static_cast(_offset);
+ u256 val;
+ for (size_t i = 0; i < 32; ++i) // assemble the word byte by byte, most significant byte first
+ {
+ val <<= 8;
+ if (off + i < _data.size()) // positions past the end contribute zero bits
+ val += _data[off + i];
+ }
+ return val;
+ }
+}
+
+}
+
+namespace solidity::yul::tools::interpreter
+{
+
+void copyZeroExtended( // copies _size bytes from _source into _target, reading zero for source positions past its end
+ std::map& _target,
+ bytes const& _source,
+ size_t _targetOffset,
+ size_t _sourceOffset,
+ size_t _size
+)
+{
+ for (size_t i = 0; i < _size; ++i)
+ _target[_targetOffset + i] = (_sourceOffset + i < _source.size() ? _source[_sourceOffset + i] : 0); // out-of-range source bytes are written as 0
+}
+
+void copyZeroExtendedWithOverlap( // copies _size entries between maps; the copy direction depends on the relative offsets
+ std::map& _target,
+ std::map const& _source,
+ size_t _targetOffset,
+ size_t _sourceOffset,
+ size_t _size
+)
+{
+ if (_targetOffset >= _sourceOffset) // target at or behind the source: copy from the last byte down to the first
+ for (size_t i = _size; i > 0; --i)
+ _target[_targetOffset + i - 1] = (_source.count(_sourceOffset + i - 1) != 0 ? _source.at(_sourceOffset + i - 1) : 0); // missing source keys read as 0
+ else // target before the source: copy from the first byte up to the last
+ for (size_t i = 0; i < _size; ++i)
+ _target[_targetOffset + i] = (_source.count(_sourceOffset + i) != 0 ? _source.at(_sourceOffset + i) : 0); // missing source keys read as 0
+}
+
+}
+
+using u512 = boost::multiprecision::number>; // integer type used for the ADDMOD / MULMOD intermediate results in eval()
+
+u256 EVMInstructionInterpreter::eval( // evaluates one EVM instruction on already evaluated arguments and returns its result (0 if there is none)
+ evmasm::Instruction _instruction,
+ std::vector const& _arguments
+)
+{
+ using namespace solidity::evmasm;
+ using evmasm::Instruction;
+
+ auto info = instructionInfo(_instruction, m_evmVersion);
+ yulAssert(static_cast(info.args) == _arguments.size(), ""); // the argument count must match the instruction's info
+
+ auto const& arg = _arguments; // short alias used by all cases below
+ switch (_instruction)
+ {
+ case Instruction::STOP:
+ logTrace(_instruction);
+ BOOST_THROW_EXCEPTION(ExplicitlyTerminated());
+ // --------------- arithmetic ---------------
+ case Instruction::ADD:
+ return arg[0] + arg[1];
+ case Instruction::MUL:
+ return arg[0] * arg[1];
+ case Instruction::SUB:
+ return arg[0] - arg[1];
+ case Instruction::DIV:
+ return arg[1] == 0 ? 0 : arg[0] / arg[1]; // division by zero yields 0
+ case Instruction::SDIV:
+ return arg[1] == 0 ? 0 : s2u(u2s(arg[0]) / u2s(arg[1])); // division by zero yields 0
+ case Instruction::MOD:
+ return arg[1] == 0 ? 0 : arg[0] % arg[1]; // modulo zero yields 0
+ case Instruction::SMOD:
+ return arg[1] == 0 ? 0 : s2u(u2s(arg[0]) % u2s(arg[1])); // modulo zero yields 0
+ case Instruction::EXP:
+ return exp256(arg[0], arg[1]);
+ case Instruction::NOT:
+ return ~arg[0];
+ case Instruction::LT:
+ return arg[0] < arg[1] ? 1 : 0;
+ case Instruction::GT:
+ return arg[0] > arg[1] ? 1 : 0;
+ case Instruction::SLT:
+ return u2s(arg[0]) < u2s(arg[1]) ? 1 : 0;
+ case Instruction::SGT:
+ return u2s(arg[0]) > u2s(arg[1]) ? 1 : 0;
+ case Instruction::EQ:
+ return arg[0] == arg[1] ? 1 : 0;
+ case Instruction::ISZERO:
+ return arg[0] == 0 ? 1 : 0;
+ case Instruction::AND:
+ return arg[0] & arg[1];
+ case Instruction::OR:
+ return arg[0] | arg[1];
+ case Instruction::XOR:
+ return arg[0] ^ arg[1];
+ case Instruction::BYTE:
+ return arg[0] >= 32 ? 0 : (arg[1] >> unsigned(8 * (31 - arg[0]))) & 0xff; // index 0 selects the most significant byte
+ case Instruction::SHL:
+ return arg[0] > 255 ? 0 : (arg[1] << unsigned(arg[0])); // shift amounts above 255 yield 0
+ case Instruction::SHR:
+ return arg[0] > 255 ? 0 : (arg[1] >> unsigned(arg[0])); // shift amounts above 255 yield 0
+ case Instruction::SAR:
+ {
+ static u256 const hibit = u256(1) << 255; // mask of the sign bit
+ if (arg[0] >= 256)
+ return arg[1] & hibit ? u256(-1) : 0; // everything is shifted out: result is all ones or zero depending on the sign
+ else
+ {
+ unsigned amount = unsigned(arg[0]);
+ u256 v = arg[1] >> amount;
+ if (arg[1] & hibit)
+ v |= u256(-1) << (256 - amount); // fill the vacated high bits with ones for negative values
+ return v;
+ }
+ }
+ case Instruction::ADDMOD:
+ return arg[2] == 0 ? 0 : u256((u512(arg[0]) + u512(arg[1])) % arg[2]); // the sum is computed in u512
+ case Instruction::MULMOD:
+ return arg[2] == 0 ? 0 : u256((u512(arg[0]) * u512(arg[1])) % arg[2]); // the product is computed in u512
+ case Instruction::SIGNEXTEND:
+ if (arg[0] >= 31)
+ return arg[1]; // nothing to extend
+ else
+ {
+ unsigned testBit = unsigned(arg[0]) * 8 + 7; // position of the sign bit of the selected byte width
+ u256 ret = arg[1];
+ u256 mask = ((u256(1) << testBit) - 1);
+ if (boost::multiprecision::bit_test(ret, testBit))
+ ret |= ~mask; // sign bit set: set all higher bits
+ else
+ ret &= mask; // sign bit clear: clear all higher bits
+ return ret;
+ }
+ // --------------- blockchain stuff ---------------
+ case Instruction::KECCAK256:
+ {
+ if (!accessMemory(arg[0], arg[1]))
+ return u256("0x1234cafe1234cafe1234cafe") + arg[0]; // placeholder value when accessMemory() reports the range as not recorded
+ uint64_t offset = uint64_t(arg[0] & uint64_t(-1));
+ uint64_t size = uint64_t(arg[1] & uint64_t(-1));
+ return u256(keccak256(m_state.readMemory(offset, size)));
+ }
+ case Instruction::ADDRESS:
+ return h256(m_state.address, h256::AlignRight);
+ case Instruction::BALANCE:
+ if (arg[0] == h256(m_state.address, h256::AlignRight)) // own address: same value as SELFBALANCE
+ return m_state.selfbalance;
+ else
+ return m_state.balance; // every other address shares one configured balance
+ case Instruction::SELFBALANCE:
+ return m_state.selfbalance;
+ case Instruction::ORIGIN:
+ return h256(m_state.origin, h256::AlignRight);
+ case Instruction::CALLER:
+ return h256(m_state.caller, h256::AlignRight);
+ case Instruction::CALLVALUE:
+ return m_state.callvalue;
+ case Instruction::CALLDATALOAD:
+ return readZeroExtended(m_state.calldata, arg[0]);
+ case Instruction::CALLDATASIZE:
+ return m_state.calldata.size();
+ case Instruction::CALLDATACOPY:
+ if (accessMemory(arg[0], arg[2])) // the copy is only performed if accessMemory() returns true
+ copyZeroExtended(
+ m_state.memory, m_state.calldata,
+ size_t(arg[0]), size_t(arg[1]), size_t(arg[2])
+ );
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::CODESIZE:
+ return m_state.code.size();
+ case Instruction::CODECOPY:
+ if (accessMemory(arg[0], arg[2])) // the copy is only performed if accessMemory() returns true
+ copyZeroExtended(
+ m_state.memory, m_state.code,
+ size_t(arg[0]), size_t(arg[1]), size_t(arg[2])
+ );
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::GASPRICE:
+ return m_state.gasprice;
+ case Instruction::CHAINID:
+ return m_state.chainid;
+ case Instruction::BASEFEE:
+ return m_state.basefee;
+ case Instruction::BLOBHASH:
+ return blobHash(arg[0]);
+ case Instruction::BLOBBASEFEE:
+ return m_state.blobbasefee;
+ case Instruction::EXTCODESIZE:
+ return u256(keccak256(h256(arg[0]))) & 0xffffff; // value derived from a hash of the address, not from real code
+ case Instruction::EXTCODEHASH:
+ return u256(keccak256(h256(arg[0] + 1))); // value derived from a hash of address + 1, not from real code
+ case Instruction::EXTCODECOPY:
+ if (accessMemory(arg[1], arg[3]))
+ // TODO this way extcodecopy and codecopy do the same thing.
+ copyZeroExtended(
+ m_state.memory, m_state.code,
+ size_t(arg[1]), size_t(arg[2]), size_t(arg[3])
+ );
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::RETURNDATASIZE:
+ return m_state.returndata.size();
+ case Instruction::RETURNDATACOPY:
+ if (accessMemory(arg[0], arg[2])) // the copy is only performed if accessMemory() returns true
+ copyZeroExtended(
+ m_state.memory, m_state.returndata,
+ size_t(arg[0]), size_t(arg[1]), size_t(arg[2])
+ );
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::MCOPY:
+ if (accessMemory(arg[1], arg[2]) && accessMemory(arg[0], arg[2])) // both the source and the target range are checked
+ copyZeroExtendedWithOverlap(
+ m_state.memory,
+ m_state.memory,
+ static_cast(arg[0]),
+ static_cast(arg[1]),
+ static_cast(arg[2])
+ );
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::BLOCKHASH:
+ if (arg[0] >= m_state.blockNumber || arg[0] + 256 < m_state.blockNumber) // 0 outside the window of 256 blocks before the current one
+ return 0;
+ else
+ return 0xaaaaaaaa + (arg[0] - m_state.blockNumber - 256); // value computed from the block number, not a real hash
+ case Instruction::COINBASE:
+ return h256(m_state.coinbase, h256::AlignRight);
+ case Instruction::TIMESTAMP:
+ return m_state.timestamp;
+ case Instruction::NUMBER:
+ return m_state.blockNumber;
+ case Instruction::PREVRANDAO:
+ return (m_evmVersion < langutil::EVMVersion::paris()) ? m_state.difficulty : m_state.prevrandao; // before paris the difficulty field is returned
+ case Instruction::GASLIMIT:
+ return m_state.gaslimit;
+ // --------------- memory / storage / logs ---------------
+ case Instruction::MLOAD:
+ accessMemory(arg[0], 0x20); // return value is ignored here
+ return readMemoryWord(arg[0]);
+ case Instruction::MSTORE:
+ accessMemory(arg[0], 0x20); // return value is ignored here
+ writeMemoryWord(arg[0], arg[1]);
+ return 0;
+ case Instruction::MSTORE8:
+ accessMemory(arg[0], 1); // return value is ignored here
+ m_state.memory[arg[0]] = uint8_t(arg[1] & 0xff); // only the lowest byte is stored
+ return 0;
+ case Instruction::SLOAD:
+ return m_state.storage[h256(arg[0])];
+ case Instruction::SSTORE:
+ m_state.storage[h256(arg[0])] = h256(arg[1]);
+ return 0;
+ case Instruction::PC:
+ return 0x77; // fixed constant
+ case Instruction::MSIZE:
+ return m_state.msize;
+ case Instruction::GAS:
+ return 0x99; // fixed constant
+ case Instruction::LOG0:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::LOG1:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::LOG2:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::LOG3:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::LOG4:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ return 0;
+ case Instruction::TLOAD:
+ return m_state.transientStorage[h256(arg[0])];
+ case Instruction::TSTORE:
+ m_state.transientStorage[h256(arg[0])] = h256(arg[1]);
+ return 0;
+ // --------------- calls ---------------
+ case Instruction::CREATE:
+ accessMemory(arg[1], arg[2]);
+ logTrace(_instruction, arg);
+ if (arg[2] != 0)
+ return (0xcccccc + arg[1]) & u256("0xffffffffffffffffffffffffffffffffffffffff"); // made-up address, masked to 160 bits
+ else
+ return 0xcccccc;
+ case Instruction::CREATE2:
+ accessMemory(arg[1], arg[2]);
+ logTrace(_instruction, arg);
+ if (arg[2] != 0)
+ return (0xdddddd + arg[1]) & u256("0xffffffffffffffffffffffffffffffffffffffff"); // made-up address, masked to 160 bits
+ else
+ return 0xdddddd;
+ case Instruction::CALL:
+ case Instruction::CALLCODE:
+ accessMemory(arg[3], arg[4]);
+ accessMemory(arg[5], arg[6]);
+ logTrace(_instruction, arg);
+ // Randomly fail based on the called address if it isn't a call to self.
+ // Used for fuzzing.
+ return (
+ (arg[0] > 0) &&
+ (arg[1] == util::h160::Arith(m_state.address) || (arg[1] & 1))
+ ) ? 1 : 0;
+ case Instruction::DELEGATECALL:
+ case Instruction::STATICCALL:
+ accessMemory(arg[2], arg[3]);
+ accessMemory(arg[4], arg[5]);
+ logTrace(_instruction, arg);
+ // Randomly fail based on the called address if it isn't a call to self.
+ // Used for fuzzing.
+ return (
+ (arg[0] > 0) &&
+ (arg[1] == util::h160::Arith(m_state.address) || (arg[1] & 1))
+ ) ? 1 : 0;
+ case Instruction::RETURN:
+ {
+ m_state.returndata = {}; // stays empty unless accessMemory() returns true below
+ if (accessMemory(arg[0], arg[1]))
+ m_state.returndata = m_state.readMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg, m_state.returndata);
+ BOOST_THROW_EXCEPTION(ExplicitlyTerminatedWithReturn());
+ }
+ case Instruction::REVERT:
+ accessMemory(arg[0], arg[1]);
+ logTrace(_instruction, arg);
+ m_state.storage.clear(); // both storages are emptied; the trace is kept
+ m_state.transientStorage.clear();
+ BOOST_THROW_EXCEPTION(ExplicitlyTerminated());
+ case Instruction::INVALID:
+ logTrace(_instruction);
+ m_state.storage.clear();
+ m_state.transientStorage.clear();
+ m_state.trace.clear(); // unlike REVERT, the trace is emptied as well
+ BOOST_THROW_EXCEPTION(ExplicitlyTerminated());
+ case Instruction::SELFDESTRUCT:
+ logTrace(_instruction, arg);
+ m_state.storage.clear();
+ m_state.transientStorage.clear();
+ m_state.trace.clear(); // unlike REVERT, the trace is emptied as well
+ BOOST_THROW_EXCEPTION(ExplicitlyTerminated());
+ case Instruction::POP:
+ break; // nothing to do; falls through to the final `return 0`
+ // --------------- invalid in strict assembly ---------------
+ case Instruction::JUMP:
+ case Instruction::JUMPI:
+ case Instruction::JUMPDEST:
+ case Instruction::PUSH0:
+ case Instruction::PUSH1:
+ case Instruction::PUSH2:
+ case Instruction::PUSH3:
+ case Instruction::PUSH4:
+ case Instruction::PUSH5:
+ case Instruction::PUSH6:
+ case Instruction::PUSH7:
+ case Instruction::PUSH8:
+ case Instruction::PUSH9:
+ case Instruction::PUSH10:
+ case Instruction::PUSH11:
+ case Instruction::PUSH12:
+ case Instruction::PUSH13:
+ case Instruction::PUSH14:
+ case Instruction::PUSH15:
+ case Instruction::PUSH16:
+ case Instruction::PUSH17:
+ case Instruction::PUSH18:
+ case Instruction::PUSH19:
+ case Instruction::PUSH20:
+ case Instruction::PUSH21:
+ case Instruction::PUSH22:
+ case Instruction::PUSH23:
+ case Instruction::PUSH24:
+ case Instruction::PUSH25:
+ case Instruction::PUSH26:
+ case Instruction::PUSH27:
+ case Instruction::PUSH28:
+ case Instruction::PUSH29:
+ case Instruction::PUSH30:
+ case Instruction::PUSH31:
+ case Instruction::PUSH32:
+ case Instruction::DUP1:
+ case Instruction::DUP2:
+ case Instruction::DUP3:
+ case Instruction::DUP4:
+ case Instruction::DUP5:
+ case Instruction::DUP6:
+ case Instruction::DUP7:
+ case Instruction::DUP8:
+ case Instruction::DUP9:
+ case Instruction::DUP10:
+ case Instruction::DUP11:
+ case Instruction::DUP12:
+ case Instruction::DUP13:
+ case Instruction::DUP14:
+ case Instruction::DUP15:
+ case Instruction::DUP16:
+ case Instruction::SWAP1:
+ case Instruction::SWAP2:
+ case Instruction::SWAP3:
+ case Instruction::SWAP4:
+ case Instruction::SWAP5:
+ case Instruction::SWAP6:
+ case Instruction::SWAP7:
+ case Instruction::SWAP8:
+ case Instruction::SWAP9:
+ case Instruction::SWAP10:
+ case Instruction::SWAP11:
+ case Instruction::SWAP12:
+ case Instruction::SWAP13:
+ case Instruction::SWAP14:
+ case Instruction::SWAP15:
+ case Instruction::SWAP16:
+ {
+ yulAssert(false, ""); // these instructions are treated as an internal error here
+ return 0;
+ }
+ }
+
+ return 0; // reached by POP and by any instruction without a case above
+}
+
+u256 EVMInstructionInterpreter::evalBuiltin( // evaluates a builtin: instruction-backed ones via eval(), a few others by name
+ BuiltinFunctionForEVM const& _fun,
+ std::vector const& _arguments,
+ std::vector const& _evaluatedArguments
+)
+{
+ if (_fun.instruction)
+ return eval(*_fun.instruction, _evaluatedArguments); // instruction-backed builtins only need the evaluated arguments
+
+ std::string fun = _fun.name.str();
+ // Evaluate datasize/offset/copy instructions
+ if (fun == "datasize" || fun == "dataoffset")
+ {
+ std::string arg = formatLiteral(std::get(_arguments.at(0))); // uses the unevaluated first argument
+ if (arg.length() < 32)
+ arg.resize(32, 0); // pad with zero bytes up to 32 characters
+ if (fun == "datasize")
+ return u256(keccak256(arg)) & 0xfff; // value derived from a hash of the argument, limited to 12 bits
+ else
+ {
+ // Force different value than for datasize
+ arg[31]++;
+ arg[31]++;
+ return u256(keccak256(arg)) & 0xfff;
+ }
+ }
+ else if (fun == "datacopy")
+ {
+ // This is identical to codecopy.
+ if (
+ _evaluatedArguments.at(2) != 0 &&
+ accessMemory(_evaluatedArguments.at(0), _evaluatedArguments.at(2))
+ )
+ copyZeroExtended(
+ m_state.memory,
+ m_state.code,
+ size_t(_evaluatedArguments.at(0)),
+ size_t(_evaluatedArguments.at(1) & std::numeric_limits::max()),
+ size_t(_evaluatedArguments.at(2))
+ );
+ return 0;
+ }
+ else if (fun == "memoryguard")
+ return _evaluatedArguments.at(0); // memoryguard returns its argument unchanged
+ else {
+ m_state.trace.push_back("Unknown builtin: " + fun); // recorded in the trace before the exception is thrown
+ BOOST_THROW_EXCEPTION(UnsupportedBuiltinFunctionEvaluated());
+ }
+ return 0; // not reached: every branch above returns or throws
+}
+
+
+bool EVMInstructionInterpreter::accessMemory(u256 const& _offset, u256 const& _size)
+{
+ if (_size == 0) // Zero-length accesses are always valid and leave msize untouched.
+ return true;
+
+ if (_offset <= (_offset + _size) && (_offset + _size) <= (_offset + _size + 0x1f)) // Neither sum may wrap around.
+ {
+ u256 newMSize = (_offset + _size + 0x1f) & ~u256(0x1f); // End of the range rounded up to a multiple of 32.
+ m_state.msize = std::max(m_state.msize, newMSize);
+ // We only record accesses to contiguous memory chunks that are at most s_maxRangeSize bytes
+ // in size and at an offset of at most numeric_limits::max() - s_maxRangeSize
+ return _size <= s_maxRangeSize && _offset <= u256(std::numeric_limits::max() - s_maxRangeSize);
+ }
+
+ m_state.msize = u256(-1); // The range wraps around: msize is set to u256(-1) and the access is rejected.
+ return false;
+}
+
+bytes EVMInstructionInterpreter::readMemory(u256 const& _offset, u256 const& _size)
+{
+ yulAssert(_size <= s_maxRangeSize, "Too large read.");
+ bytes data(size_t(_size), uint8_t(0));
+ for (size_t i = 0; i < data.size(); ++i) // NOTE(review): operator[] below appears to insert an entry into the memory map for every offset read - confirm this is intended.
+ data[i] = m_state.memory[_offset + i];
+ return data;
+}
+
+u256 EVMInstructionInterpreter::readMemoryWord(u256 const& _offset)
+{
+ return u256(h256(m_state.readMemory(_offset, 32))); // Reads 32 bytes via the state's readMemory and converts them through h256.
+}
+
+void EVMInstructionInterpreter::writeMemoryWord(u256 const& _offset, u256 const& _value)
+{
+ for (size_t i = 0; i < 32; i++) // Big-endian: the most significant byte of _value is stored at _offset.
+ m_state.memory[_offset + i] = uint8_t((_value >> (8 * (31 - i))) & 0xff);
+}
+
+
+void EVMInstructionInterpreter::logTrace(
+ evmasm::Instruction _instruction,
+ std::vector const& _arguments,
+ bytes const& _data
+)
+{
+ logTrace( // Forwards to the string-based overload using the instruction's name.
+ evmasm::instructionInfo(_instruction, m_evmVersion).name,
+ SemanticInformation::memory(_instruction) == SemanticInformation::Effect::Write, // True if the instruction's memory effect is a write.
+ _arguments,
+ _data
+ );
+}
+
+void EVMInstructionInterpreter::logTrace(
+ std::string const& _pseudoInstruction,
+ bool _writesToMemory,
+ std::vector const& _arguments,
+ bytes const& _data
+)
+{
+ if (!(_writesToMemory && memWriteTracingDisabled())) // Nothing is logged for memory writes when their tracing is disabled.
+ {
+ std::string message = _pseudoInstruction + "(";
+ std::pair inputMemoryPtrModified = isInputMemoryPtrModified(_pseudoInstruction, _arguments);
+ for (size_t i = 0; i < _arguments.size(); ++i)
+ {
+ bool printZero = inputMemoryPtrModified.first && inputMemoryPtrModified.second == i; // The flagged pointer argument is printed as 0.
+ u256 arg = printZero ? 0 : _arguments[i];
+ message += (i > 0 ? ", " : "") + formatNumber(arg);
+ }
+ message += ")";
+ if (!_data.empty()) // Auxiliary data is appended as hex in square brackets.
+ message += " [" + util::toHex(_data) + "]";
+ m_state.trace.emplace_back(std::move(message));
+ if (m_state.maxTraceSize > 0 && m_state.trace.size() >= m_state.maxTraceSize) // A maxTraceSize of 0 means no limit.
+ {
+ m_state.trace.emplace_back("Trace size limit reached.");
+ BOOST_THROW_EXCEPTION(TraceLimitReached());
+ }
+ }
+}
+
+std::pair EVMInstructionInterpreter::isInputMemoryPtrModified(
+ std::string const& _pseudoInstruction,
+ std::vector const& _arguments
+)
+{ // NOTE(review): _arguments is indexed without a size check; presumably callers always pass the instruction's full argument list - confirm.
+ if (_pseudoInstruction == "RETURN" || _pseudoInstruction == "REVERT")
+ {
+ if (_arguments[1] == 0) // Argument 1 is tested against zero; index 0 is reported.
+ return {true, 0};
+ else
+ return {false, 0};
+ }
+ else if (
+ _pseudoInstruction == "RETURNDATACOPY" || _pseudoInstruction == "CALLDATACOPY"
+ || _pseudoInstruction == "CODECOPY")
+ {
+ if (_arguments[2] == 0) // Argument 2 is tested against zero; index 0 is reported.
+ return {true, 0};
+ else
+ return {false, 0};
+ }
+ else if (_pseudoInstruction == "EXTCODECOPY")
+ {
+ if (_arguments[3] == 0) // Argument 3 is tested against zero; index 1 is reported.
+ return {true, 1};
+ else
+ return {false, 0};
+ }
+ else if (
+ _pseudoInstruction == "LOG0" || _pseudoInstruction == "LOG1" || _pseudoInstruction == "LOG2"
+ || _pseudoInstruction == "LOG3" || _pseudoInstruction == "LOG4")
+ {
+ if (_arguments[1] == 0) // Argument 1 is tested against zero; index 0 is reported.
+ return {true, 0};
+ else
+ return {false, 0};
+ }
+ if (_pseudoInstruction == "CREATE" || _pseudoInstruction == "CREATE2") // Plain `if`: every matching branch above has already returned.
+ {
+ if (_arguments[2] == 0) // Argument 2 is tested against zero; index 1 is reported.
+ return {true, 1};
+ else
+ return {false, 0};
+ }
+ if (_pseudoInstruction == "CALL" || _pseudoInstruction == "CALLCODE") // Plain `if` again; equivalent to `else if` because of the returns above.
+ {
+ if (_arguments[4] == 0) // Argument 4 is tested against zero; index 3 is reported.
+ return {true, 3};
+ else
+ return {false, 0};
+ }
+ else if (_pseudoInstruction == "DELEGATECALL" || _pseudoInstruction == "STATICCALL")
+ {
+ if (_arguments[3] == 0) // Argument 3 is tested against zero; index 2 is reported.
+ return {true, 2};
+ else
+ return {false, 0};
+ }
+ else // Any other instruction is reported as unaffected.
+ return {false, 0};
+}
+
+h256 EVMInstructionInterpreter::blobHash(u256 const& _index)
+{
+ yulAssert(m_evmVersion.hasBlobHash());
+ if (_index >= m_state.blobCommitments.size()) // No commitment stored at this index: return a default-constructed hash.
+ return util::FixedHash<32>{};
+
+ h256 hashedCommitment = h256(picosha2::hash256(toBigEndian(m_state.blobCommitments[static_cast(_index)])));
+ yulAssert(m_state.blobHashVersion.size == 1);
+ hashedCommitment[0] = *m_state.blobHashVersion.data(); // Replace the first byte of the hash with the version byte.
+ yulAssert(hashedCommitment.size == 32);
+ return hashedCommitment;
+}
diff --git a/libyul/tools/interpreter/EVMInstructionInterpreter.h b/libyul/tools/interpreter/EVMInstructionInterpreter.h
new file mode 100644
index 000000000000..f1c2a203b96d
--- /dev/null
+++ b/libyul/tools/interpreter/EVMInstructionInterpreter.h
@@ -0,0 +1,177 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see .
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Yul interpreter module that evaluates EVM instructions.
+ */
+
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace solidity::evmasm
+{
+enum class Instruction: uint8_t;
+}
+
+namespace solidity::yul
+{
+class YulString;
+struct BuiltinFunctionForEVM;
+}
+
+namespace solidity::yul::tools::interpreter
+{
+
+/// Copy @a _size bytes of @a _source at offset @a _sourceOffset to
+/// @a _target at offset @a _targetOffset. Behaves as if @a _source would
+/// continue with an infinite sequence of zero bytes beyond its end.
+void copyZeroExtended(
+ std::map& _target,
+ bytes const& _source,
+ size_t _targetOffset,
+ size_t _sourceOffset,
+ size_t _size
+);
+
+/// Copy @a _size bytes of @a _source at offset @a _sourceOffset to
+/// @a _target at offset @a _targetOffset. Behaves as if @a _source would
+/// continue with an infinite sequence of zero bytes beyond its end.
+/// When target and source areas overlap, behaves as if the data was copied
+/// using an intermediate buffer.
+void copyZeroExtendedWithOverlap(
+ std::map& _target,
+ std::map const& _source,
+ size_t _targetOffset,
+ size_t _sourceOffset,
+ size_t _size
+);
+
+struct InterpreterState;
+
+/**
+ * Interprets EVM instructions based on the current state and logs instructions with
+ * side-effects.
+ *
+ * Since this is mainly meant to be used for differential fuzz testing, it is focused
+ * on a single contract only, does not do any gas counting and differs from the correct
+ * implementation in many ways:
+ *
+ * - If memory access to a "large" memory position is performed, a deterministic
+ * value is returned. Data that is stored in a "large" memory position is not
+ * retained.
+ * - The blockhash instruction returns a fixed value if the argument is in range.
+ * - Extcodesize returns a deterministic value depending on the address.
+ * - Extcodecopy copies a deterministic value depending on the address.
+ * - And many other things
+ *
+ * The main focus is that the generated execution trace is the same for equivalent executions
+ * and likely to be different for non-equivalent executions.
+ */
+class EVMInstructionInterpreter
+{
+public:
+ explicit EVMInstructionInterpreter(langutil::EVMVersion _evmVersion, InterpreterState& _state, bool _disableMemWriteTrace):
+ m_evmVersion(_evmVersion),
+ m_state(_state),
+ m_disableMemoryWriteInstructions(_disableMemWriteTrace)
+ {}
+ /// Evaluate instruction
+ u256 eval(evmasm::Instruction _instruction, std::vector const& _arguments);
+ /// Evaluate builtin function
+ u256 evalBuiltin(
+ BuiltinFunctionForEVM const& _fun,
+ std::vector const& _arguments,
+ std::vector const& _evaluatedArguments
+ );
+
+ /// @returns the blob versioned hash
+ util::h256 blobHash(u256 const& _index);
+
+private:
+ /// Checks if the memory access is valid and adjusts msize accordingly.
+ /// @returns true if memory access is valid, false otherwise
+ /// A valid memory access must satisfy all of the following pre-requisites:
+ /// - Sum of @param _offset and @param _size does not overflow modulo u256
+ /// - Sum of @param _offset, @param _size, and 31 does not overflow modulo u256 (see note below)
+ /// - @param _size is less than or equal to @a s_maxRangeSize
+ /// - @param _offset is less than or equal to the difference of numeric_limits::max()
+ /// and @a s_maxRangeSize
+ /// Note: Memory expansion is carried out in multiples of 32 bytes.
+ bool accessMemory(u256 const& _offset, u256 const& _size = 32);
+ /// @returns the memory contents at the provided address.
+ /// Does not adjust msize, use @a accessMemory for that
+ bytes readMemory(u256 const& _offset, u256 const& _size = 32);
+ /// @returns the 32-byte memory word at the provided address as a u256.
+ /// Does not adjust msize, use @a accessMemory for that
+ u256 readMemoryWord(u256 const& _offset);
+ /// Writes @a _value as a 32-byte word to memory at the provided address.
+ /// Does not adjust msize, use @a accessMemory for that
+ void writeMemoryWord(u256 const& _offset, u256 const& _value);
+
+ void logTrace(
+ evmasm::Instruction _instruction,
+ std::vector const& _arguments = {},
+ bytes const& _data = {}
+ );
+ /// Appends a log to the trace representing an instruction or similar operation by string,
+ /// with arguments and auxiliary data (if nonempty). Flag @param _writesToMemory indicates
+ /// whether the instruction writes to (true) or does not write to (false) memory.
+ void logTrace(
+ std::string const& _pseudoInstruction,
+ bool _writesToMemory,
+ std::vector const& _arguments = {},
+ bytes const& _data = {}
+ );
+
+ /// @returns a pair of boolean and size_t. The first value is true if @param _pseudoInstruction
+ /// is a Yul instruction for which the Yul optimizer's loadResolver step rewrites the input
+ /// memory pointer value to zero when that instruction's read length (contained within @param
+ /// _arguments) is zero; the second value is then the positional index of the input memory
+ /// pointer argument.
+ /// If the Yul instruction is unaffected or affected but read length is non-zero, the first
+ /// value is false.
+ std::pair isInputMemoryPtrModified(
+ std::string const& _pseudoInstruction,
+ std::vector const& _arguments
+ );
+
+ /// @returns true if tracing of instructions that write to memory is disabled.
+ bool memWriteTracingDisabled()
+ {
+ return m_disableMemoryWriteInstructions;
+ }
+
+ langutil::EVMVersion m_evmVersion;
+ InterpreterState& m_state;
+ /// Flag to disable trace of instructions that write to memory.
+ bool m_disableMemoryWriteInstructions;
+
+public:
+ /// Maximum length for range-based memory access operations.
+ static constexpr unsigned s_maxRangeSize = 0xffff;
+};
+
+} // solidity::yul::tools::interpreter
diff --git a/libyul/tools/interpreter/Interpreter.cpp b/libyul/tools/interpreter/Interpreter.cpp
new file mode 100644
index 000000000000..48fc4932a80c
--- /dev/null
+++ b/libyul/tools/interpreter/Interpreter.cpp
@@ -0,0 +1,519 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see .
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Yul interpreter.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+using namespace solidity;
+using namespace solidity::yul;
+using namespace solidity::yul::tools::interpreter;
+
+using solidity::util::h256;
+
+void InterpreterState::dumpStorage(std::ostream& _out) const
+{
+ for (auto const& [slot, value]: storage)
+ if (value != h256{}) // Slots whose value equals a default-constructed h256 are omitted.
+ _out << " " << slot.hex() << ": " << value.hex() << std::endl;
+}
+
+void InterpreterState::dumpTransientStorage(std::ostream& _out) const
+{
+ for (auto const& [slot, value]: transientStorage)
+ if (value != h256{}) // Slots whose value equals a default-constructed h256 are omitted.
+ _out << " " << slot.hex() << ": " << value.hex() << std::endl;
+}
+
+void InterpreterState::dumpTraceAndState(std::ostream& _out, bool _disableMemoryTrace) const
+{ // NOTE(review): std::hex, std::uppercase and std::setfill set below are not reset, so they stay active on _out after this call.
+ _out << "Trace:" << std::endl;
+ for (auto const& line: trace)
+ _out << " " << line << std::endl;
+ if (!_disableMemoryTrace)
+ {
+ _out << "Memory dump:\n";
+ std::map words;
+ for (auto const& [offset, value]: memory) // Fold each byte into its 32-byte-aligned word; the lowest offset ends up in the most significant byte.
+ words[(offset / 0x20) * 0x20] |= u256(uint32_t(value)) << (256 - 8 - 8 * static_cast(offset % 0x20));
+ for (auto const& [offset, value]: words)
+ if (value != 0) // All-zero words are not printed.
+ _out << " " << std::uppercase << std::hex << std::setw(4) << offset << ": " << h256(value).hex() << std::endl;
+ }
+ _out << "Storage dump:" << std::endl;
+ dumpStorage(_out);
+
+ _out << "Transient storage dump:" << std::endl;
+ dumpTransientStorage(_out);
+
+ if (!calldata.empty())
+ {
+ _out << "Calldata dump:";
+
+ for (size_t offset = 0; offset < calldata.size(); ++offset)
+ if (calldata[offset] != 0) // Zero bytes are skipped entirely.
+ {
+ if (offset % 32 == 0) // NOTE(review): the line header is only printed when the byte at a 32-byte boundary is itself non-zero.
+ _out <<
+ std::endl <<
+ " " <<
+ std::uppercase <<
+ std::hex <<
+ std::setfill(' ') <<
+ std::setw(4) <<
+ offset <<
+ ": ";
+
+ _out <<
+ std::hex <<
+ std::setw(2) <<
+ std::setfill('0') <<
+ static_cast(calldata[offset]);
+ }
+
+ _out << std::endl;
+ }
+}
+
+void Interpreter::run(
+ InterpreterState& _state,
+ Dialect const& _dialect,
+ Block const& _ast,
+ bool _disableExternalCalls,
+ bool _disableMemoryTrace
+)
+{
+ Scope scope; // Fresh root scope that only lives for this run.
+ Interpreter{_state, _dialect, scope, _disableExternalCalls, _disableMemoryTrace, 0}(_ast); // The trailing 0 is presumably the initial recursion depth - confirm against the constructor.
+}
+
+void Interpreter::operator()(ExpressionStatement const& _expressionStatement)
+{
+ evaluateMulti(_expressionStatement.expression); // The resulting values are discarded.
+}
+
+void Interpreter::operator()(Assignment const& _assignment)
+{
+ solAssert(_assignment.value, "");
+ std::vector values = evaluateMulti(*_assignment.value);
+ solAssert(values.size() == _assignment.variableNames.size(), ""); // One value per assigned variable.
+ for (size_t i = 0; i < values.size(); ++i)
+ {
+ YulName varName = _assignment.variableNames.at(i).name;
+ solAssert(m_variables.count(varName), ""); // The target must already have a value in m_variables.
+ m_variables[varName] = values.at(i);
+ }
+}
+
+void Interpreter::operator()(VariableDeclaration const& _declaration)
+{
+ std::vector values(_declaration.variables.size(), 0); // Without an initial value every declared variable starts at zero.
+ if (_declaration.value)
+ values = evaluateMulti(*_declaration.value);
+
+ solAssert(values.size() == _declaration.variables.size(), "");
+ for (size_t i = 0; i < values.size(); ++i)
+ {
+ YulName varName = _declaration.variables.at(i).name;
+ solAssert(!m_variables.count(varName), ""); // The name must not already have a value in m_variables.
+ m_variables[varName] = values.at(i);
+ m_scope->names.emplace(varName, nullptr); // A null entry marks the name as a variable; leaveScope() erases such names again.
+ }
+}
+
+void Interpreter::operator()(If const& _if)
+{
+ solAssert(_if.condition, "");
+ if (evaluate(*_if.condition) != 0) // Any non-zero condition value runs the body.
+ (*this)(_if.body);
+}
+
+void Interpreter::operator()(Switch const& _switch)
+{
+ solAssert(_switch.expression, "");
+ u256 val = evaluate(*_switch.expression);
+ solAssert(!_switch.cases.empty(), "");
+ for (auto const& c: _switch.cases) // Only the first matching case is executed.
+ // Default case has to be last.
+ if (!c.value || evaluate(*c.value) == val) // A case without a value always matches.
+ {
+ (*this)(c.body);
+ break;
+ }
+}
+
+void Interpreter::operator()(FunctionDefinition const&)
+{ // Nothing to execute: function definitions are registered by operator()(Block const&).
+}
+
+void Interpreter::operator()(ForLoop const& _forLoop)
+{
+ solAssert(_forLoop.condition, "");
+
+ enterScope(_forLoop.pre);
+ ScopeGuard g([this]{ leaveScope(); }); // Leaves the scope again on every exit path of this function.
+
+ for (auto const& statement: _forLoop.pre.statements) // The pre block's statements run directly in the scope entered above.
+ {
+ visit(statement);
+ if (m_state.controlFlowState == ControlFlowState::Leave)
+ return;
+ }
+ while (evaluate(*_forLoop.condition) != 0)
+ {
+ // Count a step per iteration for loops whose body and post blocks are both empty;
+ // no statement is visited in such a loop, so no other step would be counted here.
+ if (_forLoop.body.statements.size() == 0 && _forLoop.post.statements.size() == 0)
+ incrementStep();
+
+ m_state.controlFlowState = ControlFlowState::Default;
+ (*this)(_forLoop.body);
+ if (m_state.controlFlowState == ControlFlowState::Break || m_state.controlFlowState == ControlFlowState::Leave)
+ break;
+
+ m_state.controlFlowState = ControlFlowState::Default; // Break and Leave were handled above; this clears a pending Continue before the post block.
+ (*this)(_forLoop.post);
+ if (m_state.controlFlowState == ControlFlowState::Leave)
+ break;
+ }
+ if (m_state.controlFlowState != ControlFlowState::Leave) // A Break ends here; a Leave is kept for the caller to see.
+ m_state.controlFlowState = ControlFlowState::Default;
+}
+
+void Interpreter::operator()(Break const&)
+{
+ m_state.controlFlowState = ControlFlowState::Break; // Checked by the ForLoop visitor after the loop body.
+}
+
+void Interpreter::operator()(Continue const&)
+{
+ m_state.controlFlowState = ControlFlowState::Continue; // Stops the current block; the ForLoop visitor resets it before the post block.
+}
+
+void Interpreter::operator()(Leave const&)
+{
+ m_state.controlFlowState = ControlFlowState::Leave; // Not reset by loops; cleared once the function call returns.
+}
+
+void Interpreter::operator()(Block const& _block)
+{
+ enterScope(_block);
+ // Register functions.
+ for (auto const& statement: _block.statements) // Done up front, so a function can be called before its definition is reached.
+ if (std::holds_alternative(statement))
+ {
+ FunctionDefinition const& funDef = std::get(statement);
+ m_scope->names.emplace(funDef.name, &funDef);
+ }
+
+ for (auto const& statement: _block.statements)
+ {
+ incrementStep();
+ visit(statement);
+ if (m_state.controlFlowState != ControlFlowState::Default) // Stop at the first break/continue/leave.
+ break;
+ }
+
+ leaveScope(); // NOTE(review): unlike the ForLoop visitor this is not wrapped in a ScopeGuard, so it is skipped when a statement throws.
+}
+
+u256 Interpreter::evaluate(Expression const& _expression)
+{
+ ExpressionEvaluator ev(m_state, m_dialect, *m_scope, m_variables, m_disableExternalCalls, m_disableMemoryTrace, m_recursionDepth);
+ ev.visit(_expression);
+ return ev.value(); // value() asserts that exactly one value was produced.
+}
+
+std::vector Interpreter::evaluateMulti(Expression const& _expression)
+{
+ ExpressionEvaluator ev(m_state, m_dialect, *m_scope, m_variables, m_disableExternalCalls, m_disableMemoryTrace, m_recursionDepth);
+ ev.visit(_expression);
+ return ev.values(); // Unlike evaluate(), this returns all resulting values.
+}
+
+void Interpreter::enterScope(Block const& _block)
+{
+ if (!m_scope->subScopes.count(&_block)) // Sub-scopes are created on first entry and reused when the same block is entered again.
+ m_scope->subScopes[&_block] = std::make_unique(Scope{
+ {},
+ {},
+ m_scope
+ });
+ m_scope = m_scope->subScopes[&_block].get();
+}
+
+void Interpreter::leaveScope()
+{
+ for (auto const& [var, funDeclaration]: m_scope->names)
+ if (!funDeclaration) // Names mapped to a null pointer are variables; drop their values.
+ m_variables.erase(var);
+ m_scope = m_scope->parent;
+ yulAssert(m_scope, ""); // The scope that is left must have had a parent.
+}
+
+void Interpreter::incrementStep()
+{
+ // The recursion depth is checked here because `incrementStep` is called
+ // whenever an actual statement is executed.
+ checkRecursionDepth();
+
+ m_state.numSteps++;
+ if (m_state.maxSteps > 0 && m_state.numSteps >= m_state.maxSteps) // A maxSteps of 0 disables the limit.
+ {
+ m_state.trace.emplace_back("Interpreter execution step limit reached.");
+ BOOST_THROW_EXCEPTION(StepLimitReached());
+ }
+}
+
+void Interpreter::checkRecursionDepth()
+{
+ if (m_state.maxRecursionDepth > 0 && m_recursionDepth > m_state.maxRecursionDepth) // A maxRecursionDepth of 0 disables the limit.
+ {
+ m_state.trace.emplace_back("Interpreter recursion depth exceeded");
+ BOOST_THROW_EXCEPTION(RecursionDepthExceeded());
+ }
+}
+
+void ExpressionEvaluator::operator()(Literal const& _literal)
+{
+ incrementStep();
+ setValue(_literal.value.value()); // The literal's numeric value becomes the single result.
+}
+
+void ExpressionEvaluator::operator()(Identifier const& _identifier)
+{
+ solAssert(m_variables.count(_identifier.name), ""); // The identifier must name a variable that currently has a value.
+ incrementStep();
+ setValue(m_variables.at(_identifier.name));
+}
+
+void ExpressionEvaluator::operator()(FunctionCall const& _funCall)
+{
+ std::vector> const* literalArguments = nullptr;
+ if (BuiltinFunction const* builtin = m_dialect.builtin(_funCall.functionName.name))
+ if (!builtin->literalArguments.empty())
+ literalArguments = &builtin->literalArguments;
+ evaluateArgs(_funCall.arguments, literalArguments); // Leaves the evaluated arguments in m_values.
+
+ if (EVMDialect const* dialect = dynamic_cast(&m_dialect))
+ {
+ if (BuiltinFunctionForEVM const* fun = dialect->builtin(_funCall.functionName.name))
+ {
+ EVMInstructionInterpreter interpreter(dialect->evmVersion(), m_state, m_disableMemoryTrace);
+
+ u256 const value = interpreter.evalBuiltin(*fun, _funCall.arguments, values());
+
+ if (
+ !m_disableExternalCalls &&
+ fun->instruction &&
+ evmasm::isCallInstruction(*fun->instruction)
+ )
+ runExternalCall(*fun->instruction); // Performed after the instruction itself was evaluated above.
+
+ setValue(value);
+ return;
+ }
+ }
+
+ Scope* scope = &m_scope; // Not an EVM builtin: search the scope chain outwards for a user-defined function.
+ for (; scope; scope = scope->parent)
+ if (scope->names.count(_funCall.functionName.name))
+ break;
+ yulAssert(scope, "");
+
+ FunctionDefinition const* fun = scope->names.at(_funCall.functionName.name);
+ yulAssert(fun, "Function not found.");
+ yulAssert(m_values.size() == fun->parameters.size(), "");
+ std::map variables;
+ for (size_t i = 0; i < fun->parameters.size(); ++i) // Bind parameters to the evaluated arguments.
+ variables[fun->parameters.at(i).name] = m_values.at(i);
+ for (size_t i = 0; i < fun->returnVariables.size(); ++i) // Return variables start at zero.
+ variables[fun->returnVariables.at(i).name] = 0;
+
+ m_state.controlFlowState = ControlFlowState::Default;
+ std::unique_ptr interpreter = makeInterpreterCopy(std::move(variables));
+ (*interpreter)(fun->body);
+ m_state.controlFlowState = ControlFlowState::Default; // Clears whatever control flow state (e.g. Leave) the body left behind.
+
+ m_values.clear();
+ for (auto const& retVar: fun->returnVariables) // The call evaluates to the final values of the return variables.
+ m_values.emplace_back(interpreter->valueOfVariable(retVar.name));
+}
+
+u256 ExpressionEvaluator::value() const
+{
+ solAssert(m_values.size() == 1, ""); // Only valid when the expression produced exactly one value.
+ return m_values.front();
+}
+
+void ExpressionEvaluator::setValue(u256 _value)
+{
+ m_values.clear(); // Replaces any previous results with the single new value.
+ m_values.emplace_back(std::move(_value));
+}
+
+void ExpressionEvaluator::evaluateArgs(
+ std::vector const& _expr,
+ std::vector> const* _literalArguments
+)
+{
+ incrementStep();
+ std::vector values;
+ size_t i = 0;
+ /// Function arguments are evaluated in reverse.
+ for (auto const& expr: _expr | ranges::views::reverse)
+ {
+ if (!_literalArguments || !_literalArguments->at(_expr.size() - i - 1)) // i counts from the back, so map it to the argument's source position.
+ visit(expr);
+ else
+ {
+ Literal const& lit = std::get(expr); // Literal arguments are not visited; their value is taken directly.
+ if (lit.value.unlimited())
+ {
+ yulAssert(lit.kind == LiteralKind::String);
+ m_values = {getValueForUnlimitedLiteral(lit)};
+ }
+ else
+ m_values = {lit.value.value()};
+ }
+
+ values.push_back(value()); // value() asserts that the argument produced exactly one value.
+ ++i;
+ }
+ m_values = std::move(values);
+ std::reverse(m_values.begin(), m_values.end()); // The values were collected back to front; restore source order.
+}
+
+u256 ExpressionEvaluator::getValueForUnlimitedLiteral(Literal const&)
+{
+ return 0xdeadbeef; // Fixed placeholder; the literal itself is ignored.
+}
+
+void ExpressionEvaluator::incrementStep()
+{
+ m_nestingLevel++; // Counts against maxExprNesting rather than the interpreter's step counter.
+ if (m_state.maxExprNesting > 0 && m_nestingLevel > m_state.maxExprNesting) // A maxExprNesting of 0 disables the limit.
+ {
+ m_state.trace.emplace_back("Maximum expression nesting level reached.");
+ BOOST_THROW_EXCEPTION(ExpressionNestingLimitReached());
+ }
+}
+
+void ExpressionEvaluator::runExternalCall(evmasm::Instruction _instruction)
+{
+ u256 memOutOffset = 0;
+ u256 memOutSize = 0;
+ u256 callvalue = 0;
+ u256 memInOffset = 0;
+ u256 memInSize = 0;
+
+ // Pick the call value and the memory input/output ranges from the evaluated call arguments.
+ if (
+ _instruction == evmasm::Instruction::CALL ||
+ _instruction == evmasm::Instruction::CALLCODE
+ )
+ {
+ memOutOffset = values()[5];
+ memOutSize = values()[6];
+ callvalue = values()[2];
+ memInOffset = values()[3];
+ memInSize = values()[4];
+ }
+ else if (
+ _instruction == evmasm::Instruction::DELEGATECALL ||
+ _instruction == evmasm::Instruction::STATICCALL
+ )
+ {
+ memOutOffset = values()[4]; // These two take no value argument, so the indices shift down by one and callvalue stays 0.
+ memOutSize = values()[5];
+ memInOffset = values()[2];
+ memInSize = values()[3];
+ }
+ else
+ yulAssert(false);
+
+ // Don't execute external call if it isn't our own address
+ if (values()[1] != util::h160::Arith(m_state.address))
+ return;
+
+ Scope tmpScope;
+ InterpreterState tmpState; // The callee runs on a fresh state; only the three fields below are set from the caller.
+ tmpState.calldata = m_state.readMemory(memInOffset, memInSize);
+ tmpState.callvalue = callvalue;
+ tmpState.numInstance = m_state.numInstance + 1;
+
+ yulAssert(tmpState.numInstance < 1024, "Detected more than 1024 recursive calls, aborting...");
+
+ // Create new interpreter for the called contract
+ std::unique_ptr newInterpreter = makeInterpreterNew(tmpState, tmpScope);
+
+ Scope* abstractRootScope = &m_scope;
+ Scope* fileScope = nullptr;
+ Block const* ast = nullptr;
+
+ // Find file scope
+ while (abstractRootScope->parent) // Walk up to the root; fileScope ends up as the root's direct child on that path.
+ {
+ fileScope = abstractRootScope;
+ abstractRootScope = abstractRootScope->parent;
+ }
+
+ // Get AST for file scope
+ for (auto&& [block, scope]: abstractRootScope->subScopes)
+ if (scope.get() == fileScope)
+ {
+ ast = block;
+ break;
+ }
+
+ yulAssert(ast);
+
+ try
+ {
+ (*newInterpreter)(*ast);
+ }
+ catch (ExplicitlyTerminatedWithReturn const&) // Only this exception is handled here; any other one propagates to the caller.
+ {
+ // Copy return data to our memory
+ copyZeroExtended(
+ m_state.memory,
+ newInterpreter->returnData(),
+ memOutOffset.convert_to(),
+ 0,
+ memOutSize.convert_to()
+ );
+ m_state.returndata = newInterpreter->returnData();
+ }
+}
diff --git a/libyul/tools/interpreter/Interpreter.h b/libyul/tools/interpreter/Interpreter.h
new file mode 100644
index 000000000000..926bb8cbaf8d
--- /dev/null
+++ b/libyul/tools/interpreter/Interpreter.h
@@ -0,0 +1,336 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see .
+*/
+// SPDX-License-Identifier: GPL-3.0
+/**
+ * Yul interpreter.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include