diff --git a/jbmc/regression/jbmc-generics/constant_propagation/test.desc b/jbmc/regression/jbmc-generics/constant_propagation/test.desc index 31756c1b722..6345893ca5a 100644 --- a/jbmc/regression/jbmc-generics/constant_propagation/test.desc +++ b/jbmc/regression/jbmc-generics/constant_propagation/test.desc @@ -3,8 +3,10 @@ Test.class --function Test.main --show-vcc ^EXIT=0$ ^SIGNAL=0$ -^\{-\d+\} symex_dynamic::dynamic_object1#3 = \{ \{ \{ "java::GenericSub" \}, NULL, 0 \} \}$ -^\{-\d+\} symex_dynamic::dynamic_object1#4 = \{ \{ \{ "java::GenericSub" \}, NULL, 5 \} \}$ +^\{-\d+\} symex_dynamic::dynamic_object1#2 = \{ \{ \{ "java::GenericSub" \}, NULL, 0 \} \}$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.\.@Generic\.\.@java.lang.Object\.\.@class_identifier = "java::GenericSub"$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.\.@Generic\.\.key = NULL$ +^\{-\d+\} symex_dynamic::dynamic_object1#3\.\.@Generic\.\.x = 5$ -- byte_extract_(big|little)_endian -- diff --git a/jbmc/regression/jbmc-strings/ClassName/test.desc b/jbmc/regression/jbmc-strings/ClassName/test.desc index 6a9bd1782a0..7e7dd4c1be3 100644 --- a/jbmc/regression/jbmc-strings/ClassName/test.desc +++ b/jbmc/regression/jbmc-strings/ClassName/test.desc @@ -4,5 +4,3 @@ test.class ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ -\[java::test\.main:\(\)V\.assertion\.1\] line 5 assertion at file test\.java line 5 function java::test\.main:\(\)V bytecode-index 8: SUCCESS$ -\[java::test\.main:\(\)V\.assertion\.2\] line 6 assertion at file test\.java line 6 function java::test\.main:\(\)V bytecode-index 19: SUCCESS$ diff --git a/regression/cbmc-concurrency/norace_struct1/test.desc b/regression/cbmc-concurrency/norace_struct1/test.desc index c239dca4b31..8ecf05b6d5c 100644 --- a/regression/cbmc-concurrency/norace_struct1/test.desc +++ b/regression/cbmc-concurrency/norace_struct1/test.desc @@ -1,4 +1,4 @@ -KNOWNBUG pthread +CORE pthread main.c ^EXIT=0$ diff --git a/regression/cbmc/Array_Propagation1/main.c b/regression/cbmc/Array_Propagation1/main.c new file mode 100644 index 00000000000..48d3a83933e --- /dev/null +++ b/regression/cbmc/Array_Propagation1/main.c @@ -0,0 +1,21 @@ +#include + +struct S +{ + int a; +}; + +int main() +{ + struct S s; + s.a = 1; + + assert(s.a == 1); + + int A[1]; + A[0] = 1; + + assert(A[0] == 1); + + return 0; +} diff --git a/regression/cbmc/Array_Propagation1/test.desc b/regression/cbmc/Array_Propagation1/test.desc new file mode 100644 index 00000000000..a1d9fddba86 --- /dev/null +++ b/regression/cbmc/Array_Propagation1/test.desc @@ -0,0 +1,13 @@ +KNOWNBUG +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +(Starting CEGAR Loop|VCC\(s\), 0 remaining after simplification$) +-- +^warning: ignoring +-- +This requires field-sensitive SSA encoding of arrays to pass entirely via +constant propagation (changing the above condition to "1 remaining after +simplification" would also make it pass, but this isn't the point of the test). diff --git a/regression/cbmc/Struct_Propagation1/main.c b/regression/cbmc/Struct_Propagation1/main.c new file mode 100644 index 00000000000..56445e241b6 --- /dev/null +++ b/regression/cbmc/Struct_Propagation1/main.c @@ -0,0 +1,34 @@ +struct S +{ + int v; + struct Inner + { + int x; + } inner; +}; + +struct S works; + +int main() +{ + struct S fails; + + works.v = 2; + fails.v = 2; + + while(works.v > 0) + --(works.v); + + while(fails.v > 0) + --(fails.v); + + __CPROVER_assert(works.inner.x == 0, ""); + + _Bool b; + if(b) + { + struct S s = {42, {42}}; + } + + return 0; +} diff --git a/regression/cbmc/Struct_Propagation1/test.desc b/regression/cbmc/Struct_Propagation1/test.desc new file mode 100644 index 00000000000..42e6505e2d3 --- /dev/null +++ b/regression/cbmc/Struct_Propagation1/test.desc @@ -0,0 +1,9 @@ +CORE +main.c +--unwind 5 +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +(Starting CEGAR Loop|VCC\(s\), 0 remaining after simplification$) +-- +^warning: ignoring diff --git a/regression/cbmc/variable-access-to-constant-array/test.desc b/regression/cbmc/variable-access-to-constant-array/test.desc index ea0a082706e..7fa81e26cff 100644 --- a/regression/cbmc/variable-access-to-constant-array/test.desc +++ b/regression/cbmc/variable-access-to-constant-array/test.desc @@ -6,5 +6,10 @@ main.c ^VERIFICATION FAILED$ -- ^warning: ignoring +-- This test actually passes when using the SMT2 back-end, but takes 68 seconds to do so. + +Field-sensitive encoding of array accesses for an array of 2^16 elements is very +expensive. We might consider some bounds up to which we actually do field +expansion, or at least need to mark this test as "THOROUGH." diff --git a/regression/goto-harness/nondet_elements_longer_lists/test.desc b/regression/goto-harness/nondet_elements_longer_lists/test.desc index 88dc31c264c..cb71825070a 100644 --- a/regression/goto-harness/nondet_elements_longer_lists/test.desc +++ b/regression/goto-harness/nondet_elements_longer_lists/test.desc @@ -1,8 +1,6 @@ CORE main.c --harness-type call-function --max-nondet-tree-depth 4 --min-null-tree-depth 1 --function test_function -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 0: SUCCESS -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 1: SUCCESS \[test_function.assertion.\d+\] line \d+ assertion list_walker->datum == \+\+i: SUCCESS ^EXIT=0$ ^SIGNAL=0$ diff --git a/regression/goto-harness/nondet_elements_longer_lists_global/test.desc b/regression/goto-harness/nondet_elements_longer_lists_global/test.desc index 22bfdb8bfd7..52218e7877d 100644 --- a/regression/goto-harness/nondet_elements_longer_lists_global/test.desc +++ b/regression/goto-harness/nondet_elements_longer_lists_global/test.desc @@ -1,8 +1,6 @@ CORE main.c --harness-type call-function --max-nondet-tree-depth 4 --min-null-tree-depth 1 --function test_function --nondet-globals -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 0: SUCCESS -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 1: SUCCESS \[test_function.assertion.\d+\] line \d+ assertion list_walker->datum == \+\+i: SUCCESS ^EXIT=0$ ^SIGNAL=0$ diff --git a/src/goto-symex/Makefile b/src/goto-symex/Makefile index c2b8d7b8871..7876b206a28 100644 --- a/src/goto-symex/Makefile +++ b/src/goto-symex/Makefile @@ -1,5 +1,6 @@ SRC = auto_objects.cpp \ build_goto_trace.cpp \ + field_sensitivity.cpp \ goto_state.cpp \ goto_symex.cpp \ goto_symex_state.cpp \ diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp new file mode 100644 index 00000000000..e3465c2c44e --- /dev/null +++ b/src/goto-symex/field_sensitivity.cpp @@ -0,0 +1,313 @@ +/*******************************************************************\ + +Module: Field-sensitive SSA + +Author: Michael Tautschnig + +\*******************************************************************/ + +#include "field_sensitivity.h" + +#include +#include +#include +#include + +#include "goto_symex_state.h" +#include "symex_target.h" + +// #define ENABLE_ARRAY_FIELD_SENSITIVITY + +void field_sensitivityt::apply( + const namespacet &ns, + goto_symex_statet &state, + exprt &expr, + bool write) const +{ + if(!run_apply) + return; + + if(expr.id() != ID_address_of) + { + Forall_operands(it, expr) + apply(ns, state, *it, write); + } + + if(expr.id() == ID_symbol && expr.get_bool(ID_C_SSA_symbol) && !write) + { + expr = get_fields(ns, state, to_ssa_expr(expr)); + } + else if( + !write && expr.id() == ID_member && + to_member_expr(expr).struct_op().id() == ID_struct) + { + simplify(expr, ns); + } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + else if( + !write && expr.id() == ID_index && + to_index_expr(expr).array().id() == ID_array) + { + simplify(expr, ns); + } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY + else if(expr.id() == ID_member) + { + // turn a member-of-an-SSA-expression into a single SSA expression, thus + // encoding the member as an individual symbol rather than only the full + // struct + member_exprt &member = to_member_expr(expr); + + if( + member.struct_op().id() == ID_symbol && + member.struct_op().get_bool(ID_C_SSA_symbol) && + (member.struct_op().type().id() == ID_struct || + member.struct_op().type().id() == ID_struct_tag)) + { + // place the entire member expression, not just the struct operand, in an + // SSA expression + ssa_exprt tmp = to_ssa_expr(member.struct_op()); + bool was_l2 = !tmp.get_level_2().empty(); + + tmp.remove_level_2(); + member.struct_op() = tmp.get_original_expr(); + tmp.set_expression(member); + if(was_l2) + expr = state.rename(tmp, ns).get(); + else + expr.swap(tmp); + } + } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + else if(expr.id() == ID_index) + { + // turn a index-of-an-SSA-expression into a single SSA expression, thus + // encoding the index access into an array as an individual symbol rather + // than only the full array + index_exprt &index = to_index_expr(expr); + simplify(index.index(), ns); + + if( + index.array().id() == ID_symbol && + index.array().get_bool(ID_C_SSA_symbol) && + index.array().type().id() == ID_array && + index.index().id() == ID_constant) + { + // place the entire index expression, not just the array operand, in an + // SSA expression + ssa_exprt tmp = to_ssa_expr(index.array()); + bool was_l2 = !tmp.get_level_2().empty(); + + tmp.remove_level_2(); + index.array() = tmp.get_original_expr(); + tmp.set_expression(index); + if(was_l2) + expr = state.rename(tmp, ns).get(); + else + expr.swap(tmp); + } + } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY +} + +exprt field_sensitivityt::get_fields( + const namespacet &ns, + goto_symex_statet &state, + const ssa_exprt &ssa_expr) const +{ + if(ssa_expr.type().id() == ID_struct || ssa_expr.type().id() == ID_struct_tag) + { + const struct_typet &type = to_struct_type(ns.follow(ssa_expr.type())); + const struct_union_typet::componentst &components = type.components(); + + struct_exprt result(ssa_expr.type()); + result.reserve_operands(components.size()); + + const exprt &struct_op = ssa_expr.get_original_expr(); + + for(const auto &comp : components) + { + const member_exprt member(struct_op, comp.get_name(), comp.type()); + ssa_exprt tmp = ssa_expr; + bool was_l2 = !tmp.get_level_2().empty(); + tmp.remove_level_2(); + tmp.set_expression(member); + if(was_l2) + { + result.add_to_operands( + state.rename(get_fields(ns, state, tmp), ns).get()); + } + else + result.add_to_operands(get_fields(ns, state, tmp)); + } + + return std::move(result); + } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + else if( + ssa_expr.type().id() == ID_array && + to_array_type(ssa_expr.type()).size().id() == ID_constant) + { + const array_typet &type = to_array_type(ssa_expr.type()); + const std::size_t array_size = + numeric_cast_v(to_constant_expr(type.size())); + + array_exprt result(type); + result.reserve_operands(array_size); + + const exprt &array = ssa_expr.get_original_expr(); + + for(std::size_t i = 0; i < array_size; ++i) + { + const index_exprt index(array, from_integer(i, index_type())); + ssa_exprt tmp = ssa_expr; + bool was_l2 = !tmp.get_level_2().empty(); + tmp.remove_level_2(); + tmp.set_expression(index); + if(was_l2) + { + result.add_to_operands( + state.rename(get_fields(ns, state, tmp), ns).get()); + } + else + result.add_to_operands(get_fields(ns, state, tmp)); + } + + return std::move(result); + } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY + else + return ssa_expr; +} + +void field_sensitivityt::field_assignments( + const namespacet &ns, + goto_symex_statet &state, + const ssa_exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness) +{ + exprt lhs_fs = lhs; + apply(ns, state, lhs_fs, false); + + bool run_apply_bak = run_apply; + run_apply = false; + field_assignments_rec( + ns, state, lhs_fs, lhs, target, allow_pointer_unsoundness); + run_apply = run_apply_bak; +} + +/// Assign to the individual fields \p lhs_fs of a non-expanded symbol \p lhs. +/// This is required whenever prior steps have updated the full object rather +/// than individual fields, e.g., in case of assignments to an array at an +/// unknown index. +/// \param ns: a namespace to resolve type symbols/tag types +/// \param state: symbolic execution state +/// \param lhs_fs: expanded symbol +/// \param lhs: non-expanded symbol +/// \param target: symbolic execution equation store +/// \param allow_pointer_unsoundness: allow pointer unsoundness +void field_sensitivityt::field_assignments_rec( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs_fs, + const exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness) +{ + if(lhs == lhs_fs) + return; + else if(lhs_fs.id() == ID_symbol && lhs_fs.get_bool(ID_C_SSA_symbol)) + { + exprt ssa_rhs = state.rename(lhs, ns).get(); + simplify(ssa_rhs, ns); + + ssa_exprt ssa_lhs = to_ssa_expr(lhs_fs); + state.assignment( + ssa_lhs, ssa_rhs, ns, true, true, allow_pointer_unsoundness); + + // do the assignment + target.assignment( + state.guard.as_expr(), + ssa_lhs, + ssa_lhs, + ssa_lhs.get_original_expr(), + ssa_rhs, + state.source, + symex_targett::assignment_typet::STATE); + } + else if(lhs.type().id() == ID_struct || lhs.type().id() == ID_struct_tag) + { + const struct_typet &type = to_struct_type(ns.follow(lhs.type())); + const struct_union_typet::componentst &components = type.components(); + + PRECONDITION( + components.empty() || + (lhs_fs.has_operands() && lhs_fs.operands().size() == components.size())); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(const auto &comp : components) + { + const member_exprt member_rhs(lhs, comp.get_name(), comp.type()); + const exprt &member_lhs = *fs_it; + + field_assignments_rec( + ns, state, member_lhs, member_rhs, target, allow_pointer_unsoundness); + ++fs_it; + } + } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + else if(const auto &type = type_try_dynamic_cast(lhs.type())) + { + const std::size_t array_size = + numeric_cast_v(to_constant_expr(type->size())); + PRECONDITION( + lhs_fs.has_operands() && lhs_fs.operands().size() == array_size); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(std::size_t i = 0; i < array_size; ++i) + { + const index_exprt index_rhs(lhs, from_integer(i, index_type())); + const exprt &index_lhs = *fs_it; + + field_assignments_rec( + ns, state, index_lhs, index_rhs, target, allow_pointer_unsoundness); + ++fs_it; + } + } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY + else if(lhs_fs.has_operands()) + { + PRECONDITION( + lhs.has_operands() && lhs_fs.operands().size() == lhs.operands().size()); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(const exprt &op : lhs.operands()) + { + field_assignments_rec( + ns, state, *fs_it, op, target, allow_pointer_unsoundness); + ++fs_it; + } + } + else + { + UNREACHABLE; + } +} + +bool field_sensitivityt::is_divisible(const ssa_exprt &expr) +{ + if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag) + return true; + +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + if( + expr.type().id() == ID_array && + to_array_type(expr.type()).size().id() == ID_constant) + { + return true; + } +#endif + + return false; +} diff --git a/src/goto-symex/field_sensitivity.h b/src/goto-symex/field_sensitivity.h new file mode 100644 index 00000000000..35b10c90fbd --- /dev/null +++ b/src/goto-symex/field_sensitivity.h @@ -0,0 +1,90 @@ +/*******************************************************************\ + +Module: Field-sensitive SSA + +Author: Michael Tautschnig + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H +#define CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H + +class exprt; +class ssa_exprt; +class namespacet; +class goto_symex_statet; +class symex_targett; + +/// \brief Control granularity of object accesses +class field_sensitivityt +{ +public: + /// Assign to the individual fields of a non-expanded symbol \p lhs. This is + /// required whenever prior steps have updated the full object rather than + /// individual fields, e.g., in case of assignments to an array at an unknown + /// index. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param state: symbolic execution state + /// \param lhs: non-expanded symbol + /// \param target: symbolic execution equation store + /// \param allow_pointer_unsoundness: allow pointer unsoundness + void field_assignments( + const namespacet &ns, + goto_symex_statet &state, + const ssa_exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness); + + /// Turn an expression \p expr into a field-sensitive SSA expression. + /// Field-sensitive SSA expressions have individual symbols for each + /// component. While the exact granularity is an implementation detail, + /// possible implementations translate a struct-typed symbol into a struct of + /// member expressions, or an array-typed symbol into an array of index + /// expressions. Such expansions are not possible when the symbol is to be + /// written to (i.e., \p write is true); in such a case only translations from + /// existing member or index expressions into symbols are performed. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param [in,out] state: symbolic execution state + /// \param [in,out] expr: an expression to be (recursively) transformed - this + /// parameter is both input and output. + /// \param write: set to true if the expression is to be used as an lvalue. + void + apply(const namespacet &ns, goto_symex_statet &state, exprt &expr, bool write) + const; + + /// Compute an expression representing the individual components of a + /// field-sensitive SSA representation of \p ssa_expr. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param [in,out] state: symbolic execution state + /// \param ssa_expr: the expression to evaluate + /// \return Expanded expression; for example, for a \p ssa_expr of some struct + /// type, a `struct_exprt` with each component now being an SSA expression + /// is built. + exprt get_fields( + const namespacet &ns, + goto_symex_statet &state, + const ssa_exprt &ssa_expr) const; + + /// Determine whether \p expr would translate to an atomic SSA expression + /// (returns false) or a composite object made of several SSA expressions as + /// components (such as a struct with each member becoming an individual SSA + /// expression, return true in this case). + /// \param expr: the expression to evaluate + /// \return False, if and only if, \p expr would be a single field-sensitive + /// SSA expression. + static bool is_divisible(const ssa_exprt &expr); + +private: + /// whether or not to invoke \ref field_sensitivityt::apply + bool run_apply = true; + + void field_assignments_rec( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs_fs, + const exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness); +}; + +#endif // CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 3b75c5c8e98..c48145e94d5 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -279,9 +279,9 @@ class goto_symext void initialize_auto_object(const exprt &, statet &); void process_array_expr(statet &, exprt &); exprt make_auto_object(const typet &, statet &); - virtual void dereference(exprt &, statet &); + virtual void dereference(exprt &, statet &, bool write); - void dereference_rec(exprt &, statet &); + void dereference_rec(exprt &, statet &, bool write); exprt address_arithmetic( const exprt &, statet &, diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index a3ba83bc5f4..3943cf34788 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -170,6 +170,7 @@ void goto_symex_statet::assignment( { DATA_INVARIANT(!check_renaming_l1(lhs), "lhs renaming failed on l1"); } + const ssa_exprt l1_lhs = lhs; #if 0 PRECONDITION(l1_identifier != get_original_name(l1_identifier) @@ -221,11 +222,11 @@ void goto_symex_statet::assignment( value_set.assign(l1_lhs, l1_rhs, ns, rhs_is_simplified, is_shared); } - #if 0 +#if 0 std::cout << "Assigning " << l1_identifier << '\n'; value_set.output(ns, std::cout); std::cout << "**********************\n"; - #endif +#endif } template @@ -334,6 +335,10 @@ goto_symex_statet::rename(exprt expr, const namespacet &ns) c_expr.type() == to_if_expr(c_expr).false_case().type())), "Type of renamed expr should be the same as operands for with_exprt and " "if_exprt"); + + if(level == L2) + field_sensitivity.apply(ns, *this, expr, false); + return renamedt{std::move(expr)}; } } @@ -360,6 +365,10 @@ bool goto_symex_statet::l2_thread_read_encoding( return false; } + // only continue if an indivisible object is being accessed + if(field_sensitivityt::is_divisible(expr)) + return false; + ssa_exprt ssa_l1=expr; ssa_l1.remove_level_2(); const irep_idt &l1_identifier=ssa_l1.get_identifier(); @@ -485,6 +494,10 @@ goto_symex_statet::write_is_shared_resultt goto_symex_statet::write_is_shared( return write_is_shared_resultt::NOT_SHARED; } + // only continue if an indivisible object is being accessed + if(field_sensitivityt::is_divisible(expr)) + return write_is_shared_resultt::NOT_SHARED; + if(atomic_section_id != 0) return write_is_shared_resultt::IN_ATOMIC_SECTION; diff --git a/src/goto-symex/goto_symex_state.h b/src/goto-symex/goto_symex_state.h index 47785b08a72..0b00bad3fd1 100644 --- a/src/goto-symex/goto_symex_state.h +++ b/src/goto-symex/goto_symex_state.h @@ -25,6 +25,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "call_stack.h" +#include "field_sensitivity.h" #include "frame.h" #include "goto_state.h" #include "renaming_level.h" @@ -115,6 +116,8 @@ class goto_symex_statet final : public goto_statet bool record_value, bool allow_pointer_unsoundness=false); + field_sensitivityt field_sensitivity; + protected: template void rename_address(exprt &expr, const namespacet &ns); diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 963831e5bc1..50c325dd715 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -16,9 +16,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include "goto_symex_state.h" +// We can either use with_exprt or update_exprt when building expressions that +// modify components of an array or a struct. Set USE_UPDATE to use +// update_exprt. // #define USE_UPDATE void goto_symext::symex_assign(statet &state, const code_assignt &code) @@ -198,6 +202,163 @@ void goto_symext::symex_assign_rec( "assignment to `" + lhs.id_string() + "' not handled"); } +/// Replace "with" (or "update") expressions in \p ssa_rhs by their update +/// values and move the index or member to the left-hand side \p lhs_mod. This +/// effectively undoes the work that \ref goto_symext::symex_assign_array and +/// \ref goto_symext::symex_assign_struct_member have done, but now making use +/// of the index/member that may only be known after renaming to L2 has taken +/// place. +/// \param [in, out] state: symbolic execution state to perform renaming +/// \param [in,out] ssa_rhs: right-hand side +/// \param [in,out] lhs_mod: left-hand side +/// \param ns: namespace +static void rewrite_with_to_field_symbols( + goto_symext::statet &state, + exprt &ssa_rhs, + ssa_exprt &lhs_mod, + const namespacet &ns) +{ +#ifdef USE_UPDATE + while(ssa_rhs.id() == ID_update && + to_update_expr(ssa_rhs).designator().size() == 1 && + (lhs_mod.type().id() == ID_array || lhs_mod.type().id() == ID_struct || + lhs_mod.type().id() == ID_struct_tag)) + { + exprt field_sensitive_lhs; + const update_exprt &update = to_update_expr(ssa_rhs); + PRECONDITION(update.designator().size() == 1); + const exprt &designator = update.designator().front(); + + if(lhs_mod.type().id() == ID_array) + { + field_sensitive_lhs = + index_exprt(lhs_mod, to_index_designator(designator).index()); + } + else + { + field_sensitive_lhs = member_exprt( + lhs_mod, + to_member_designator(designator).get_component_name(), + update.new_value().type()); + } + + state.field_sensitivity.apply(state, field_sensitive_lhs, true); + + if(field_sensitive_lhs.id() != ID_symbol) + break; + + ssa_rhs = update.new_value(); + lhs_mod = to_ssa_expr(field_sensitive_lhs); + } +#else + while(ssa_rhs.id() == ID_with && + to_with_expr(ssa_rhs).operands().size() == 3 && + (lhs_mod.type().id() == ID_array || lhs_mod.type().id() == ID_struct || + lhs_mod.type().id() == ID_struct_tag)) + { + exprt field_sensitive_lhs; + const with_exprt &with_expr = to_with_expr(ssa_rhs); + + if(lhs_mod.type().id() == ID_array) + { + field_sensitive_lhs = index_exprt(lhs_mod, with_expr.where()); + } + else + { + field_sensitive_lhs = member_exprt( + lhs_mod, + with_expr.where().get(ID_component_name), + with_expr.new_value().type()); + } + + state.field_sensitivity.apply(ns, state, field_sensitive_lhs, true); + + if(field_sensitive_lhs.id() != ID_symbol) + break; + + ssa_rhs = with_expr.new_value(); + lhs_mod = to_ssa_expr(field_sensitive_lhs); + } +#endif +} + +/// Replace byte-update operations that only affect individual fields of an +/// expression by assignments to just those fields. May generate "with" (or +/// "update") expressions, which \ref rewrite_with_to_field_symbols will then +/// take care of. +/// \param [in,out] ssa_rhs: right-hand side +/// \param [in,out] lhs_mod: left-hand side +/// \param ns: namespace +/// \param do_simplify: set to true if, and only if, simplification is enabled +static void shift_indexed_access_to_lhs( + exprt &ssa_rhs, + ssa_exprt &lhs_mod, + const namespacet &ns, + bool do_simplify) +{ + if( + ssa_rhs.id() == ID_byte_update_little_endian || + ssa_rhs.id() == ID_byte_update_big_endian) + { + byte_update_exprt &byte_update = to_byte_update_expr(ssa_rhs); + exprt byte_extract = byte_extract_exprt( + byte_update.id() == ID_byte_update_big_endian + ? ID_byte_extract_big_endian + : ID_byte_extract_little_endian, + lhs_mod, + byte_update.offset(), + byte_update.value().type()); + if(do_simplify) + simplify(byte_extract, ns); + + if(byte_extract.id() == ID_symbol) + { + ssa_rhs = byte_update.value(); + lhs_mod = to_ssa_expr(byte_extract); + } + else if(byte_extract.id() == ID_index || byte_extract.id() == ID_member) + { + ssa_rhs = byte_update.value(); + + while(byte_extract.id() == ID_index || byte_extract.id() == ID_member) + { + if(byte_extract.id() == ID_index) + { + index_exprt &idx = to_index_expr(byte_extract); + +#ifdef USE_UPDATE + update_exprt new_rhs{idx.array(), exprt{}, ssa_rhs}; + new_rhs.designator().push_back(index_designatort{idx.index()}); +#else + with_exprt new_rhs{idx.array(), idx.index(), ssa_rhs}; +#endif + + ssa_rhs = new_rhs; + byte_extract = idx.array(); + } + else + { + member_exprt &member = to_member_expr(byte_extract); + const irep_idt &component_name = member.get_component_name(); + +#ifdef USE_UPDATE + update_exprt new_rhs{member.compound(), exprt{}, ssa_rhs}; + new_rhs.designator().push_back(member_designatort{component_name}); +#else + with_exprt new_rhs(member.compound(), exprt(ID_member_name), ssa_rhs); + new_rhs.where().set(ID_component_name, component_name); +#endif + + ssa_rhs = new_rhs; + byte_extract = member.compound(); + } + } + + lhs_mod = to_ssa_expr(byte_extract); + } + } +} + void goto_symext::symex_assign_symbol( statet &state, const ssa_exprt &lhs, // L1 @@ -216,11 +377,23 @@ void goto_symext::symex_assign_symbol( } exprt l2_rhs = state.rename(std::move(ssa_rhs), ns).get(); + + ssa_exprt lhs_mod = lhs; + + // Note the following two calls are specifically required for + // field-sensitivity. For example, with-expressions, which may have just been + // introduced by symex_assign_struct_member, are transformed into member + // expressions on the LHS. If we add an option to disable field-sensitivity + // in the future these should be omitted. + shift_indexed_access_to_lhs(l2_rhs, lhs_mod, ns, symex_config.simplify_opt); + rewrite_with_to_field_symbols(state, l2_rhs, lhs_mod, ns); + do_simplify(l2_rhs); - ssa_exprt ssa_lhs=lhs; + ssa_exprt l2_lhs = lhs_mod; + ssa_exprt l1_lhs = l2_lhs; // l2_lhs will be renamed to L2 by the following: state.assignment( - ssa_lhs, + l2_lhs, l2_rhs, ns, symex_config.simplify_opt, @@ -228,41 +401,51 @@ void goto_symext::symex_assign_symbol( symex_config.allow_pointer_unsoundness); exprt ssa_full_lhs=full_lhs; - ssa_full_lhs=add_to_lhs(ssa_full_lhs, ssa_lhs); + ssa_full_lhs = add_to_lhs(ssa_full_lhs, l2_lhs); const bool record_events=state.record_events; state.record_events=false; exprt l2_full_lhs = state.rename(std::move(ssa_full_lhs), ns).get(); state.record_events=record_events; // do the assignment - const symbolt &symbol = - ns.lookup(to_symbol_expr(ssa_lhs.get_original_expr())); + const symbolt &symbol = ns.lookup(l2_lhs.get_object_name()); if(symbol.is_auxiliary) assignment_type=symex_targett::assignment_typet::HIDDEN; log.conditional_output( - log.debug(), - [this, &ssa_lhs](messaget::mstreamt &mstream) { - mstream << "Assignment to " << ssa_lhs.get_identifier() - << " [" - << pointer_offset_bits(ssa_lhs.type(), ns).value_or(0) - << " bits]" + log.debug(), [this, &l2_lhs](messaget::mstreamt &mstream) { + mstream << "Assignment to " << l2_lhs.get_identifier() << " [" + << pointer_offset_bits(l2_lhs.type(), ns).value_or(0) << " bits]" << messaget::eom; }); // Temporarily add the state guard guard.emplace_back(state.guard.as_expr()); + exprt original_lhs = l2_full_lhs; + get_original_name(original_lhs); target.assignment( conjunction(guard), - ssa_lhs, + l2_lhs, l2_full_lhs, - add_to_lhs(full_lhs, ssa_lhs.get_original_expr()), + original_lhs, l2_rhs, state.source, assignment_type); + if(field_sensitivityt::is_divisible(l1_lhs)) + { + // Split composite symbol lhs into its components + state.field_sensitivity.field_assignments( + ns, state, l1_lhs, target, symex_config.allow_pointer_unsoundness); + // Erase the composite symbol from our working state. Note that we need to + // have it in the propagation table and the value set while doing the field + // assignments, thus we cannot skip putting it in there above. + state.propagation.erase(l1_lhs.get_identifier()); + state.value_set.erase_symbol(l1_lhs, ns); + } + // Restore the guard guard.pop_back(); } diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index ee764424c66..328e227ce9a 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -175,7 +175,7 @@ void goto_symext::clean_expr( const bool write) { replace_nondet(expr, path_storage.build_symex_nondet); - dereference(expr, state); + dereference(expr, state, write); // make sure all remaining byte extract operations use the root // object to avoid nesting of with/update and byte_update when on diff --git a/src/goto-symex/symex_dead.cpp b/src/goto-symex/symex_dead.cpp index dd6152e41df..d563c16b4fc 100644 --- a/src/goto-symex/symex_dead.cpp +++ b/src/goto-symex/symex_dead.cpp @@ -11,6 +11,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "goto_symex.h" +#include #include #include @@ -22,23 +23,25 @@ void goto_symext::symex_dead(statet &state) const code_deadt &code = instruction.get_dead(); ssa_exprt ssa = state.rename_ssa(ssa_exprt{code.symbol()}, ns).get(); - const irep_idt &l1_identifier = ssa.get_identifier(); - // we cannot remove the object from the L1 renaming map, because L1 renaming - // information is not local to a path, but removing it from the propagation - // map and value-set is safe as 1) it is local to a path and 2) this instance - // can no longer appear + const exprt fields = state.field_sensitivity.get_fields(ns, state, ssa); + find_symbols_sett fields_set; + find_symbols(fields, fields_set); - if(state.value_set.values.has_key(l1_identifier)) + for(const irep_idt &l1_identifier : fields_set) { - state.value_set.values.erase(l1_identifier); + // We cannot remove the object from the L1 renaming map, because L1 renaming + // information is not local to a path, but removing it from the propagation + // map and value-set is safe as 1) it is local to a path and 2) this + // instance can no longer appear. + if(state.value_set.values.has_key(l1_identifier)) + state.value_set.values.erase(l1_identifier); + state.propagation.erase(l1_identifier); + // Remove from the local L2 renaming map; this means any reads from the dead + // identifier will use generation 0 (e.g. x!N@M#0, where N and M are + // positive integers), which is never defined by any write, and will be + // dropped by `goto_symext::merge_goto` on merging with branches where the + // identifier is still live. + state.drop_l1_name(l1_identifier); } - - state.propagation.erase(l1_identifier); - // Remove from the local L2 renaming map; this means any reads from the dead - // identifier will use generation 0 (e.g. x!N@M#0, where N and M are positive - // integers), which is never defined by any write, and will be dropped by - // `goto_symext::merge_goto` on merging with branches where the identifier - // is still live. - state.drop_l1_name(l1_identifier); } diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 531daa4838b..e6879fd491e 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -70,7 +70,7 @@ exprt goto_symext::address_arithmetic( // there could be further dereferencing in the offset exprt offset=be.offset(); - dereference_rec(offset, state); + dereference_rec(offset, state, false); result=plus_exprt(result, offset); @@ -106,14 +106,14 @@ exprt goto_symext::address_arithmetic( // just grab the pointer, but be wary of further dereferencing // in the pointer itself result=to_dereference_expr(expr).pointer(); - dereference_rec(result, state); + dereference_rec(result, state, false); } else if(expr.id()==ID_if) { if_exprt if_expr=to_if_expr(expr); // the condition is not an address - dereference_rec(if_expr.cond(), state); + dereference_rec(if_expr.cond(), state, false); // recursive call if_expr.true_case() = @@ -130,7 +130,7 @@ exprt goto_symext::address_arithmetic( { // give up, just dereference result=expr; - dereference_rec(result, state); + dereference_rec(result, state, false); // turn &array into &array[0] if(result.type().id() == ID_array && !keep_array) @@ -198,7 +198,7 @@ exprt goto_symext::address_arithmetic( /// such as `&struct.flexible_array[0]` (see inline comments in code). /// For full details of this method's pointer replacement and potential side- /// effects see \ref goto_symext::dereference -void goto_symext::dereference_rec(exprt &expr, statet &state) +void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) { if(expr.id()==ID_dereference) { @@ -221,7 +221,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) tmp1.swap(to_dereference_expr(expr).pointer()); // first make sure there are no dereferences in there - dereference_rec(tmp1, state); + dereference_rec(tmp1, state, false); // we need to set up some elaborate call-backs symex_dereference_statet symex_dereference_state(state, ns); @@ -241,6 +241,10 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) // this may yield a new auto-object trigger_auto_object(expr, state); + + // ...and may have introduced a member-of-symbol construct with a + // corresponding SSA symbol: + state.field_sensitivity.apply(ns, state, expr, write); } else if( expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member && @@ -259,7 +263,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) tmp.add_source_location()=expr.source_location(); // recursive call - dereference_rec(tmp, state); + dereference_rec(tmp, state, write); expr.swap(tmp); } @@ -297,17 +301,17 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) to_address_of_expr(tc_op).object(), from_integer(0, index_type()))); - dereference_rec(expr, state); + dereference_rec(expr, state, write); } else { - dereference_rec(tc_op, state); + dereference_rec(tc_op, state, write); } } else { Forall_operands(it, expr) - dereference_rec(*it, state); + dereference_rec(*it, state, write); } } @@ -348,7 +352,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) /// dereferenced. If new objects are created by this mechanism then /// state will be altered (by `symex_assign`) to initialise them. /// See \ref auto_objects.cpp for details. -void goto_symext::dereference(exprt &expr, statet &state) +void goto_symext::dereference(exprt &expr, statet &state, bool write) { // The expression needs to be renamed to level 1 // in order to distinguish addresses of local variables @@ -356,9 +360,10 @@ void goto_symext::dereference(exprt &expr, statet &state) // symbols whose address is taken. PRECONDITION(!state.call_stack().empty()); exprt l1_expr = state.rename(expr, ns).get(); + state.field_sensitivity.apply(ns, state, l1_expr, write); // start the recursion! - dereference_rec(l1_expr, state); + dereference_rec(l1_expr, state, write); // dereferencing may introduce new symbol_exprt // (like __CPROVER_memory) expr = state.rename(std::move(l1_expr), ns).get(); @@ -388,4 +393,6 @@ void goto_symext::dereference(exprt &expr, statet &state) !has_subexpr(expr, ID_dereference), "simplify re-introduced dereferencing"); } + + state.field_sensitivity.apply(ns, state, expr, write); } diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index 667c911c12f..55556d3d8df 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -493,6 +493,10 @@ static void merge_names( return; } + // field sensitivity: only merge on individual fields + if(field_sensitivityt::is_divisible(ssa)) + return; + // shared variables are renamed on every access anyway, we don't need to // merge anything const symbolt &symbol = ns.lookup(obj_identifier); diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 6a5a507ce57..8218efdf428 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1653,3 +1653,46 @@ void value_sett::erase_values_from_entry( } values.replace(index, std::move(replacement)); } + +void value_sett::erase_struct_union_symbol( + const struct_union_typet &struct_union_type, + const std::string &erase_prefix, + const namespacet &ns) +{ + for(const auto &c : struct_union_type.components()) + { + const typet &subtype = c.type(); + const irep_idt &name = c.get_name(); + + // ignore methods and padding + if(subtype.id() == ID_code || c.get_is_padding()) + continue; + + erase_symbol_rec(subtype, erase_prefix + "." + id2string(name), ns); + } +} + +void value_sett::erase_symbol_rec( + const typet &type, + const std::string &erase_prefix, + const namespacet &ns) +{ + if(type.id() == ID_struct_tag) + erase_struct_union_symbol( + ns.follow_tag(to_struct_tag_type(type)), erase_prefix, ns); + else if(type.id() == ID_union_tag) + erase_struct_union_symbol( + ns.follow_tag(to_union_tag_type(type)), erase_prefix, ns); + else if(type.id() == ID_array) + erase_symbol_rec(type.subtype(), erase_prefix + "[]", ns); + else if(values.has_key(erase_prefix)) + values.erase(erase_prefix); +} + +void value_sett::erase_symbol( + const symbol_exprt &symbol_expr, + const namespacet &ns) +{ + erase_symbol_rec( + symbol_expr.type(), id2string(symbol_expr.get_identifier()), ns); +} diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index dbaf1998cde..d5537d0502f 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -483,6 +483,8 @@ class value_sett const irep_idt &index, const std::unordered_set &values_to_erase); + void erase_symbol(const symbol_exprt &symbol_expr, const namespacet &ns); + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. @@ -517,6 +519,16 @@ class value_sett const exprt &src, exprt &dest) const; + void erase_symbol_rec( + const typet &type, + const std::string &erase_prefix, + const namespacet &ns); + + void erase_struct_union_symbol( + const struct_union_typet &struct_union_type, + const std::string &erase_prefix, + const namespacet &ns); + // Subclass customisation points: protected: diff --git a/src/util/ssa_expr.cpp b/src/util/ssa_expr.cpp index ccd065bb79e..9ac1f664c4f 100644 --- a/src/util/ssa_expr.cpp +++ b/src/util/ssa_expr.cpp @@ -16,7 +16,7 @@ Author: Daniel Kroening, kroening@kroening.com /// If \p expr is a symbol "s" add to \p os "s!l0@l1#l2" and to \p l1_object_os /// "s!l0@l1". /// If \p expr is a member or index expression, recursively apply the procedure -/// and add ".component_name" or "[index]" to \p os. +/// and add "..component_name" or "[[index]]" to \p os. static void build_ssa_identifier_rec( const exprt &expr, const irep_idt &l0, @@ -31,8 +31,8 @@ static void build_ssa_identifier_rec( build_ssa_identifier_rec(member.struct_op(), l0, l1, l2, os, l1_object_os); - os << '.' << member.get_component_name(); - l1_object_os << '.' << member.get_component_name(); + os << ".." << member.get_component_name(); + l1_object_os << ".." << member.get_component_name(); } else if(expr.id()==ID_index) { @@ -42,8 +42,8 @@ static void build_ssa_identifier_rec( const mp_integer idx = numeric_cast_v(to_constant_expr(index.index())); - os << '[' << idx << ']'; - l1_object_os << '[' << idx << ']'; + os << "[[" << idx << "]]"; + l1_object_os << "[[" << idx << "]]"; } else if(expr.id()==ID_symbol) { diff --git a/src/util/ssa_expr.h b/src/util/ssa_expr.h index 23ee49c1756..364d403dcfe 100644 --- a/src/util/ssa_expr.h +++ b/src/util/ssa_expr.h @@ -35,6 +35,16 @@ class ssa_exprt:public symbol_exprt return static_cast(find(ID_expression)); } + /// Replace the underlying, original expression by \p expr while maintaining + /// SSA indices. + /// \param expr: expression to store + void set_expression(const exprt &expr) + { + type() = expr.type(); + add(ID_expression, expr); + update_identifier(); + } + irep_idt get_object_name() const { const exprt &original_expr = get_original_expr();