From 943d60c8b67194744dc1aec27fdfe7e1c655e412 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 27 Jul 2018 11:27:17 +0100 Subject: [PATCH] Value set: handle with, array and struct expressions more accurately We may have a suffix indicating that the result of a get_value_set_rec query would effectively be projected down to particular struct member or array indices on completion (and so we in fact do this eagerly at the bottom of the recursion, rather than at the top as one might expect). However, the ID_array, ID_struct and ID_with expressions are widening -- one or more of their operands is integrated into a larger type -- so we should consume suffix entries introduced by ID_member expressions if available. --- .../string_field_aliasing/Cart$Category.class | Bin 0 -> 406 bytes .../string_field_aliasing/Cart$Product.class | Bin 0 -> 473 bytes .../jbmc/string_field_aliasing/Cart.class | Bin 0 -> 680 bytes .../jbmc/string_field_aliasing/Cart.java | 16 + .../jbmc/string_field_aliasing/test.desc | 13 + src/pointer-analysis/value_set.cpp | 151 ++++++--- unit/pointer-analysis/value_set.cpp | 314 ++++++++++++++++++ 7 files changed, 451 insertions(+), 43 deletions(-) create mode 100644 jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class create mode 100644 jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class create mode 100644 jbmc/regression/jbmc/string_field_aliasing/Cart.class create mode 100644 jbmc/regression/jbmc/string_field_aliasing/Cart.java create mode 100644 jbmc/regression/jbmc/string_field_aliasing/test.desc create mode 100644 unit/pointer-analysis/value_set.cpp diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class b/jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class new file mode 100644 index 0000000000000000000000000000000000000000..e3dcef537a3d4923196d50e02f9143300518a055 GIT binary patch literal 406 zcmYLFJx{|h6ug&)qzNerRt#VOLg;5s&kghc) zv{O;bqB75f=tgU4(o!rIk|%NF-20vLt>L6vnp|EhYZ}-nXEtpFAMFSM4tyLEI(G1P zjF9}l=q{Vdye72ztGq14F@J^2zv6(f1`kLQ>&Fc73J`K#=WfIbhif=EeFY8P;Jh$$ j5b^Fc+BQ1e@2*tXz$PQsH*Umimyv@lBn`_cSQ`W19#2fh literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class b/jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class new file mode 100644 index 0000000000000000000000000000000000000000..de058dc87d416efeaea285082346c5c350301ff6 GIT binary patch literal 473 zcmY*VO-sW-6r4?B)6}$$RqMCyp@&+*dhn+75)g!mu zl=wEOiUc-qX5Y@td!JwL9{|qK_F!YX0tY)T_FU{UIGKDF42|KOKk|OelUaYHl}u*m z40gn|ivxyo$hDZI>WRU>N)n;Mm}i;D80y22E8Y3Ir$;4yFSE`m75SB6E@UG0fWhk) z)X|uH!*oi%^`T6}_2OY7)E%G1d6G2Z@t7-V@RD?lD1&>e(&-}747I{{iOHlgN*5{; zm(pl>Ce(?U$cK-rj~WhLv?zU^_E#nN8~SE47e=`HZywq~w@6EZXw&u_TBHqz6_lu= zNLU4ez}Vy%DNV7(BsutsNzdW=$o&Suetb!-x%{#=a_+azRR Six^O+g)(;GQEk(I1Na6iUt632 literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart.class b/jbmc/regression/jbmc/string_field_aliasing/Cart.class new file mode 100644 index 0000000000000000000000000000000000000000..ec46760492257797fc0efe0629abb0e3f90bb7fd GIT binary patch literal 680 zcmZWmT~8BH5IwhDy504*#T8KN7f98XB7Nbl#+N20CXJfVV8Yw&a?@TcyJmN5fZxOm z325R2KY$-aJa=0Z!#>R1nK^Uj%p33wnPL2iS$0*WCZ=%vvr!L+IxL=ZZe;Q^2rFSMW*?WOvv$ZSW^x}OjQ0tpW zf0`Z+buv)H2{S=I4%K8=CC0|{q?3(JN{)U{C0SR%3&%SAHc&^~0_)8@+n7)4_7Bt# z)tRVh)Y-`r6OFp9Jqr1LBc%F2TtJXHxZ5S*@5Iw2)E|u1<=N3&mRq8V8uw7cP@;*t zL<<`dZPZv!#T&oPn1rXExx27OZp1%2Xg?0rvLx| literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart.java b/jbmc/regression/jbmc/string_field_aliasing/Cart.java new file mode 100644 index 00000000000..d14d35baafc --- /dev/null +++ b/jbmc/regression/jbmc/string_field_aliasing/Cart.java @@ -0,0 +1,16 @@ +public class Cart { + + class Product { + public String size; + public Category cat; + } + + class Category { + public String name; + } + + public boolean checkTax4(Product product, String s) { + product.size="abc"; + return s.startsWith(product.cat.name); + } +} diff --git a/jbmc/regression/jbmc/string_field_aliasing/test.desc b/jbmc/regression/jbmc/string_field_aliasing/test.desc new file mode 100644 index 00000000000..0666460000c --- /dev/null +++ b/jbmc/regression/jbmc/string_field_aliasing/test.desc @@ -0,0 +1,13 @@ +CORE +Cart.class +--cp `../../../../scripts/format_classpath.sh . ../../../src/java_bytecode/library/core-models.jar` --trace --cover location --java-max-vla-length 96 --refine-strings --java-unwind-enum-static --max-nondet-string-length 200 --unwind 4 Cart.class --function Cart.checkTax4 --string-printable +^EXIT=0$ +^SIGNAL=0$ +checkTax4:.*: SATISFIED +-- +checkTax4:.*bytecode-index 8.*: FAILED +dec_solve: current index set is empty, this should not happen +-- +This checks that assigning to a field of an object (which generates a WITH expression in this case) doesn't result +in conflating the String-typed and non-String-typed fields of the Cart class. When they are conflated we get +warnings from the string solver and missing coverage. diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index aea661e5b62..474a8010662 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -344,6 +344,30 @@ void value_sett::get_value_set( get_value_set_rec(tmp, dest, "", tmp.type(), ns); } +/// Check if 'suffix' starts with 'field'. +/// Suffix is delimited by periods and '[]' (array access) tokens, so we're +/// looking for ".field($|[]|.)" +static bool suffix_starts_with_field( + const std::string &suffix, const std::string &field) +{ + return + !suffix.empty() && + suffix[0] == '.' && + suffix.compare(1, field.length(), field) == 0 && + (suffix.length() == field.length() + 1 || + suffix[field.length() + 1] == '.' || + suffix[field.length() + 1] == '['); +} + +static std::string strip_first_field_from_suffix( + const std::string &suffix, const std::string &field) +{ + INVARIANT( + suffix_starts_with_field(suffix, field), + "suffix should start with " + field); + return suffix.substr(field.length() + 1); +} + void value_sett::get_value_set_rec( const exprt &expr, object_mapt &dest, @@ -711,72 +735,113 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_struct) { + const auto &struct_components = to_struct_type(expr_type).components(); + INVARIANT( + struct_components.size() == expr.operands().size(), + "struct expression should have an operand per component"); + auto component_iter = struct_components.begin(); + bool found_component = false; + // a struct constructor, which may contain addresses + forall_operands(it, expr) - get_value_set_rec(*it, dest, suffix, original_type, ns); + { + const std::string &component_name = + id2string(component_iter->get_name()); + if(suffix_starts_with_field(suffix, component_name)) + { + std::string remaining_suffix = + strip_first_field_from_suffix(suffix, component_name); + get_value_set_rec(*it, dest, remaining_suffix, original_type, ns); + found_component = true; + } + ++component_iter; + } + + if(!found_component) + { + // Struct field doesn't appear as expected -- this has probably been + // cast from an incompatible type. Conservatively assume all fields may + // be of interest. + forall_operands(it, expr) + get_value_set_rec(*it, dest, suffix, original_type, ns); + } } else if(expr.id()==ID_with) { - assert(expr.operands().size()==3); - - // this is the array/struct - object_mapt tmp_map0; - get_value_set_rec(expr.op0(), tmp_map0, suffix, original_type, ns); + const with_exprt &with_expr = to_with_expr(expr); - // this is the update value -- note NO SUFFIX - object_mapt tmp_map2; - get_value_set_rec(expr.op2(), tmp_map2, "", original_type, ns); - - if(expr_type.id()==ID_struct) + // If the suffix is empty we're looking for the whole struct: + // default to combining both options as below. + if(expr_type.id() == ID_struct && !suffix.empty()) { - #if 0 - const object_map_dt &object_map0=tmp_map0.read(); - irep_idt component_name=expr.op1().get(ID_component_name); - - bool insert=true; - - for(object_map_dt::const_iterator - it=object_map0.begin(); - it!=object_map0.end(); - it++) + irep_idt component_name = with_expr.where().get(ID_component_name); + if(suffix_starts_with_field(suffix, id2string(component_name))) { - const exprt &e=to_expr(it); - - if(e.id()==ID_member && - e.get(ID_component_name)==component_name) - { - if(insert) - { - dest.write().insert(tmp_map2.read().begin(), tmp_map2.read().end()); - insert=false; - } - } - else - dest.write().insert(*it); + // Looking for the member overwritten by this WITH expression + std::string remaining_suffix = + strip_first_field_from_suffix(suffix, id2string(component_name)); + get_value_set_rec( + with_expr.new_value(), dest, remaining_suffix, original_type, ns); + } + else if(to_struct_type(expr_type).has_component(component_name)) + { + // Looking for a non-overwritten member, look through this expression + get_value_set_rec(with_expr.old(), dest, suffix, original_type, ns); + } + else + { + // Member we're looking for is not defined in this struct -- this + // must be a reinterpret cast of some sort. Default to conservatively + // assuming either operand might be involved. + get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op2(), dest, "", original_type, ns); } - #else - // Should be more precise! We only want "suffix" - make_union(dest, tmp_map0); - make_union(dest, tmp_map2); - #endif + } + else if(expr_type.id() == ID_array && !suffix.empty()) + { + std::string new_value_suffix; + if(has_prefix(suffix, "[]")) + new_value_suffix = suffix.substr(2); + + // Otherwise use a blank suffix on the assumption anything involved with + // the new value might be interesting. + + get_value_set_rec(with_expr.old(), dest, suffix, original_type, ns); + get_value_set_rec( + with_expr.new_value(), dest, new_value_suffix, original_type, ns); } else { - make_union(dest, tmp_map0); - make_union(dest, tmp_map2); + // Something else-- the suffixes used here are a rough guess at best, + // so this is imprecise. + get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op2(), dest, "", original_type, ns); } } else if(expr.id()==ID_array) { // an array constructor, possibly containing addresses + std::string new_suffix = suffix; + if(has_prefix(suffix, "[]")) + new_suffix = suffix.substr(2); + // Otherwise we're probably reinterpreting some other type -- try persisting + // with the current suffix for want of a better idea. + forall_operands(it, expr) - get_value_set_rec(*it, dest, suffix, original_type, ns); + get_value_set_rec(*it, dest, new_suffix, original_type, ns); } else if(expr.id()==ID_array_of) { // an array constructor, possibly containing an address + std::string new_suffix = suffix; + if(has_prefix(suffix, "[]")) + new_suffix = suffix.substr(2); + // Otherwise we're probably reinterpreting some other type -- try persisting + // with the current suffix for want of a better idea. + assert(expr.operands().size()==1); - get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op0(), dest, new_suffix, original_type, ns); } else if(expr.id()==ID_dynamic_object) { diff --git a/unit/pointer-analysis/value_set.cpp b/unit/pointer-analysis/value_set.cpp new file mode 100644 index 00000000000..7c7b4a7e2d6 --- /dev/null +++ b/unit/pointer-analysis/value_set.cpp @@ -0,0 +1,314 @@ +/*******************************************************************\ + + Module: Unit tests for value_sett + + Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Unit tests for value_sett + +#include + +#include +#include +#include + +static bool object_descriptor_matches( + const exprt &descriptor_expr, const exprt &target) +{ + if(descriptor_expr.id() != ID_object_descriptor) + return false; + const auto &descriptor = to_object_descriptor_expr(descriptor_expr); + + if( + target.type().id() == ID_pointer && + target == null_pointer_exprt(to_pointer_type(target.type()))) + { + return + descriptor.object().id() == "NULL-object" && + descriptor.object().type() == target.type().subtype(); + } + else + { + return descriptor.object() == target; + } +} + +SCENARIO( + "value_sett", "[core][pointer-analysis][value_set]") +{ + symbol_tablet symbol_table; + namespacet ns(symbol_table); + value_sett value_set; + + GIVEN("A value-set containing some structure-typed objects") + { + // Create struct A { A *x; A *y } + + struct_typet struct_A; + struct_A.set_tag("A"); + struct_A.components().resize(2); + + type_symbolt A_symbol(struct_A); + A_symbol.name = "A"; + A_symbol.base_name = "A"; + A_symbol.pretty_name = "A"; + + auto &A_x = struct_A.components()[0]; + auto &A_y = struct_A.components()[1]; + + A_x.set_name("x"); + A_x.set_base_name("x"); + A_x.set_pretty_name("x"); + A_x.type() = pointer_typet(symbol_typet(A_symbol.name), 64); + + A_y.set_name("y"); + A_y.set_base_name("y"); + A_y.set_pretty_name("y"); + A_y.type() = pointer_typet(symbol_typet(A_symbol.name), 64); + + A_symbol.type = struct_A; + symbol_table.add(A_symbol); + + // Create global symbols struct A a1, a2, a3; + + symbolt a1_symbol; + a1_symbol.name = "a1"; + a1_symbol.base_name = "a1"; + a1_symbol.pretty_name = "a1"; + a1_symbol.type = symbol_typet(A_symbol.name); + a1_symbol.is_static_lifetime = true; + symbol_table.add(a1_symbol); + + symbolt a2_symbol; + a2_symbol.name = "a2"; + a2_symbol.base_name = "a2"; + a2_symbol.pretty_name = "a2"; + a2_symbol.type = symbol_typet(A_symbol.name); + a2_symbol.is_static_lifetime = true; + symbol_table.add(a2_symbol); + + symbolt a3_symbol; + a3_symbol.name = "a3"; + a3_symbol.base_name = "a3"; + a3_symbol.pretty_name = "a3"; + a3_symbol.type = symbol_typet(A_symbol.name); + a3_symbol.is_static_lifetime = true; + symbol_table.add(a3_symbol); + + // Assign a1.x = &a2; a1.y = &a3: + + member_exprt a1_x(a1_symbol.symbol_expr(), A_x); + member_exprt a1_y(a1_symbol.symbol_expr(), A_y); + + code_assignt assign_x(a1_x, address_of_exprt(a2_symbol.symbol_expr())); + code_assignt assign_y(a1_y, address_of_exprt(a3_symbol.symbol_expr())); + + value_set.apply_code(assign_x, ns); + value_set.apply_code(assign_y, ns); + + null_pointer_exprt null_A_ptr(to_pointer_type(a1_x.type())); + + WHEN("We query what a1.x points to") + { + std::list a1_x_result; + value_set.get_value_set(a1_x, a1_x_result, ns); + + THEN("It should point to 'a2'") + { + REQUIRE(a1_x_result.size() == 1); + const exprt &result = *a1_x_result.begin(); + REQUIRE(object_descriptor_matches(result, a2_symbol.symbol_expr())); + } + } + + WHEN("We query what a1.y points to") + { + std::list a1_y_result; + value_set.get_value_set(a1_y, a1_y_result, ns); + + THEN("It should point to 'a3'") + { + REQUIRE(a1_y_result.size() == 1); + const exprt &result = *a1_y_result.begin(); + REQUIRE(object_descriptor_matches(result, a3_symbol.symbol_expr())); + } + } + + WHEN("We query what (a1 WITH x = NULL).x points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + member_exprt member_of_with(a1_with, A_x); + + std::list matching_with_result; + value_set.get_value_set(member_of_with, matching_with_result, ns); + + THEN("It should be NULL") + { + REQUIRE(matching_with_result.size() == 1); + const exprt &result = *matching_with_result.begin(); + REQUIRE(object_descriptor_matches(result, null_A_ptr)); + } + } + + WHEN("We query what (a1 WITH x = NULL).y points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + member_exprt member_of_with(a1_with, A_y); + + std::list non_matching_with_result; + value_set.get_value_set(member_of_with, non_matching_with_result, ns); + + THEN("It should point to 'a3'") + { + REQUIRE(non_matching_with_result.size() == 1); + const exprt &result = *non_matching_with_result.begin(); + REQUIRE(object_descriptor_matches(result, a3_symbol.symbol_expr())); + } + } + + WHEN("We query what (a1 WITH x = NULL) points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + std::list maybe_matching_with_result; + value_set.get_value_set(a1_with, maybe_matching_with_result, ns); + + THEN("It may point to NULL") + { + bool found = false; + for(const exprt &e : maybe_matching_with_result) + { + if(object_descriptor_matches(e, a1_with.new_value())) + found = true; + } + + REQUIRE(found); + } + + THEN("It may point to 'a2'") + { + // This happens because no entry for the whole struct is recorded, so + // value_sett tries looking up the struct's first component. + + bool found = false; + for(const exprt &e : maybe_matching_with_result) + { + if(object_descriptor_matches(e, a2_symbol.symbol_expr())) + found = true; + } + + REQUIRE(found); + } + } + + WHEN("We query what '{ .x = &a2, .y = &a3 }.x' points to") + { + struct_exprt struct_constant(symbol_typet(A_symbol.name)); + struct_constant.copy_to_operands( + address_of_exprt(a2_symbol.symbol_expr())); + struct_constant.copy_to_operands( + address_of_exprt(a3_symbol.symbol_expr())); + + member_exprt member_of_constant(struct_constant, A_x); + + std::list member_of_constant_result; + value_set.get_value_set( + member_of_constant, member_of_constant_result, ns); + + THEN("It should point to 'a2'") + { + REQUIRE(member_of_constant_result.size() == 1); + const exprt &result = *member_of_constant_result.begin(); + REQUIRE(object_descriptor_matches(result, a2_symbol.symbol_expr())); + } + } + } + + GIVEN("Some global integer symbols") + { + // Make some global integers i1, i2, i3: + signedbv_typet int32_type(32); + pointer_typet int32_ptr(int32_type, 64); + + symbolt i1; + i1.name = "i1"; + i1.base_name = "i1"; + i1.pretty_name = "i1"; + i1.type = int32_type; + i1.is_static_lifetime = true; + symbol_table.add(i1); + + symbolt i2; + i2.name = "i2"; + i2.base_name = "i2"; + i2.pretty_name = "i2"; + i2.type = int32_type; + i2.is_static_lifetime = true; + symbol_table.add(i2); + + symbolt i3; + i3.name = "i3"; + i3.base_name = "i3"; + i3.pretty_name = "i3"; + i3.type = int32_type; + i3.is_static_lifetime = true; + symbol_table.add(i3); + + WHEN("We query { &i1, &i2 }[i3]") + { + array_exprt arr(array_typet(int32_ptr, from_integer(2, int32_type))); + arr.copy_to_operands(address_of_exprt(i1.symbol_expr())); + arr.copy_to_operands(address_of_exprt(i2.symbol_expr())); + + index_exprt index_of_arr(arr, i3.symbol_expr()); + + std::list index_of_arr_result; + value_set.get_value_set(index_of_arr, index_of_arr_result, ns); + + THEN("We should get either &i1 or &i2, but not unknown") + { + REQUIRE(index_of_arr_result.size() == 2); + + bool found_i1 = false, found_i2 = false; + for(const exprt &result : index_of_arr_result) + { + if(object_descriptor_matches(result, i1.symbol_expr())) + found_i1 = true; + else if(object_descriptor_matches(result, i2.symbol_expr())) + found_i2 = true; + } + + REQUIRE(found_i1); + REQUIRE(found_i2); + } + } + + WHEN("We query (ARRAY_OF(&i1))[i3]") + { + array_of_exprt arr( + address_of_exprt(i1.symbol_expr()), + array_typet(int32_ptr, from_integer(2, int32_type))); + + index_exprt index_of_arr(arr, i3.symbol_expr()); + + std::list index_of_arr_result; + value_set.get_value_set(index_of_arr, index_of_arr_result, ns); + + THEN("We should get &i1") + { + REQUIRE(index_of_arr_result.size() == 1); + REQUIRE( + object_descriptor_matches( + *index_of_arr_result.begin(), i1.symbol_expr())); + } + } + } +}