diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class b/jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class new file mode 100644 index 00000000000..e3dcef537a3 Binary files /dev/null and b/jbmc/regression/jbmc/string_field_aliasing/Cart$Category.class differ diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class b/jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class new file mode 100644 index 00000000000..de058dc87d4 Binary files /dev/null and b/jbmc/regression/jbmc/string_field_aliasing/Cart$Product.class differ diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart.class b/jbmc/regression/jbmc/string_field_aliasing/Cart.class new file mode 100644 index 00000000000..ec467604922 Binary files /dev/null and b/jbmc/regression/jbmc/string_field_aliasing/Cart.class differ diff --git a/jbmc/regression/jbmc/string_field_aliasing/Cart.java b/jbmc/regression/jbmc/string_field_aliasing/Cart.java new file mode 100644 index 00000000000..d14d35baafc --- /dev/null +++ b/jbmc/regression/jbmc/string_field_aliasing/Cart.java @@ -0,0 +1,16 @@ +public class Cart { + + class Product { + public String size; + public Category cat; + } + + class Category { + public String name; + } + + public boolean checkTax4(Product product, String s) { + product.size="abc"; + return s.startsWith(product.cat.name); + } +} diff --git a/jbmc/regression/jbmc/string_field_aliasing/test.desc b/jbmc/regression/jbmc/string_field_aliasing/test.desc new file mode 100644 index 00000000000..0666460000c --- /dev/null +++ b/jbmc/regression/jbmc/string_field_aliasing/test.desc @@ -0,0 +1,13 @@ +CORE +Cart.class +--cp `../../../../scripts/format_classpath.sh . ../../../src/java_bytecode/library/core-models.jar` --trace --cover location --java-max-vla-length 96 --refine-strings --java-unwind-enum-static --max-nondet-string-length 200 --unwind 4 Cart.class --function Cart.checkTax4 --string-printable +^EXIT=0$ +^SIGNAL=0$ +checkTax4:.*: SATISFIED +-- +checkTax4:.*bytecode-index 8.*: FAILED +dec_solve: current index set is empty, this should not happen +-- +This checks that assigning to a field of an object (which generates a WITH expression in this case) doesn't result +in conflating the String-typed and non-String-typed fields of the Cart class. When they are conflated we get +warnings from the string solver and missing coverage. diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index aea661e5b62..474a8010662 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -344,6 +344,30 @@ void value_sett::get_value_set( get_value_set_rec(tmp, dest, "", tmp.type(), ns); } +/// Check if 'suffix' starts with 'field'. +/// Suffix is delimited by periods and '[]' (array access) tokens, so we're +/// looking for ".field($|[]|.)" +static bool suffix_starts_with_field( + const std::string &suffix, const std::string &field) +{ + return + !suffix.empty() && + suffix[0] == '.' && + suffix.compare(1, field.length(), field) == 0 && + (suffix.length() == field.length() + 1 || + suffix[field.length() + 1] == '.' || + suffix[field.length() + 1] == '['); +} + +static std::string strip_first_field_from_suffix( + const std::string &suffix, const std::string &field) +{ + INVARIANT( + suffix_starts_with_field(suffix, field), + "suffix should start with " + field); + return suffix.substr(field.length() + 1); +} + void value_sett::get_value_set_rec( const exprt &expr, object_mapt &dest, @@ -711,72 +735,113 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_struct) { + const auto &struct_components = to_struct_type(expr_type).components(); + INVARIANT( + struct_components.size() == expr.operands().size(), + "struct expression should have an operand per component"); + auto component_iter = struct_components.begin(); + bool found_component = false; + // a struct constructor, which may contain addresses + forall_operands(it, expr) - get_value_set_rec(*it, dest, suffix, original_type, ns); + { + const std::string &component_name = + id2string(component_iter->get_name()); + if(suffix_starts_with_field(suffix, component_name)) + { + std::string remaining_suffix = + strip_first_field_from_suffix(suffix, component_name); + get_value_set_rec(*it, dest, remaining_suffix, original_type, ns); + found_component = true; + } + ++component_iter; + } + + if(!found_component) + { + // Struct field doesn't appear as expected -- this has probably been + // cast from an incompatible type. Conservatively assume all fields may + // be of interest. + forall_operands(it, expr) + get_value_set_rec(*it, dest, suffix, original_type, ns); + } } else if(expr.id()==ID_with) { - assert(expr.operands().size()==3); - - // this is the array/struct - object_mapt tmp_map0; - get_value_set_rec(expr.op0(), tmp_map0, suffix, original_type, ns); + const with_exprt &with_expr = to_with_expr(expr); - // this is the update value -- note NO SUFFIX - object_mapt tmp_map2; - get_value_set_rec(expr.op2(), tmp_map2, "", original_type, ns); - - if(expr_type.id()==ID_struct) + // If the suffix is empty we're looking for the whole struct: + // default to combining both options as below. + if(expr_type.id() == ID_struct && !suffix.empty()) { - #if 0 - const object_map_dt &object_map0=tmp_map0.read(); - irep_idt component_name=expr.op1().get(ID_component_name); - - bool insert=true; - - for(object_map_dt::const_iterator - it=object_map0.begin(); - it!=object_map0.end(); - it++) + irep_idt component_name = with_expr.where().get(ID_component_name); + if(suffix_starts_with_field(suffix, id2string(component_name))) { - const exprt &e=to_expr(it); - - if(e.id()==ID_member && - e.get(ID_component_name)==component_name) - { - if(insert) - { - dest.write().insert(tmp_map2.read().begin(), tmp_map2.read().end()); - insert=false; - } - } - else - dest.write().insert(*it); + // Looking for the member overwritten by this WITH expression + std::string remaining_suffix = + strip_first_field_from_suffix(suffix, id2string(component_name)); + get_value_set_rec( + with_expr.new_value(), dest, remaining_suffix, original_type, ns); + } + else if(to_struct_type(expr_type).has_component(component_name)) + { + // Looking for a non-overwritten member, look through this expression + get_value_set_rec(with_expr.old(), dest, suffix, original_type, ns); + } + else + { + // Member we're looking for is not defined in this struct -- this + // must be a reinterpret cast of some sort. Default to conservatively + // assuming either operand might be involved. + get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op2(), dest, "", original_type, ns); } - #else - // Should be more precise! We only want "suffix" - make_union(dest, tmp_map0); - make_union(dest, tmp_map2); - #endif + } + else if(expr_type.id() == ID_array && !suffix.empty()) + { + std::string new_value_suffix; + if(has_prefix(suffix, "[]")) + new_value_suffix = suffix.substr(2); + + // Otherwise use a blank suffix on the assumption anything involved with + // the new value might be interesting. + + get_value_set_rec(with_expr.old(), dest, suffix, original_type, ns); + get_value_set_rec( + with_expr.new_value(), dest, new_value_suffix, original_type, ns); } else { - make_union(dest, tmp_map0); - make_union(dest, tmp_map2); + // Something else-- the suffixes used here are a rough guess at best, + // so this is imprecise. + get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op2(), dest, "", original_type, ns); } } else if(expr.id()==ID_array) { // an array constructor, possibly containing addresses + std::string new_suffix = suffix; + if(has_prefix(suffix, "[]")) + new_suffix = suffix.substr(2); + // Otherwise we're probably reinterpreting some other type -- try persisting + // with the current suffix for want of a better idea. + forall_operands(it, expr) - get_value_set_rec(*it, dest, suffix, original_type, ns); + get_value_set_rec(*it, dest, new_suffix, original_type, ns); } else if(expr.id()==ID_array_of) { // an array constructor, possibly containing an address + std::string new_suffix = suffix; + if(has_prefix(suffix, "[]")) + new_suffix = suffix.substr(2); + // Otherwise we're probably reinterpreting some other type -- try persisting + // with the current suffix for want of a better idea. + assert(expr.operands().size()==1); - get_value_set_rec(expr.op0(), dest, suffix, original_type, ns); + get_value_set_rec(expr.op0(), dest, new_suffix, original_type, ns); } else if(expr.id()==ID_dynamic_object) { diff --git a/unit/pointer-analysis/value_set.cpp b/unit/pointer-analysis/value_set.cpp new file mode 100644 index 00000000000..7c7b4a7e2d6 --- /dev/null +++ b/unit/pointer-analysis/value_set.cpp @@ -0,0 +1,314 @@ +/*******************************************************************\ + + Module: Unit tests for value_sett + + Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Unit tests for value_sett + +#include + +#include +#include +#include + +static bool object_descriptor_matches( + const exprt &descriptor_expr, const exprt &target) +{ + if(descriptor_expr.id() != ID_object_descriptor) + return false; + const auto &descriptor = to_object_descriptor_expr(descriptor_expr); + + if( + target.type().id() == ID_pointer && + target == null_pointer_exprt(to_pointer_type(target.type()))) + { + return + descriptor.object().id() == "NULL-object" && + descriptor.object().type() == target.type().subtype(); + } + else + { + return descriptor.object() == target; + } +} + +SCENARIO( + "value_sett", "[core][pointer-analysis][value_set]") +{ + symbol_tablet symbol_table; + namespacet ns(symbol_table); + value_sett value_set; + + GIVEN("A value-set containing some structure-typed objects") + { + // Create struct A { A *x; A *y } + + struct_typet struct_A; + struct_A.set_tag("A"); + struct_A.components().resize(2); + + type_symbolt A_symbol(struct_A); + A_symbol.name = "A"; + A_symbol.base_name = "A"; + A_symbol.pretty_name = "A"; + + auto &A_x = struct_A.components()[0]; + auto &A_y = struct_A.components()[1]; + + A_x.set_name("x"); + A_x.set_base_name("x"); + A_x.set_pretty_name("x"); + A_x.type() = pointer_typet(symbol_typet(A_symbol.name), 64); + + A_y.set_name("y"); + A_y.set_base_name("y"); + A_y.set_pretty_name("y"); + A_y.type() = pointer_typet(symbol_typet(A_symbol.name), 64); + + A_symbol.type = struct_A; + symbol_table.add(A_symbol); + + // Create global symbols struct A a1, a2, a3; + + symbolt a1_symbol; + a1_symbol.name = "a1"; + a1_symbol.base_name = "a1"; + a1_symbol.pretty_name = "a1"; + a1_symbol.type = symbol_typet(A_symbol.name); + a1_symbol.is_static_lifetime = true; + symbol_table.add(a1_symbol); + + symbolt a2_symbol; + a2_symbol.name = "a2"; + a2_symbol.base_name = "a2"; + a2_symbol.pretty_name = "a2"; + a2_symbol.type = symbol_typet(A_symbol.name); + a2_symbol.is_static_lifetime = true; + symbol_table.add(a2_symbol); + + symbolt a3_symbol; + a3_symbol.name = "a3"; + a3_symbol.base_name = "a3"; + a3_symbol.pretty_name = "a3"; + a3_symbol.type = symbol_typet(A_symbol.name); + a3_symbol.is_static_lifetime = true; + symbol_table.add(a3_symbol); + + // Assign a1.x = &a2; a1.y = &a3: + + member_exprt a1_x(a1_symbol.symbol_expr(), A_x); + member_exprt a1_y(a1_symbol.symbol_expr(), A_y); + + code_assignt assign_x(a1_x, address_of_exprt(a2_symbol.symbol_expr())); + code_assignt assign_y(a1_y, address_of_exprt(a3_symbol.symbol_expr())); + + value_set.apply_code(assign_x, ns); + value_set.apply_code(assign_y, ns); + + null_pointer_exprt null_A_ptr(to_pointer_type(a1_x.type())); + + WHEN("We query what a1.x points to") + { + std::list a1_x_result; + value_set.get_value_set(a1_x, a1_x_result, ns); + + THEN("It should point to 'a2'") + { + REQUIRE(a1_x_result.size() == 1); + const exprt &result = *a1_x_result.begin(); + REQUIRE(object_descriptor_matches(result, a2_symbol.symbol_expr())); + } + } + + WHEN("We query what a1.y points to") + { + std::list a1_y_result; + value_set.get_value_set(a1_y, a1_y_result, ns); + + THEN("It should point to 'a3'") + { + REQUIRE(a1_y_result.size() == 1); + const exprt &result = *a1_y_result.begin(); + REQUIRE(object_descriptor_matches(result, a3_symbol.symbol_expr())); + } + } + + WHEN("We query what (a1 WITH x = NULL).x points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + member_exprt member_of_with(a1_with, A_x); + + std::list matching_with_result; + value_set.get_value_set(member_of_with, matching_with_result, ns); + + THEN("It should be NULL") + { + REQUIRE(matching_with_result.size() == 1); + const exprt &result = *matching_with_result.begin(); + REQUIRE(object_descriptor_matches(result, null_A_ptr)); + } + } + + WHEN("We query what (a1 WITH x = NULL).y points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + member_exprt member_of_with(a1_with, A_y); + + std::list non_matching_with_result; + value_set.get_value_set(member_of_with, non_matching_with_result, ns); + + THEN("It should point to 'a3'") + { + REQUIRE(non_matching_with_result.size() == 1); + const exprt &result = *non_matching_with_result.begin(); + REQUIRE(object_descriptor_matches(result, a3_symbol.symbol_expr())); + } + } + + WHEN("We query what (a1 WITH x = NULL) points to") + { + with_exprt a1_with( + a1_symbol.symbol_expr(), member_exprt(nil_exprt(), A_x), null_A_ptr); + + std::list maybe_matching_with_result; + value_set.get_value_set(a1_with, maybe_matching_with_result, ns); + + THEN("It may point to NULL") + { + bool found = false; + for(const exprt &e : maybe_matching_with_result) + { + if(object_descriptor_matches(e, a1_with.new_value())) + found = true; + } + + REQUIRE(found); + } + + THEN("It may point to 'a2'") + { + // This happens because no entry for the whole struct is recorded, so + // value_sett tries looking up the struct's first component. + + bool found = false; + for(const exprt &e : maybe_matching_with_result) + { + if(object_descriptor_matches(e, a2_symbol.symbol_expr())) + found = true; + } + + REQUIRE(found); + } + } + + WHEN("We query what '{ .x = &a2, .y = &a3 }.x' points to") + { + struct_exprt struct_constant(symbol_typet(A_symbol.name)); + struct_constant.copy_to_operands( + address_of_exprt(a2_symbol.symbol_expr())); + struct_constant.copy_to_operands( + address_of_exprt(a3_symbol.symbol_expr())); + + member_exprt member_of_constant(struct_constant, A_x); + + std::list member_of_constant_result; + value_set.get_value_set( + member_of_constant, member_of_constant_result, ns); + + THEN("It should point to 'a2'") + { + REQUIRE(member_of_constant_result.size() == 1); + const exprt &result = *member_of_constant_result.begin(); + REQUIRE(object_descriptor_matches(result, a2_symbol.symbol_expr())); + } + } + } + + GIVEN("Some global integer symbols") + { + // Make some global integers i1, i2, i3: + signedbv_typet int32_type(32); + pointer_typet int32_ptr(int32_type, 64); + + symbolt i1; + i1.name = "i1"; + i1.base_name = "i1"; + i1.pretty_name = "i1"; + i1.type = int32_type; + i1.is_static_lifetime = true; + symbol_table.add(i1); + + symbolt i2; + i2.name = "i2"; + i2.base_name = "i2"; + i2.pretty_name = "i2"; + i2.type = int32_type; + i2.is_static_lifetime = true; + symbol_table.add(i2); + + symbolt i3; + i3.name = "i3"; + i3.base_name = "i3"; + i3.pretty_name = "i3"; + i3.type = int32_type; + i3.is_static_lifetime = true; + symbol_table.add(i3); + + WHEN("We query { &i1, &i2 }[i3]") + { + array_exprt arr(array_typet(int32_ptr, from_integer(2, int32_type))); + arr.copy_to_operands(address_of_exprt(i1.symbol_expr())); + arr.copy_to_operands(address_of_exprt(i2.symbol_expr())); + + index_exprt index_of_arr(arr, i3.symbol_expr()); + + std::list index_of_arr_result; + value_set.get_value_set(index_of_arr, index_of_arr_result, ns); + + THEN("We should get either &i1 or &i2, but not unknown") + { + REQUIRE(index_of_arr_result.size() == 2); + + bool found_i1 = false, found_i2 = false; + for(const exprt &result : index_of_arr_result) + { + if(object_descriptor_matches(result, i1.symbol_expr())) + found_i1 = true; + else if(object_descriptor_matches(result, i2.symbol_expr())) + found_i2 = true; + } + + REQUIRE(found_i1); + REQUIRE(found_i2); + } + } + + WHEN("We query (ARRAY_OF(&i1))[i3]") + { + array_of_exprt arr( + address_of_exprt(i1.symbol_expr()), + array_typet(int32_ptr, from_integer(2, int32_type))); + + index_exprt index_of_arr(arr, i3.symbol_expr()); + + std::list index_of_arr_result; + value_set.get_value_set(index_of_arr, index_of_arr_result, ns); + + THEN("We should get &i1") + { + REQUIRE(index_of_arr_result.size() == 1); + REQUIRE( + object_descriptor_matches( + *index_of_arr_result.begin(), i1.symbol_expr())); + } + } + } +}