diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 1f00a0bbe7e..d2c76eb6faa 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -164,7 +164,11 @@ class string_constraint_generatort final const exprt &end_index); exprt add_axioms_for_concat(const function_application_exprt &f); exprt add_axioms_for_concat_code_point(const function_application_exprt &f); - exprt add_axioms_for_constant(const array_string_exprt &res, irep_idt sval); + exprt add_axioms_for_constant( + const array_string_exprt &res, + irep_idt sval, + const exprt &guard = true_exprt()); + exprt add_axioms_for_delete( const array_string_exprt &res, const array_string_exprt &str, @@ -195,6 +199,11 @@ class string_constraint_generatort final exprt add_axioms_for_insert_char(const function_application_exprt &f); exprt add_axioms_for_insert_float(const function_application_exprt &f); exprt add_axioms_for_insert_double(const function_application_exprt &f); + + exprt add_axioms_for_cprover_string( + const array_string_exprt &res, + const exprt &arg, + const exprt &guard); exprt add_axioms_from_literal(const function_application_exprt &f); exprt add_axioms_from_int(const function_application_exprt &f); exprt add_axioms_from_int( diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index d5d6d4eeb0c..fad3a2d9e3d 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -19,10 +19,12 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// equal. /// \param res: array of characters for the result /// \param sval: a string constant +/// \param guard: condition under which the axiom should apply, true by default /// \return integer expression equal to zero exprt string_constraint_generatort::add_axioms_for_constant( const array_string_exprt &res, - irep_idt sval) + irep_idt sval, + const exprt &guard) { const typet &index_type = res.length().type(); const typet &char_type = res.content().type().subtype(); @@ -43,12 +45,12 @@ exprt string_constraint_generatort::add_axioms_for_constant( const exprt idx = from_integer(i, index_type); const exprt c = from_integer(str[i], char_type); const equal_exprt lemma(res[idx], c); - axioms.push_back(lemma); + axioms.push_back(implies_exprt(guard, lemma)); } const exprt s_length = from_integer(str.size(), index_type); - axioms.push_back(res.axiom_for_has_length(s_length)); + axioms.push_back(implies_exprt(guard, equal_exprt(res.length(), s_length))); return from_integer(0, get_return_code_type()); } @@ -65,6 +67,39 @@ exprt string_constraint_generatort::add_axioms_for_empty_string( return from_integer(0, get_return_code_type()); } +/// Convert an expression of type string_typet to a string_exprt +/// \param res: string expression for the result +/// \param arg: expression of type string typet +/// \param guard: condition under which `res` should be equal to arg +/// \return 0 if constraints were added, 1 if expression could not be handled +/// and no constraint was added. Expression we can handle are of the +/// form \f$ e := "" | (expr)? e : e \f$ +exprt string_constraint_generatort::add_axioms_for_cprover_string( + const array_string_exprt &res, + const exprt &arg, + const exprt &guard) +{ + if(const auto if_expr = expr_try_dynamic_cast(arg)) + { + const and_exprt guard_true(guard, if_expr->cond()); + const exprt return_code_true = + add_axioms_for_cprover_string(res, if_expr->true_case(), guard_true); + + const and_exprt guard_false(guard, not_exprt(if_expr->cond())); + const exprt return_code_false = + add_axioms_for_cprover_string(res, if_expr->false_case(), guard_false); + + return if_exprt( + equal_exprt(return_code_true, from_integer(0, get_return_code_type())), + return_code_false, + return_code_true); + } + else if(const auto constant_expr = expr_try_dynamic_cast(arg)) + return add_axioms_for_constant(res, constant_expr->get_value(), guard); + else + return from_integer(1, get_return_code_type()); +} + /// String corresponding to an internal cprover string /// /// Add axioms ensuring that the returned string expression is equal to the @@ -79,7 +114,5 @@ exprt string_constraint_generatort::add_axioms_from_literal( const function_application_exprt::argumentst &args=f.arguments(); PRECONDITION(args.size() == 3); // Bad args to string literal? const array_string_exprt res = char_array_of_pointer(args[1], args[0]); - const exprt &arg = args[2]; - irep_idt sval=to_constant_expr(arg).get_value(); - return add_axioms_for_constant(res, sval); + return add_axioms_for_cprover_string(res, args[2], true_exprt()); } diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index fec692c9bf9..3e3ed3a9479 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -27,6 +27,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include static exprt substitute_array_with_expr(const exprt &expr, const exprt &index); @@ -293,7 +294,7 @@ bool is_char_type(const typet &type) /// Distinguish char array from other types. /// For now, any unsigned bitvector type is considered a character. /// \param type: a type -/// \param ns: name space +/// \param ns: namespace /// \return true if the given type is an array of characters bool is_char_array_type(const typet &type, const namespacet &ns) { @@ -311,33 +312,65 @@ bool is_char_pointer_type(const typet &type) } /// \param type: a type -/// \param ns: name space -/// \return true if a subtype is an pointer of characters -static bool has_char_pointer_subtype(const typet &type, const namespacet &ns) +/// \param pred: a predicate +/// \return true if one of the subtype of `type` satisfies predicate `pred`. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +/// For instance in the type `t` defined by +/// `{ int a; char[] b; double * c; { bool d} e}`, `int`, `char`, +/// `double` and `bool` are subtypes of `t`. +bool has_subtype( + const typet &type, + const std::function &pred) { - if(is_char_pointer_type(type)) + if(pred(type)) return true; if(type.id() == ID_struct || type.id() == ID_union) { const struct_union_typet &struct_type = to_struct_union_type(type); - for(const auto &comp : struct_type.components()) - { - if(has_char_pointer_subtype(comp.type(), ns)) - return true; - } + return std::any_of( + struct_type.components().begin(), + struct_type.components().end(), // NOLINTNEXTLINE + [&](const struct_union_typet::componentt &comp) { + return has_subtype(comp.type(), pred); + }); } - for(const auto &t : type.subtypes()) - { - if(has_char_pointer_subtype(t, ns)) - return true; - } - return false; + return std::any_of( // NOLINTNEXTLINE + type.subtypes().begin(), type.subtypes().end(), [&](const typet &t) { + return has_subtype(t, pred); + }); +} + +/// \param type: a type +/// \return true if a subtype of `type` is an pointer of characters. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +static bool has_char_pointer_subtype(const typet &type) +{ + return has_subtype(type, is_char_pointer_type); +} + +/// \param type: a type +/// \return true if a subtype of `type` is string_typet. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +static bool has_string_subtype(const typet &type) +{ + // NOLINTNEXTLINE + return has_subtype( + type, [](const typet &subtype) { return subtype == string_typet(); }); } /// \param expr: an expression -/// \param ns: name space +/// \param ns: namespace /// \return true if a subexpression of `expr` is an array of characters static bool has_char_array_subexpr(const exprt &expr, const namespacet &ns) { @@ -420,7 +453,8 @@ static union_find_replacet generate_symbol_resolution_from_equations( // function applications can be ignored because they will be replaced // in the convert_function_application step of dec_solve } - else if(has_char_pointer_subtype(lhs.type(), ns)) + else if(lhs.type().id() != ID_pointer && + has_char_pointer_subtype(lhs.type())) { if(rhs.type().id() == ID_struct) { @@ -447,14 +481,202 @@ static union_find_replacet generate_symbol_resolution_from_equations( return solver; } +/// Maps equation to expressions contained in them and conversely expressions to +/// equations that contain them. This can be used on a subset of expressions +/// which interests us, in particular strings. Equations are identified by an +/// index of type `std::size_t` for more efficient insertion and lookup. +class equation_symbol_mappingt +{ +public: + // Record index of the equations that contain a given expression + std::map> equations_containing; + // Record expressions that are contained in the equation with the given index + std::unordered_map> strings_in_equation; + + void add(const std::size_t i, const exprt &expr) + { + equations_containing[expr].push_back(i); + strings_in_equation[i].push_back(expr); + } + + std::vector find_expressions(const std::size_t i) + { + return strings_in_equation[i]; + } + + std::vector find_equations(const exprt &expr) + { + return equations_containing[expr]; + } +}; + +/// This is meant to be used on the lhs of an equation with string subtype. +/// \param lhs: expression which is either of string type, or a symbol +/// representing a struct with some string members +/// \return if lhs is a string return this string, if it is a struct return the +/// members of the expression that have string type. +static std::vector extract_strings_from_lhs(const exprt &lhs) +{ + std::vector result; + if(lhs.type() == string_typet()) + result.push_back(lhs); + else if(lhs.type().id() == ID_struct) + { + const struct_typet &struct_type = to_struct_type(lhs.type()); + for(const auto &comp : struct_type.components()) + { + const std::vector strings_in_comp = extract_strings_from_lhs( + member_exprt(lhs, comp.get_name(), comp.type())); + result.insert( + result.end(), strings_in_comp.begin(), strings_in_comp.end()); + } + } + return result; +} + +/// \param expr: an expression +/// \return all subexpressions of type string which are not if_exprt expressions +/// this includes expressions of the form `e.x` if e is a symbol subexpression +/// with a field `x` of type string +static std::vector extract_strings(const exprt &expr) +{ + std::vector result; + for(auto it = expr.depth_begin(); it != expr.depth_end();) + { + if(it->type() == string_typet() && it->id() != ID_if) + { + result.push_back(*it); + it.next_sibling_or_parent(); + } + else if(it->id() == ID_symbol) + { + for(const exprt &e : extract_strings_from_lhs(*it)) + result.push_back(e); + it.next_sibling_or_parent(); + } + else + ++it; + } + return result; +} + +/// Given an equation on strings, mark these strings as belonging to the same +/// set in the `symbol_resolve` structure. The lhs and rhs of the equation, +/// should have string type or be struct with string members. +/// \param eq: equation to add +/// \param symbol_resolve: structure to which the equation will be added +/// \param ns: namespace +static void add_string_equation_to_symbol_resolution( + const equal_exprt &eq, + union_find_replacet &symbol_resolve, + const namespacet &ns) +{ + if(eq.rhs().type() == string_typet()) + { + symbol_resolve.make_union(eq.lhs(), eq.rhs()); + } + else if(has_string_subtype(eq.lhs().type())) + { + if(eq.rhs().type().id() == ID_struct) + { + const struct_typet &struct_type = to_struct_type(eq.rhs().type()); + for(const auto &comp : struct_type.components()) + { + const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type()); + const exprt rhs_data = simplify_expr( + member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns); + add_string_equation_to_symbol_resolution( + equal_exprt(lhs_data, rhs_data), symbol_resolve, ns); + } + } + } +} + +/// Symbol resolution for expressions of type string typet. +/// We record equality between these expressions in the output if one of the +/// function calls depends on them. +/// \param equations: list of equations +/// \param ns: namespace +/// \param stream: output stream +/// \return union_find_replacet structure containing the correspondences. +union_find_replacet string_identifiers_resolution_from_equations( + std::vector &equations, + const namespacet &ns, + messaget::mstreamt &stream) +{ + const auto eom = messaget::eom; + const std::string log_message = + "WARNING string_refinement.cpp " + "string_identifiers_resolution_from_equations:"; + + equation_symbol_mappingt equation_map; + + // Indexes of equations that need to be added to the result + std::unordered_set required_equations; + std::stack equations_to_treat; + + for(std::size_t i = 0; i < equations.size(); ++i) + { + const equal_exprt &eq = equations[i]; + if(eq.rhs().id() == ID_function_application) + { + if(required_equations.insert(i).second) + equations_to_treat.push(i); + + std::vector rhs_strings = extract_strings(eq.rhs()); + for(const auto expr : rhs_strings) + equation_map.add(i, expr); + } + else if(eq.lhs().type().id() != ID_pointer && + has_string_subtype(eq.lhs().type())) + { + std::vector lhs_strings = extract_strings_from_lhs(eq.lhs()); + + for(const auto expr : lhs_strings) + equation_map.add(i, expr); + + if(lhs_strings.empty()) + { + stream << log_message << "non struct with string subtype " + << from_expr(ns, "", eq.lhs()) << "\n * of type " + << from_type(ns, "", eq.lhs().type()) << eom; + } + + for(const exprt &expr : extract_strings(eq.rhs())) + equation_map.add(i, expr); + } + } + + // transitively add all equations which depend on the equations to treat + while(!equations_to_treat.empty()) + { + const std::size_t i = equations_to_treat.top(); + equations_to_treat.pop(); + for(const exprt &string : equation_map.find_expressions(i)) + { + for(const std::size_t j : equation_map.find_equations(string)) + { + if(required_equations.insert(j).second) + equations_to_treat.push(j); + } + } + } + + union_find_replacet result; + for(const std::size_t i : required_equations) + add_string_equation_to_symbol_resolution(equations[i], result, ns); + + return result; +} + void output_equations( std::ostream &output, const std::vector &equations, const namespacet &ns) { - for(const auto &eq : equations) - output << " * " << from_expr(ns, "", eq.lhs()) - << " == " << from_expr(ns, "", eq.rhs()) << std::endl; + for(std::size_t i = 0; i < equations.size(); ++i) + output << " [" << i << "] " << from_expr(ns, "", equations[i].lhs()) + << " == " << from_expr(ns, "", equations[i].rhs()) << std::endl; } /// Main decision procedure of the solver. Looks for a valuation of variables @@ -539,8 +761,27 @@ decision_proceduret::resultt string_refinementt::dec_solve() << from_expr(ns, "", pair.second) << eom; #endif + const union_find_replacet string_id_symbol_resolve = + string_identifiers_resolution_from_equations(equations, ns, debug()); +#ifdef DEBUG + debug() << "symbol resolve string:" << eom; + for(const auto &pair : string_id_symbol_resolve.to_vector()) + { + debug() << from_expr(ns, "", pair.first) << " --> " + << from_expr(ns, "", pair.second) << eom; + } +#endif + debug() << "dec_solve: Replacing char pointer symbols" << eom; replace_symbols_in_equations(symbol_resolve, equations); + + debug() << "dec_solve: Replacing string ids in function applications" << eom; + for(equal_exprt &eq : equations) + { + if(can_cast_expr(eq.rhs())) + string_id_symbol_resolve.replace_expr(eq.rhs()); + } + #ifdef DEBUG output_equations(debug(), equations, ns); #endif diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index b890f9e40d5..8e4a3432703 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -112,4 +112,14 @@ exprt concretize_arrays_in_expression( bool is_char_array_type(const typet &type, const namespacet &ns); +bool has_subtype( + const typet &type, + const std::function &pred); + +// Declaration required for unit-test: +union_find_replacet string_identifiers_resolution_from_equations( + std::vector &equations, + const namespacet &ns, + messaget::mstreamt &stream); + #endif diff --git a/unit/Makefile b/unit/Makefile index c8f7b1b5613..752300fc0c4 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -31,7 +31,9 @@ SRC += unit_tests.cpp \ solvers/refinement/string_constraint_generator_valueof/is_digit_with_radix.cpp \ solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \ solvers/refinement/string_refinement/concretize_array.cpp \ + solvers/refinement/string_refinement/has_subtype.cpp \ solvers/refinement/string_refinement/substitute_array_list.cpp \ + solvers/refinement/string_refinement/string_symbol_resolution.cpp \ solvers/refinement/string_refinement/union_find_replace.cpp \ util/expr_cast/expr_cast.cpp \ util/expr_iterator.cpp \ diff --git a/unit/solvers/refinement/string_refinement/has_subtype.cpp b/unit/solvers/refinement/string_refinement/has_subtype.cpp new file mode 100644 index 00000000000..6b0e419e8d2 --- /dev/null +++ b/unit/solvers/refinement/string_refinement/has_subtype.cpp @@ -0,0 +1,61 @@ +/*******************************************************************\ + + Module: Unit tests for has_subtype in + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include + +// Curryfied version of type comparison. +// Useful for the predicate argument of has_subtype +static std::function is_type(const typet &t1) +{ + auto f = [&](const typet &t2) { return t1 == t2; }; + return f; +} + +SCENARIO("has_subtype", "[core][solvers][refinement][string_refinement]") +{ + const typet char_type = java_char_type(); + const typet int_type = java_int_type(); + const typet bool_type = java_boolean_type(); + + REQUIRE(has_subtype(char_type, is_type(char_type))); + REQUIRE_FALSE(has_subtype(char_type, is_type(int_type))); + + GIVEN("a struct with char and int fields") + { + struct_typet struct_type; + struct_type.components().emplace_back("char_field", char_type); + struct_type.components().emplace_back("int_field", int_type); + THEN("char and int are subtypes") + { + REQUIRE(has_subtype(struct_type, is_type(char_type))); + REQUIRE(has_subtype(struct_type, is_type(int_type))); + } + THEN("bool is not a subtype") + { + REQUIRE_FALSE(has_subtype(struct_type, is_type(bool_type))); + } + } + + GIVEN("a pointer to char") + { + pointer_typet ptr_type = pointer_type(char_type); + THEN("char is a subtype") + { + REQUIRE(has_subtype(ptr_type, is_type(char_type))); + } + THEN("int is not a subtype") + { + REQUIRE_FALSE(has_subtype(ptr_type, is_type(int_type))); + } + } +} diff --git a/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp b/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp new file mode 100644 index 00000000000..076b2a5bbf2 --- /dev/null +++ b/unit/solvers/refinement/string_refinement/string_symbol_resolution.cpp @@ -0,0 +1,101 @@ +/*******************************************************************\ + + Module: Unit tests for string_identifiers_resolution_from_equations in + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +SCENARIO("string_identifiers_resolution_from_equations", +"[core][solvers][refinement][string_refinement]") +{ + // For printing expression + register_language(new_java_bytecode_language); + symbol_tablet symbol_table; + namespacet ns(symbol_table); + messaget::mstreamt &stream = messaget().debug(); + + GIVEN("Some equations") + { + constant_exprt a("a", string_typet()); + constant_exprt b("b", string_typet()); + constant_exprt c("c", string_typet()); + constant_exprt d("d", string_typet()); + constant_exprt e("e", string_typet()); + constant_exprt f("f", string_typet()); + + struct_typet struct_type; + struct_type.components().emplace_back("str1", string_typet()); + struct_type.components().emplace_back("str2", string_typet()); + struct_exprt struct_expr(struct_type); + struct_expr.operands().push_back(a); + struct_expr.operands().push_back(f); + symbol_exprt symbol_struct("sym_struct", struct_type); + + std::vector equations; + equations.emplace_back(a, b); + equations.emplace_back(b, c); + equations.emplace_back(d, e); + equations.emplace_back(symbol_struct, struct_expr); + + WHEN("There is no function call") + { + union_find_replacet symbol_resolve = + string_identifiers_resolution_from_equations( + equations, ns, stream); + + THEN("The symbol resolution structure is empty") + { + REQUIRE(symbol_resolve.to_vector().empty()); + } + } + + WHEN("There is a function call") + { + symbol_exprt fun_sym("f", code_typet()); + function_application_exprt fun(fun_sym, bool_typet()); + fun.operands().push_back(c); + symbol_exprt bool_sym("bool_b", bool_typet()); + equations.emplace_back(bool_sym, fun); + union_find_replacet symbol_resolve = + string_identifiers_resolution_from_equations( + equations, ns, stream); + + THEN("Equations that depend on it should be added") + { + REQUIRE(symbol_resolve.find(b) == symbol_resolve.find(c)); + REQUIRE(symbol_resolve.find(a) == symbol_resolve.find(b)); + + member_exprt sym_m1(symbol_struct, "str1", string_typet()); + member_exprt sym_m2(symbol_struct, "str2", string_typet()); + + REQUIRE(symbol_resolve.find(sym_m1) == symbol_resolve.find(c)); + REQUIRE(symbol_resolve.find(sym_m2) == symbol_resolve.find(f)); + } + + THEN("Equations that do not depend on it should not be added") + { + REQUIRE(symbol_resolve.find(d) != symbol_resolve.find(e)); + } + + THEN("No other equation is added") + { + REQUIRE(symbol_resolve.find(a) != symbol_resolve.find(d)); + REQUIRE(symbol_resolve.find(a) != symbol_resolve.find(f)); + REQUIRE(symbol_resolve.find(d) != symbol_resolve.find(f)); + } + } + } +}