diff --git a/regression/jbmc-strings/long_string/Test.class b/regression/jbmc-strings/long_string/Test.class new file mode 100644 index 00000000000..de6b488b03b Binary files /dev/null and b/regression/jbmc-strings/long_string/Test.class differ diff --git a/regression/jbmc-strings/long_string/Test.java b/regression/jbmc-strings/long_string/Test.java new file mode 100644 index 00000000000..63b0d69babc --- /dev/null +++ b/regression/jbmc-strings/long_string/Test.java @@ -0,0 +1,41 @@ +import org.cprover.CProverString; + +public class Test { + public static void check(String s, String t) { + // Filter + if(s == null || t == null) + return; + + // Act + String u = s.concat(t); + + // Filter out + if(u.length() < 3_000_000) + return; + if(CProverString.charAt(u, 500_000) != 'a') + return; + if(CProverString.charAt(u, 2_000_000) != 'b') + return; + + // Assert + assert(false); + } + + public static void checkAbort(String s, String t) { + // Filter + if(s == null || t == null) + return; + + // Act + String u = s.concat(t); + + // Filter out + if(u.length() < 67_108_864) + return; + if(CProverString.charAt(u, 2_000_000) != 'b') + return; + + // Assert + assert(false); + } +} diff --git a/regression/jbmc-strings/long_string/test.desc b/regression/jbmc-strings/long_string/test.desc new file mode 100644 index 00000000000..7f5301726d3 --- /dev/null +++ b/regression/jbmc-strings/long_string/test.desc @@ -0,0 +1,11 @@ +CORE +Test.class +--refine-strings --function Test.check --string-max-input-length 2000000 +^EXIT=10$ +^SIGNAL=0$ +assertion at file Test.java line 21 .* FAILURE +-- +-- +This checks that the solver manage to violate assertions even when this requires +some very long strings, as long as they don't exceed the arbitrary limit that +is set by the solver (64M characters). diff --git a/regression/jbmc-strings/long_string/test_abort.desc b/regression/jbmc-strings/long_string/test_abort.desc new file mode 100644 index 00000000000..22074e1ed1e --- /dev/null +++ b/regression/jbmc-strings/long_string/test_abort.desc @@ -0,0 +1,9 @@ +CORE +Test.class +--refine-strings --function Test.checkAbort +^EXIT=6$ +^SIGNAL=0$ +-- +-- +This tests should abort, because concretizing a string of the required +length may take to much memory. diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 7fcd2d76574..bb77486bd2f 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -177,8 +177,6 @@ class string_constraint_generatort final array_string_exprt fresh_string(const typet &index_type, const typet &char_type); array_string_exprt get_string_expr(const exprt &expr); - plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - static constant_exprt constant_char(int i, const typet &char_type); @@ -447,6 +445,9 @@ exprt minimum(const exprt &a, const exprt &b); /// \return expression representing the maximum of two expressions exprt maximum(const exprt &a, const exprt &b); +/// \return Boolean true when the sum of the two expressions overflows +exprt sum_overflows(const plus_exprt &sum); + exprt length_constraint_for_concat_char( const array_string_exprt &res, const array_string_exprt &s1); diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 1f61a769208..97a94e1a417 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -128,12 +128,11 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( const symbol_exprt result = fresh_symbol("char", return_type); const exprt index1 = from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; + const exprt &char2 = str[plus_exprt(pos, index1)]; const typecast_exprt char1_as_int(char1, return_type); const typecast_exprt char2_as_int(char2, return_type); const exprt pair = pair_value(char1_as_int, char2_as_int, return_type); - const exprt is_low = - is_low_surrogate(str[plus_exprt_with_overflow_check(pos, index1)]); + const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]); const and_exprt return_pair(is_high_surrogate(str[pos]), is_low); lemmas.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -210,8 +209,8 @@ exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const typet &return_type=f.type(); const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type); - const exprt minimum = plus_exprt_with_overflow_check(index, offset); - const exprt maximum = plus_exprt_with_overflow_check(minimum, offset); + const exprt minimum = plus_exprt(index, offset); + const exprt maximum = plus_exprt(minimum, offset); lemmas.push_back(binary_relation_exprt(result, ID_le, maximum)); lemmas.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index d1cd743b32d..3a8cdfca8cc 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -18,12 +18,15 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// at index `end_index'`. /// Where start_index' is max(0, start_index) and end_index' is /// max(min(end_index, s2.length), start_index') +/// If s1.length + end_index' - start_index' is greater than the maximal integer +/// of the type of res.length, then the result gets truncated to the size +/// of this maximal integer. /// /// These axioms are: -/// 1. \f$|res| = |s_1| + end\_index' - start\_index' \f$ +/// 1. \f$|res| = overflow ? |s_1| + end\_index' - start\_index' +/// : max_int \f$ /// 2. \f$\forall i<|s_1|. res[i]=s_1[i] \f$ -/// 3. \f$\forall i< end\_index' - start\_index'.\ res[i+|s_1|] -/// = s_2[start\_index'+i]\f$ +/// 3. \f$\forall i< |res| - |s_1|.\ res[i+|s_1|] = s_2[start\_index'+i]\f$ /// /// \param res: an array of characters expression /// \param s1: an array of characters expression @@ -59,7 +62,8 @@ exprt string_constraint_generatort::add_axioms_for_concat_substr( fresh_univ_index("QA_index_concat2", res.length().type()); const equal_exprt res_eq( res[plus_exprt(idx2, s1.length())], s2[plus_exprt(start1, idx2)]); - return string_constraintt(idx2, minus_exprt(end1, start1), res_eq); + const minus_exprt upper_bound(res.length(), s1.length()); + return string_constraintt(idx2, upper_bound, res_eq); }()); return from_integer(0, get_return_code_type()); @@ -77,10 +81,13 @@ exprt length_constraint_for_concat_substr( const exprt &start, const exprt &end) { + PRECONDITION(res.length().type().id() == ID_signedbv); const exprt start1 = maximum(start, from_integer(0, start.type())); const exprt end1 = maximum(minimum(end, s2.length()), start1); const plus_exprt res_length(s1.length(), minus_exprt(end1, start1)); - return equal_exprt(res.length(), res_length); + const exprt overflow = sum_overflows(res_length); + const exprt max_int = to_signedbv_type(res.length().type()).largest_expr(); + return equal_exprt(res.length(), if_exprt(overflow, max_int, res_length)); } /// Add axioms enforcing that the length of `res` is that of the concatenation diff --git a/src/solvers/refinement/string_constraint_generator_format.cpp b/src/solvers/refinement/string_constraint_generator_format.cpp index 03a6f788d28..847d7baec4d 100644 --- a/src/solvers/refinement/string_constraint_generator_format.cpp +++ b/src/solvers/refinement/string_constraint_generator_format.cpp @@ -358,6 +358,7 @@ exprt string_constraint_generatort::add_axioms_for_format( const typet &index_type = res.length().type(); for(const format_elementt &fe : format_strings) + { if(fe.is_format_specifier()) { const format_specifiert &fs=fe.get_format_specifier(); @@ -392,24 +393,37 @@ exprt string_constraint_generatort::add_axioms_for_format( add_axioms_for_constant(str, fe.get_format_text().get_content()); intermediary_strings.push_back(str); } + } + + exprt return_code = from_integer(0, get_return_code_type()); if(intermediary_strings.empty()) - return to_array_string_expr( - array_exprt(array_typet(char_type, from_integer(0, index_type)))); + { + lemmas.push_back(equal_exprt(res.length(), from_integer(0, index_type))); + return return_code; + } - auto it=intermediary_strings.begin(); - array_string_exprt str = *(it++); - exprt return_code = from_integer(0, signedbv_typet(32)); - for(; it!=intermediary_strings.end(); ++it) + array_string_exprt str = intermediary_strings[0]; + + if(intermediary_strings.size() == 1) { + // Copy the first string + return add_axioms_for_substring( + res, str, from_integer(0, index_type), str.length()); + } + + // start after the first string and stop before the last + for(std::size_t i = 1; i < intermediary_strings.size() - 1; ++i) + { + const array_string_exprt &intermediary = intermediary_strings[i]; const array_string_exprt fresh = fresh_string(index_type, char_type); return_code = - bitor_exprt(return_code, add_axioms_for_concat(fresh, str, *it)); + bitor_exprt(return_code, add_axioms_for_concat(fresh, str, intermediary)); str = fresh; } - // Copy - add_axioms_for_substring(res, str, from_integer(0, index_type), str.length()); - return return_code; + + return bitor_exprt( + return_code, add_axioms_for_concat(res, str, intermediary_strings.back())); } /// Construct a string from a constant array. diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index e2336e598a9..6ac23094b05 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -352,8 +352,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( lemmas.push_back(a3); const exprt index1 = from_integer(1, index_type); - const exprt from_index_plus_one = - plus_exprt_with_overflow_check(from_index, index1); + const plus_exprt from_index_plus_one(from_index, index1); const if_exprt end_index( binary_relation_exprt(from_index_plus_one, ID_le, str.length()), from_index_plus_one, diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 97c0287eb96..bf9c36ff0dd 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -130,31 +130,19 @@ symbol_exprt string_constraint_generatort::fresh_boolean( return b; } -/// Create a plus expression while adding extra constraints to axioms in order -/// to prevent overflows. -/// \param op1: First term of the sum -/// \param op2: Second term of the sum -/// \return A plus expression representing the sum of the arguments -plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( - const exprt &op1, const exprt &op2) -{ - plus_exprt sum(plus_exprt(op1, op2)); - - exprt zero=from_integer(0, op1.type()); - - binary_relation_exprt neg1(op1, ID_lt, zero); - binary_relation_exprt neg2(op2, ID_lt, zero); - binary_relation_exprt neg_sum(sum, ID_lt, zero); - - // We prevent overflows by adding the following constraint: - // If the signs of the two operands are the same, then the sign of the sum - // should also be the same. - implies_exprt no_overflow(equal_exprt(neg1, neg2), - equal_exprt(neg1, neg_sum)); - - lemmas.push_back(no_overflow); - - return sum; +exprt sum_overflows(const plus_exprt &sum) +{ + PRECONDITION(sum.operands().size() == 2); + const exprt zero = from_integer(0, sum.op0().type()); + const binary_relation_exprt op0_negative(sum.op0(), ID_lt, zero); + const binary_relation_exprt op1_negative(sum.op1(), ID_lt, zero); + const binary_relation_exprt sum_negative(sum, ID_lt, zero); + + // overflow happens when we add two values of same sign but their sum has a + // different sign + return and_exprt( + equal_exprt(op0_negative, op1_negative), + notequal_exprt(op1_negative, sum_negative)); } /// Associate an actual finite length to infinite arrays diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 09948073b49..81dd16272aa 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -36,11 +36,10 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( symbol_exprt isprefix=fresh_boolean("isprefix"); const typet &index_type=str.length().type(); - implies_exprt a1( - isprefix, - str.axiom_for_length_ge(plus_exprt_with_overflow_check( - prefix.length(), offset))); - lemmas.push_back(a1); + // Axiom 1. + lemmas.push_back( + implies_exprt( + isprefix, str.axiom_for_length_ge(plus_exprt(prefix.length(), offset)))); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); string_constraintt a2( diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index a5b01005c7b..154e027a5ef 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -175,9 +175,8 @@ exprt string_constraint_generatort::add_axioms_for_trim( const symbol_exprt idx = fresh_exist_index("index_trim", index_type); const exprt space_char = from_integer(' ', char_type); - exprt a1=str.axiom_for_length_ge( - plus_exprt_with_overflow_check(idx, res.length())); - lemmas.push_back(a1); + // Axiom 1. + lemmas.push_back(str.axiom_for_length_ge(plus_exprt(idx, res.length()))); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); lemmas.push_back(a2); @@ -197,32 +196,31 @@ exprt string_constraint_generatort::add_axioms_for_trim( string_constraintt a6(n, idx, non_print); constraints.push_back(a6); - symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, - res.length())); - binary_relation_exprt eqn2( - str[plus_exprt(idx, plus_exprt(res.length(), n2))], - ID_le, - space_char); - - string_constraintt a7(n2, bound, eqn2); - constraints.push_back(a7); + // Axiom 7. + constraints.push_back([&] { // NOLINT + const symbol_exprt n2 = fresh_univ_index("QA_index_trim2", index_type); + const minus_exprt bound(minus_exprt(str.length(), idx), res.length()); + const binary_relation_exprt eqn2( + str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, space_char); + return string_constraintt(n2, bound, eqn2); + }()); symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type); equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); string_constraintt a8(n3, res.length(), eqn3); constraints.push_back(a8); - minus_exprt index_before( - plus_exprt_with_overflow_check(idx, res.length()), - from_integer(1, index_type)); - binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); - or_exprt a9( - equal_exprt(idx, str.length()), - and_exprt( - binary_relation_exprt(str[idx], ID_gt, space_char), - no_space_before)); - lemmas.push_back(a9); + // Axiom 9. + lemmas.push_back([&] { + const plus_exprt index_before( + idx, minus_exprt(res.length(), from_integer(1, index_type))); + const binary_relation_exprt no_space_before( + str[index_before], ID_gt, space_char); + return or_exprt( + equal_exprt(idx, str.length()), + and_exprt( + binary_relation_exprt(str[idx], ID_gt, space_char), no_space_before)); + }()); return from_integer(0, f.type()); } @@ -511,10 +509,7 @@ exprt string_constraint_generatort::add_axioms_for_delete_char_at( const array_string_exprt str = get_string_expr(f.arguments()[2]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - res, - str, - f.arguments()[3], - plus_exprt_with_overflow_check(f.arguments()[3], index_one)); + res, str, f.arguments()[3], plus_exprt(f.arguments()[3], index_one)); } /// Add axioms stating that `res` corresponds to the input `str` diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 8cec89c46fb..3aa14f1ddf4 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -23,7 +23,6 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include -#include #include #include #include @@ -47,7 +46,7 @@ static optionalt find_counter_example( /// * the negation of `a` is an existential formula `b`; /// * we substituted symbols in `b` by their values found in `get`; /// * arrays are concretized, meaning we attribute a value for characters that -/// are unknown to get, for details see concretize_arrays_in_expression; +/// are unknown to get, for details see substitute_array_access; /// * `b` is simplified and array accesses are replaced by expressions /// without arrays; /// * we give lemma `b` to a fresh solver; @@ -482,7 +481,7 @@ union_find_replacet string_identifiers_resolution_from_equations( equations_to_treat.push(i); std::vector rhs_strings = extract_strings(eq.rhs()); - for(const auto expr : rhs_strings) + for(const auto &expr : rhs_strings) equation_map.add(i, expr); } else if(eq.lhs().type().id() != ID_pointer && @@ -490,7 +489,7 @@ union_find_replacet string_identifiers_resolution_from_equations( { std::vector lhs_strings = extract_strings_from_lhs(eq.lhs()); - for(const auto expr : lhs_strings) + for(const auto &expr : lhs_strings) equation_map.add(i, expr); if(lhs_strings.empty()) @@ -648,20 +647,24 @@ decision_proceduret::resultt string_refinementt::dec_solve() output_equations(debug(), equations, ns); #endif + // The object `dependencies` is also used by get, so we have to use it as a + // class member but we make sure it is cleared at each `dec_solve` call. + dependencies.clear(); debug() << "dec_solve: compute dependency graph and remove function " << "applications captured by the dependencies:" << eom; - const auto new_equation_end = std::remove_if( - equations.begin(), equations.end(), [&](const equal_exprt &eq) { // NOLINT - return add_node(dependencies, eq, generator.array_pool); - }); - equations.erase(new_equation_end, equations.end()); + std::vector local_equations; + for(const equal_exprt &eq : equations) + { + if(!add_node(dependencies, eq, generator.array_pool)) + local_equations.push_back(eq); + } #ifdef DEBUG dependencies.output_dot(debug()); #endif debug() << "dec_solve: add constraints" << eom; - dependencies.add_constraints(generator); + dependencies.add_constraints(generator); #ifdef DEBUG output_equations(debug(), equations, ns); @@ -676,7 +679,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() } #endif - for(const auto &eq : equations) + for(const auto &eq : local_equations) { #ifdef DEBUG debug() << "dec_solve: set_to " << format(eq) << eom; @@ -931,50 +934,21 @@ static optionalt get_array( } std::size_t n = *n_opt; - const array_typet ret_type(char_type, from_integer(n, index_type)); - array_exprt ret(ret_type); - - if(n>max_string_length) + if(n > MAX_CONCRETE_STRING_SIZE) { stream << "(sr::get_array) long string (size=" << n << ")" << eom; - return {}; - } - - if(n==0) - return empty_ret; - - if(arr_val.id()=="array-list") - { - DATA_INVARIANT( - arr_val.operands().size()%2==0, - string_refinement_invariantt("and index expression must be on a symbol, " - "with, array_of, if, or array, and all " - "cases besides array are handled above")); - std::map initial_map; - for(size_t i = 0; i < arr_val.operands().size(); i += 2) - { - exprt index = arr_val.operands()[i]; - if(auto idx = numeric_cast(index)) - { - if(*idx < n) - initial_map[*idx] = arr_val.operands()[i + 1]; - } - } - - // Pad the concretized values to the left to assign the uninitialized - // values of result. - ret.operands()=fill_in_map_as_vector(initial_map); - return ret; - } - else if(arr_val.id()==ID_array) - { - // copy the `n` first elements of `arr_val` - for(size_t i=0; iconcretize(n, index_type); + return {}; } /// convert the content of a string to a more readable representation. This @@ -1015,34 +989,35 @@ static exprt get_char_array_and_concretize( if(arr_model_opt) { stream << indent << indent << "- char_array: " << format(*arr_model_opt) + << '\n'; + stream << indent << indent << "- type : " << format(arr_model_opt->type()) << eom; const exprt simple = simplify_expr(*arr_model_opt, ns); stream << indent << indent << "- simplified_char_array: " << format(simple) << eom; - const exprt concretized_array = - concretize_arrays_in_expression(simple, max_string_length, ns); - stream << indent << indent - << "- concretized_char_array: " << format(concretized_array) << eom; - - if(concretized_array.id() == ID_array) - { - stream << indent << indent << "- as_string: \"" - << string_of_array(to_array_expr(concretized_array)) << "\"\n"; - } - else + if( + const auto concretized_array = get_array( + super_get, ns, max_string_length, stream, to_array_string_expr(simple))) { - stream << indent << "- warning: not an array" << eom; - } + stream << indent << indent + << "- concretized_char_array: " << format(*concretized_array) + << eom; - stream << indent << indent << "- type: " << format(concretized_array.type()) - << eom; - return concretized_array; - } - else - { - stream << indent << indent << "- incomplete model" << eom; - return arr; + if( + const auto array_expr = + expr_try_dynamic_cast(*concretized_array)) + { + stream << indent << indent << "- as_string: \"" + << string_of_array(*array_expr) << "\"\n"; + } + else + stream << indent << "- warning: not an array" << eom; + return *concretized_array; + } + return simple; } + stream << indent << indent << "- incomplete model" << eom; + return arr; } /// Display part of the current model by mapping the variables created by the @@ -1105,85 +1080,12 @@ static exprt substitute_array_access( : sparse_arrayt(expr).to_if_expression(index); } -/// Fill an array represented by a list of with_expr by propagating values to -/// the left. For instance `ARRAY_OF(12) WITH[2:=24] WITH[4:=42]` will give -/// `{ 24, 24, 24, 42, 42 }` -/// \param expr: an array expression in the form -/// `ARRAY_OF(x) WITH [i0:=v0] ... WITH [iN:=vN]` -/// \param string_max_length: bound on the length of strings -/// \return an array expression with filled in values, or expr if it is simply -/// an `ARRAY_OF(x)` expression -static array_exprt -fill_in_array_with_expr(const exprt &expr, const std::size_t string_max_length) -{ - PRECONDITION(expr.type().id()==ID_array); - PRECONDITION(expr.id()==ID_with || expr.id()==ID_array_of); - const array_typet &array_type = to_array_type(expr.type()); - - // Map of the parts of the array that are initialized - std::map initial_map; - - // Set the last index to be sure the array will have the right length - const auto &array_size_opt = numeric_cast(array_type.size()); - if(array_size_opt && *array_size_opt > 0) - initial_map.emplace( - *array_size_opt - 1, - from_integer(CHARACTER_FOR_UNKNOWN, array_type.subtype())); - - for(exprt it=expr; it.id()==ID_with; it=to_with_expr(it).old()) - { - // Add to `initial_map` all the pairs (index,value) contained in `WITH` - // statements - const with_exprt &with_expr = to_with_expr(it); - const exprt &then_expr=with_expr.new_value(); - const auto index = - numeric_cast_v(to_constant_expr(with_expr.where())); - if( - index < string_max_length && (!array_size_opt || index < *array_size_opt)) - initial_map.emplace(index, then_expr); - } - - array_exprt result(array_type); - result.operands() = fill_in_map_as_vector(initial_map); - return result; -} - -/// Fill an array represented by an array_expr by propagating values to -/// the left for unknown values. For instance `{ 24 , * , * , 42, * }` will give -/// `{ 24, 42, 42, 42, '?' }` -/// \param expr: an array expression -/// \param string_max_length: bound on the length of strings -/// \return an array expression with filled in values -exprt fill_in_array_expr(const array_exprt &expr, std::size_t string_max_length) -{ - PRECONDITION(expr.type().id() == ID_array); - const array_typet &array_type = to_array_type(expr.type()); - PRECONDITION(array_type.subtype().id() == ID_unsignedbv); - - // Map of the parts of the array that are initialized - std::map initial_map; - const auto &array_size_opt = numeric_cast(array_type.size()); - - if(array_size_opt && *array_size_opt > 0) - initial_map.emplace( - *array_size_opt - 1, - from_integer(CHARACTER_FOR_UNKNOWN, array_type.subtype())); - - for(std::size_t i = 0; i < expr.operands().size(); ++i) - { - if(i < string_max_length && expr.operands()[i].id() != ID_unknown) - initial_map[i] = expr.operands()[i]; - } - - array_exprt result(array_type); - result.operands()=fill_in_map_as_vector(initial_map); - return result; -} - /// Create an equivalent expression where array accesses are replaced by 'if' /// expressions: for instance in array access `arr[index]`, where: /// `arr := {12, 24, 48}` the constructed expression will be: /// `index==0 ? 12 : index==1 ? 24 : 48` +/// Avoids repetition so `arr := {12, 12, 24, 48}` will give +/// `index<=1 ? 12 : index==1 ? 24 : 48` static exprt substitute_array_access( const array_exprt &array_expr, const exprt &index, @@ -1191,29 +1093,9 @@ static exprt substitute_array_access( &symbol_generator) { const typet &char_type = array_expr.type().subtype(); - const typet &index_type = to_array_type(array_expr.type()).size().type(); - const std::vector &operands = array_expr.operands(); - - exprt result = symbol_generator("out_of_bound_access", char_type); - - for(std::size_t i = 0; i < operands.size(); ++i) - { - // Go in reverse order so that smaller indexes appear first in the result - const std::size_t pos = operands.size() - 1 - i; - const equal_exprt equals(index, from_integer(pos, index_type)); - if(operands[pos].type() != char_type) - { - INVARIANT( - operands[pos].id() == ID_unknown, - string_refinement_invariantt( - "elements can only have type char or " - "unknown, and it is not char type")); - result = if_exprt(equals, exprt(ID_unknown, char_type), result); - } - else - result = if_exprt(equals, operands[pos], result); - } - return result; + const exprt default_val = symbol_generator("out_of_bound_access", char_type); + const interval_sparse_arrayt sparse_array(array_expr, default_val); + return sparse_array.to_if_expression(index); } static exprt substitute_array_access( @@ -1391,46 +1273,6 @@ static exprt negation_of_constraint(const string_constraintt &axiom) return negaxiom; } -/// Result of the solver `supert` should not be interpreted literally for char -/// arrays as not all indices are present in the index set. -/// In the given expression, we populate arrays at the indices for which the -/// solver has no constraint by copying values to the left. -/// For example an expression `ARRAY_OF(0) WITH [1:=2] WITH [4:=3]` would -/// be interpreted as `{ 2, 2, 3, 3, 3}`. -/// \param expr: expression to interpret -/// \param string_max_length: maximum size of arrays to consider -/// \param ns: namespace, used to determine what is an array of character -/// \return the interpreted expression -exprt concretize_arrays_in_expression( - exprt expr, - std::size_t string_max_length, - const namespacet &ns) -{ - auto it=expr.depth_begin(); - const auto end=expr.depth_end(); - while(it!=end) - { - if(is_char_array_type(it->type(), ns)) - { - if(it->id() == ID_with || it->id() == ID_array_of) - { - it.mutate() = fill_in_array_with_expr(*it, string_max_length); - it.next_sibling_or_parent(); - } - else if(it->id() == ID_array) - { - it.mutate() = fill_in_array_expr(to_array_expr(*it), string_max_length); - it.next_sibling_or_parent(); - } - else - ++it; // ignoring other expressions - } - else - ++it; - } - return expr; -} - /// Debugging function which outputs the different steps an axiom goes through /// to be checked in check axioms. static void debug_check_axioms_step( @@ -2236,22 +2078,53 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) /// Evaluates the given expression in the valuation found by /// string_refinementt::dec_solve. /// -/// The difference with supert::get is that arrays of characters need to be -/// concretized. See concretize_arrays_in_expression for how it is done. +/// Arrays of characters are interpreted differently from the result of +/// supert::get: values are propagated to the left to fill unknown. /// \param expr: an expression /// \return evaluated expression exprt string_refinementt::get(const exprt &expr) const { - // clang-format off - const auto super_get = [this](const exprt &expr) - { + const auto super_get = [this](const exprt &expr) { return supert::get(expr); }; - // clang-format on - exprt ecopy(expr); (void)symbol_resolve.replace_expr(ecopy); + // Special treatment for index expressions + const auto &index_expr = expr_try_dynamic_cast(ecopy); + if(index_expr && is_char_type(index_expr->type())) + { + std::reference_wrapper current(index_expr->array()); + while(current.get().id() == ID_if) + { + const auto &if_expr = expr_dynamic_cast(current.get()); + const exprt cond = get(if_expr.cond()); + if(cond.is_true()) + current = std::cref(if_expr.true_case()); + else if(cond.is_false()) + current = std::cref(if_expr.false_case()); + else + UNREACHABLE; + } + const auto array = supert::get(current.get()); + const auto index = get(index_expr->index()); + const exprt unknown = + from_integer(CHARACTER_FOR_UNKNOWN, index_expr->type()); + if( + const auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown)) + { + if(const auto index_value = numeric_cast(index)) + return sparse_array->at(*index_value); + return sparse_array->to_if_expression(index); + } + + INVARIANT( + array.id() == ID_symbol, + "apart from symbols, array valuations can be interpreted as sparse " + "arrays"); + return index_exprt(array, index); + } + if(is_char_array_type(ecopy.type(), ns)) { array_string_exprt &arr = to_array_string_expr(ecopy); @@ -2262,35 +2135,22 @@ exprt string_refinementt::get(const exprt &expr) const dependencies.eval(arr, [&](const exprt &expr) { return get(expr); })) return *from_dependencies; - const auto arr_model_opt = - get_array(super_get, ns, generator.max_string_length, debug(), arr); - // \todo Refactor with get array in model - if(arr_model_opt) - { - const exprt arr_model = simplify_expr(*arr_model_opt, ns); - const exprt concretized_array = concretize_arrays_in_expression( - arr_model, generator.max_string_length, ns); - return concretized_array; - } - else + if( + const auto arr_model_opt = + get_array(super_get, ns, generator.max_string_length, debug(), arr)) + return *arr_model_opt; + + if(generator.get_created_strings().count(arr)) { - auto set = generator.get_created_strings(); - if(set.find(arr) != set.end()) + const exprt length = super_get(arr.length()); + if(const auto n = numeric_cast(length)) { - exprt length = super_get(arr.length()); - if(const auto n = numeric_cast(length)) - { - exprt arr_model = - array_exprt(array_typet(arr.type().subtype(), length)); - for(size_t i = 0; i < *n; i++) - arr_model.copy_to_operands(exprt(ID_unknown, arr.type().subtype())); - const exprt concretized_array = concretize_arrays_in_expression( - arr_model, generator.max_string_length, ns); - return concretized_array; - } + const interval_sparse_arrayt sparse_array( + from_integer(CHARACTER_FOR_UNKNOWN, arr.type().subtype())); + return sparse_array.concretize(*n, length.type()); } - return arr; } + return arr; } return supert::get(ecopy); } diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 281f004da87..0fd5dbc8f5a 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -31,6 +31,8 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define DEFAULT_MAX_NB_REFINEMENT std::numeric_limits::max() #define CHARACTER_FOR_UNKNOWN '?' +// Limit the size of strings in traces to 64M chars to avoid memout +#define MAX_CONCRETE_STRING_SIZE (1 << 26) class string_refinementt final: public bv_refinementt { @@ -91,10 +93,6 @@ class string_refinementt final: public bv_refinementt }; exprt substitute_array_lists(exprt expr, std::size_t string_max_length); -exprt concretize_arrays_in_expression( - exprt expr, - std::size_t string_max_length, - const namespacet &ns); // Declaration required for unit-test: union_find_replacet string_identifiers_resolution_from_equations( diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 0175984f4b5..d556151a7dd 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -97,7 +97,7 @@ sparse_arrayt::sparse_arrayt(const with_exprt &expr) { const auto &with_expr = expr_dynamic_cast(ref.get()); const auto current_index = numeric_cast_v(with_expr.where()); - entries.emplace_back(current_index, with_expr.new_value()); + entries[current_index] = with_expr.new_value(); ref = with_expr.old(); } @@ -123,16 +123,10 @@ exprt sparse_arrayt::to_if_expression(const exprt &index) const }); } -interval_sparse_arrayt::interval_sparse_arrayt(const with_exprt &expr) - : sparse_arrayt(expr) +exprt sparse_arrayt::at(const std::size_t index) const { - // Entries are sorted so that successive entries correspond to intervals - std::sort( - entries.begin(), - entries.end(), - []( - const std::pair &a, - const std::pair &b) { return a.first < b.first; }); + const auto it = entries.find(index); + return it != entries.end() ? it->second : default_value; } exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const @@ -152,6 +146,79 @@ exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const }); } +interval_sparse_arrayt::interval_sparse_arrayt( + const array_exprt &expr, + const exprt &extra_value) + : sparse_arrayt(extra_value) +{ + const auto &operands = expr.operands(); + exprt last_added = extra_value; + for(std::size_t i = 0; i < operands.size(); ++i) + { + const std::size_t index = operands.size() - 1 - i; + if(operands[index].id() != ID_unknown && operands[index] != last_added) + { + entries[index] = operands[index]; + last_added = operands[index]; + } + } +} + +interval_sparse_arrayt interval_sparse_arrayt::of_array_list( + const exprt &expr, + const exprt &extra_value) +{ + PRECONDITION(expr.operands().size() % 2 == 0); + interval_sparse_arrayt sparse_array(extra_value); + for(std::size_t i = 0; i < expr.operands().size(); i += 2) + { + const auto index = numeric_cast(expr.operands()[i]); + INVARIANT( + expr.operands()[i + 1].type() == extra_value.type(), + "all values in array should have the same type"); + if(index.has_value() && expr.operands()[i + 1].id() != ID_unknown) + sparse_array.entries[*index] = expr.operands()[i + 1]; + } + return sparse_array; +} + +optionalt +interval_sparse_arrayt::of_expr(const exprt &expr, const exprt &extra_value) +{ + if(const auto &array_expr = expr_try_dynamic_cast(expr)) + return interval_sparse_arrayt(*array_expr, extra_value); + if(const auto &with_expr = expr_try_dynamic_cast(expr)) + return interval_sparse_arrayt(*with_expr); + if(expr.id() == "array-list") + return of_array_list(expr, extra_value); + return {}; +} + +exprt interval_sparse_arrayt::at(const std::size_t index) const +{ + // First element at or after index + const auto it = entries.lower_bound(index); + return it != entries.end() ? it->second : default_value; +} + +array_exprt interval_sparse_arrayt::concretize( + std::size_t size, + const typet &index_type) const +{ + const array_typet array_type( + default_value.type(), from_integer(size, index_type)); + array_exprt array(array_type); + array.operands().reserve(size); + + auto it = entries.begin(); + for(; it != entries.end() && it->first < size; ++it) + array.operands().resize(it->first + 1, it->second); + + array.operands().resize( + size, it == entries.end() ? default_value : it->second); + return array; +} + void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) { equations_containing[expr].push_back(i); @@ -253,6 +320,14 @@ void string_dependenciest::add_dependency( }); } +void string_dependenciest::clear() +{ + builtin_function_nodes.clear(); + string_nodes.clear(); + node_index_pool.clear(); + clean_cache(); +} + static void add_dependency_to_string_subexprs( string_dependenciest &dependencies, const function_application_exprt &fun_app, diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index e8f7b615e8f..d05c5a2d88c 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -96,9 +96,16 @@ class sparse_arrayt /// at the given index virtual exprt to_if_expression(const exprt &index) const; + /// Get the value at the specified index. + /// Complexity is linear in the number of entries. + virtual exprt at(std::size_t index) const; + protected: exprt default_value; - std::vector> entries; + std::map entries; + explicit sparse_arrayt(exprt default_value) : default_value(default_value) + { + } }; /// Represents arrays by the indexes up to which the value remains the same. @@ -112,8 +119,40 @@ class interval_sparse_arrayt final : public sparse_arrayt /// converted to an array `arr` where for all index `k` smaller than `i`, /// `arr[k]` is `a`, for all index between `i` (exclusive) and `j` it is `b` /// and for the others it is `x`. - explicit interval_sparse_arrayt(const with_exprt &expr); + explicit interval_sparse_arrayt(const with_exprt &expr) : sparse_arrayt(expr) + { + } + + /// Initialize an array expression to sparse array representation, avoiding + /// repetition of identical successive values and setting the default to + /// `extra_value`. + interval_sparse_arrayt(const array_exprt &expr, const exprt &extra_value); + + /// Initialize a sparse array from an array represented by a list of + /// index-value pairs, and setting the default to `extra_value`. + /// Indexes must be constant expressions, and negative indexes are ignored. + static interval_sparse_arrayt + of_array_list(const exprt &expr, const exprt &extra_value); + exprt to_if_expression(const exprt &index) const override; + + /// If the expression is an array_exprt or a with_exprt uses the appropriate + /// constructor, otherwise returns empty optional. + static optionalt + of_expr(const exprt &expr, const exprt &extra_value); + + /// Convert to an array representation, ignores elements at index >= size + array_exprt concretize(std::size_t size, const typet &index_type) const; + + /// Get the value at the specified index. + /// Complexity is linear in the number of entries. + exprt at(std::size_t index) const override; + + /// Array containing the same value at each index. + explicit interval_sparse_arrayt(exprt default_value) + : sparse_arrayt(default_value) + { + } }; /// Maps equation to expressions contained in them and conversely expressions to @@ -243,6 +282,9 @@ class string_dependenciest /// only add constraints on the length. void add_constraints(string_constraint_generatort &generatort); + /// Clear the content of the dependency graph + void clear(); + private: /// Set of nodes representing builtin_functions std::vector builtin_function_nodes; diff --git a/unit/solvers/refinement/string_refinement/concretize_array.cpp b/unit/solvers/refinement/string_refinement/concretize_array.cpp index a64303d2439..0b668de8fe8 100644 --- a/unit/solvers/refinement/string_refinement/concretize_array.cpp +++ b/unit/solvers/refinement/string_refinement/concretize_array.cpp @@ -1,7 +1,7 @@ /*******************************************************************\ - Module: Unit tests for concretize_array_expression in - solvers/refinement/string_refinement.cpp + Module: Unit tests for interval_sparse_arrayt::concretizein + solvers/refinement/string_refinement_util.cpp Author: Diffblue Ltd. @@ -18,6 +18,7 @@ SCENARIO("concretize_array_expression", "[core][solvers][refinement][string_refinement]") { + // Arrange const typet char_type=unsignedbv_typet(16); const typet int_type=signedbv_typet(32); const exprt index1=from_integer(1, int_type); @@ -27,33 +28,29 @@ SCENARIO("concretize_array_expression", const exprt index100=from_integer(100, int_type); const exprt char0=from_integer('0', char_type); const exprt index2=from_integer(2, int_type); + const exprt charz = from_integer('z', char_type); array_typet array_type(char_type, infinity_exprt(int_type)); // input_expr is - // `'0' + (ARRAY_OF(0) WITH [1:=x] WITH [4:=y] WITH [100:=z])[2]` - const plus_exprt input_expr( - from_integer('0', char_type), - index_exprt( + // ARRAY_OF(0) WITH [1:=x] WITH [4:=y] WITH [100:=z]` + const with_exprt input_expr( + with_exprt( with_exprt( - with_exprt( - with_exprt( - array_of_exprt(from_integer(0, char_type), array_type), - index1, - charx), - index4, - chary), - index100, - from_integer('z', char_type)), - index2)); - - // String max length is 50, so index 100 should get ignored. - symbol_tablet symbol_table; - namespacet ns(symbol_table); - const exprt concrete = concretize_arrays_in_expression(input_expr, 50, ns); - - // The expected result is `'0' + { 'x', 'x', 'y', 'y', 'y' }` - array_exprt array(array_type); - array.operands()={charx, charx, chary, chary, chary}; - const plus_exprt expected(char0, index_exprt(array, index2)); + array_of_exprt(from_integer(0, char_type), array_type), index1, charx), + index4, + chary), + index100, + charz); + + // Act + const interval_sparse_arrayt sparse_array(input_expr); + // String size is 7, so index 100 should get ignored. + const exprt concrete = sparse_array.concretize(7, int_type); + + // Assert + array_exprt expected(array_type); + // The expected result is `{ 'x', 'x', 'y', 'y', 'y', 'z', 'z' }` + expected.operands() = {charx, charx, chary, chary, chary, charz, charz}; + to_array_type(expected.type()).size() = from_integer(7, int_type); REQUIRE(concrete==expected); } diff --git a/unit/solvers/refinement/string_refinement/sparse_array.cpp b/unit/solvers/refinement/string_refinement/sparse_array.cpp index 9534d791466..d623dc37abc 100644 --- a/unit/solvers/refinement/string_refinement/sparse_array.cpp +++ b/unit/solvers/refinement/string_refinement/sparse_array.cpp @@ -46,19 +46,17 @@ SCENARIO("sparse_array", "[core][solvers][refinement][string_refinement]") WHEN("It is converted to a sparse array") { const sparse_arrayt sparse_array(input_expr); - THEN("The resulting if expression is index=4?x:index=1?y:index=100?z:0") + THEN("The resulting if expression is index=100?z:index=4?x:index=1?y:0") { const symbol_exprt index("index", int_type); const if_exprt expected( - equal_exprt(index, index4), - charx, + equal_exprt(index, index100), + charz, if_exprt( - equal_exprt(index, index1), - chary, + equal_exprt(index, index4), + charx, if_exprt( - equal_exprt(index, index100), - charz, - from_integer(0, char_type)))); + equal_exprt(index, index1), chary, from_integer(0, char_type)))); REQUIRE(sparse_array.to_if_expression(index) == expected); } }