From 072ed0f8b10c22512651f0fbd260c238efb453e7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Thu, 6 Apr 2017 15:25:39 +0100 Subject: [PATCH] Overhaul of string concretization This fixes diffblue/test-gen#158 Change the concretization process to avoid calling the solver We do this by looking at the indexes for which we have concrete value because they are in the index set and assigning to the others the value of the next concrete value. This is more efficient than calling the solver an extra step and avoid problems because lemmas gets added. Changing the way arrays are concretized in get_array This is to be coherent with the way this is done in the concretize function. Updating comments and assertion of concretize_string Refactor concretize_string Improve the loop in concretize_string and factor out into a pad_vector function. Modifications requested by Peter Schrammel --- src/solvers/refinement/string_refinement.cpp | 90 +++++++++++++++----- src/solvers/refinement/string_refinement.h | 55 +++++++++++- 2 files changed, 121 insertions(+), 24 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 6cd090d4b1d..23c7ab7f24a 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -340,7 +340,16 @@ Function: string_refinementt::concretize_string Purpose: If the expression is of type string, then adds constants to the index set to force the solver to pick concrete values - for each character, and fill the map `found_length` + for each character, and fill the maps `found_length` and + `found_content`. + + The way this is done is by looking for the length of the string, + then for each `i` in the index set, look at the value found by + the solver and put it in the `result` table. + For indexes that are not present in the index set, we put the + same value as the next index that is present in the index set. + We do so by traversing the array backward, remembering the + last value that has been initialized. \*******************************************************************/ @@ -357,29 +366,55 @@ void string_refinementt::concretize_string(const exprt &expr) if(!to_integer(length, found_length)) { assert(found_length.is_long()); - if(found_length<0) - { - // Lengths should not be negative. - // TODO: Add constraints no the sign of string lengths. - debug() << "concretize_results: WARNING found length is negative" - << eom; - } - else + assert(found_length>=0); + assert(found_length.to_long()<=generator.max_string_length); + size_t concretize_limit=found_length.to_long(); + exprt content_expr=str.content(); + std::vector result; + + if(index_set[str.content()].empty()) + return; + + // Use the last index as the default character value + exprt last_concretized=simplify_expr( + get(str[minus_exprt(length, from_integer(1, length.type()))]), ns); + result.resize(concretize_limit, last_concretized); + + // Keep track of the indexes for which we have actual values + std::set initialized; + + for(const auto &i : index_set[str.content()]) { - size_t concretize_limit=found_length.to_long(); - assert(concretize_limit<=generator.max_string_length); - concretize_limit=concretize_limit>generator.max_string_length? - generator.max_string_length:concretize_limit; - exprt content_expr=str.content(); - for(size_t i=0; i=concretize_limit) { - auto i_expr=from_integer(i, str.length().type()); - debug() << "Concretizing " << from_expr(content_expr) - << " / " << i << eom; - current_index_set[str.content()].insert(i_expr); - index_set[str.content()].insert(i_expr); + debug() << "concretize_string: ignoring out of bound index: " + << from_expr(simple_i) << eom; + } + else + { + // Add an entry in the result vector + size_t index=mp_index.to_long(); + exprt str_i=simplify_expr(str[simple_i], ns); + exprt value=simplify_expr(get(str_i), ns); + result[index]=value; + initialized.insert(index); } } + + // Pad the concretized values to the left to assign the uninitialized + // values of result. The indices greater than concretize_limit are + // already assigned to last_concretized. + pad_vector(result, initialized, last_concretized); + + array_exprt arr(to_array_type(content.type())); + arr.operands()=result; + debug() << "Concretized " << from_expr(content_expr) + << " = " << from_expr(arr) << eom; + found_content[content]=arr; } } } @@ -574,6 +609,9 @@ decision_proceduret::resultt string_refinementt::dec_solve() } } + found_length.clear(); + found_content.clear(); + initial_index_set(universal_axioms); update_index_set(cur); cur.clear(); @@ -613,7 +651,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() if(do_concretizing) { concretize_results(); - do_concretizing=false; + return D_SATISFIABLE; } else { @@ -769,6 +807,7 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const if(arr_val.id()=="array-list") { + std::set initialized; for(size_t i=0; isecond; + auto it=found_length.find(ecopy); if(it!=found_length.end()) return get_array(ecopy, it->second); diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 83f913d5ba9..c88675c95d8 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -90,6 +90,11 @@ class string_refinementt: public bv_refinementt // by the solver replace_mapt current_model; + // Length of char arrays found during concretization + std::map found_length; + // Content of char arrays found during concretization + std::map found_content; + void add_equivalence(const irep_idt & lhs, const exprt & rhs); void display_index_set(); @@ -138,18 +143,62 @@ class string_refinementt: public bv_refinementt std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; + template + void pad_vector( + std::vector &result, + std::set &initialized, + T1 last_concretized) const; void concretize_string(const exprt &expr); void concretize_results(); void concretize_lengths(); - // Length of char arrays found during concretization - std::map found_length; - exprt get_array(const exprt &arr, const exprt &size) const; exprt get_array(const exprt &arr) const; std::string string_of_array(const array_exprt &arr); }; +/*******************************************************************\ + +Function: string_refinementt::pad_vector + + Inputs: + concrete_array - the vector to populate + initialized - the vector containing the indices of the + concretized values + last_concretized - initial value of the last concretized index + + Purpose: Utility function for concretization of strings. Copies + concretized values to the left to initialize the + unconcretized indices of concrete_array. + +\*******************************************************************/ + +template +void string_refinementt::pad_vector( + std::vector &concrete_array, + std::set &initialized, + T1 last_concretized) const +{ + // Pad the concretized values to the left to assign the uninitialized + // values of result. The indices greater than concretize_limit are + // already assigned to last_concretized. + for(auto j=initialized.rbegin(); j!=initialized.rend();) + { + size_t i=*j; + // The leftmost index to pad is the value + 1 of the next element in + // 'initialized'. Since we cannot use the binary '+' operator on set + // iterators, we must increment the iterator here instead of in the + // for loop. + j++; + size_t leftmost_index_to_pad=(j!=initialized.rend()?*(j)+1:0); + // pad until we reach the next initialized index (right to left) + while(i>leftmost_index_to_pad) + concrete_array[(i--)-1]=last_concretized; + assert(i==leftmost_index_to_pad); + if(i>0) + last_concretized=concrete_array[i-1]; + } +} #endif