-
Notifications
You must be signed in to change notification settings - Fork 273
Eval function for string_to_lower/upper_case builtin functions #2678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c4801b
a71f2c0
1137ffd
836cbad
be477c9
53ccd3f
fe9071b
e2961b5
579d00c
71de2e3
58307a0
a345d30
ecc0e43
a44468b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
public class Test { | ||
public String det() | ||
{ | ||
StringBuilder builder = new StringBuilder(); | ||
builder.append("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toLowerCase()); | ||
builder.append("abcdeghijklmnopfrstuqwxyzABCDvFGHIJKLMENOPQRSTUVWXYZ".toLowerCase()); | ||
builder.append("abcdeghijlmnopqrstvwxyzABCDEFGHIJuKLMNOPQRSfkTUVWXYZ".toLowerCase()); | ||
builder.append("acdefghijklmnopqrsuvwxyzABCDEFbGHIJKLMNOPtQRSTUVWXYZ".toLowerCase()); | ||
builder.append("abcdfghijklmnopqrstuvwxyzABCDEFGHIJeKLMNOPQRSTUVWXYZ".toLowerCase()); | ||
return builder.toString(); | ||
} | ||
|
||
public String nonDet(String s) | ||
{ | ||
if(s.length() < 20) | ||
return "Short string"; | ||
if(!s.startsWith("A")) | ||
return "String not starting with A"; | ||
|
||
StringBuilder builder = new StringBuilder(); | ||
builder.append(s.toLowerCase()); | ||
builder.append(s.toLowerCase()); | ||
builder.append(":"); | ||
builder.append(s); | ||
builder.append(":"); | ||
builder.append(s.toLowerCase()); | ||
builder.append(s.toLowerCase()); | ||
return builder.toString(); | ||
} | ||
|
||
public String withDependency(String s, boolean b) | ||
{ | ||
// Filter | ||
if(s == null || s.length() < 20) | ||
return "Short string"; | ||
if(!s.endsWith("A")) | ||
return "String not ending with A"; | ||
|
||
// Act | ||
String result = s + s.toLowerCase(); | ||
|
||
// Assert | ||
if(b) { | ||
assert(result.endsWith("a")); | ||
} else { | ||
assert(!result.endsWith("a")); | ||
} | ||
return result; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.withDependency --verbosity 10 --max-nondet-string-length 10000 | ||
^EXIT=10$ | ||
^SIGNAL=0$ | ||
assertion at file Test.java line 44 .*: SUCCESS | ||
assertion at file Test.java line 46 .*: FAILURE | ||
-- | ||
-- | ||
Check that when there are dependencies, axioms are adde correctly. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.det --verbosity 10 --cover location | ||
^EXIT=0$ | ||
^SIGNAL=0$ | ||
coverage.* file Test.java line 9 .*: SATISFIED | ||
-- | ||
adding lemma .*nondet_infinite_array | ||
-- | ||
Check that using the string dependence informations, no lemma involving arrays is added |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.nonDet --verbosity 10 --cover location --max-nondet-string-length 1000 | ||
^EXIT=0$ | ||
^SIGNAL=0$ | ||
coverage.* file Test.java line 27 .*: SATISFIED | ||
-- | ||
adding lemma .*nondet_infinite_array | ||
-- | ||
Check that using the string dependence informations, no lemma involving arrays is added |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
public class Test { | ||
public String det() | ||
{ | ||
StringBuilder builder = new StringBuilder(); | ||
builder.append("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toLowerCase()); | ||
builder.append("abcdeghijklmnopfrstuqwxyzABCDvFGHIJKLMENOPQRSTUVWXYZ".toUpperCase()); | ||
builder.append("abcdeghijlmnopqrstvwxyzABCDEFGHIJuKLMNOPQRSfkTUVWXYZ".toUpperCase()); | ||
builder.append("acdefghijklmnopqrsuvwxyzABCDEFbGHIJKLMNOPtQRSTUVWXYZ".toUpperCase()); | ||
builder.append("abcdfghijklmnopqrstuvwxyzABCDEFGHIJeKLMNOPQRSTUVWXYZ".toUpperCase()); | ||
return builder.toString(); | ||
} | ||
|
||
public String nonDet(String s) | ||
{ | ||
if(s.length() < 20) | ||
return "Short string"; | ||
if(!s.startsWith("a")) | ||
return "String not starting with a"; | ||
|
||
StringBuilder builder = new StringBuilder(); | ||
builder.append(s.toUpperCase()); | ||
builder.append(s.toUpperCase()); | ||
builder.append(":"); | ||
builder.append(s); | ||
builder.append(":"); | ||
builder.append(s.toUpperCase()); | ||
builder.append(s.toUpperCase()); | ||
return builder.toString(); | ||
} | ||
|
||
public String withDependency(String s, boolean b) | ||
{ | ||
// Filter | ||
if(s == null || s.length() < 20) | ||
return "Short string"; | ||
if(!s.endsWith("a")) | ||
return "String not ending with a"; | ||
|
||
// Act | ||
String result = s + s.toUpperCase(); | ||
|
||
// Assert | ||
if(b) { | ||
assert(result.endsWith("A")); | ||
} else { | ||
assert(!result.endsWith("A")); | ||
} | ||
return result; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.withDependency --verbosity 10 --max-nondet-string-length 10000 | ||
^EXIT=10$ | ||
^SIGNAL=0$ | ||
assertion at file Test.java line 44 .*: SUCCESS | ||
assertion at file Test.java line 46 .*: FAILURE | ||
-- | ||
-- | ||
Check that when there are dependencies, axioms are added correctly. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.det --verbosity 10 --cover location | ||
^EXIT=0$ | ||
^SIGNAL=0$ | ||
coverage.* file Test.java line 9 .*: SATISFIED | ||
-- | ||
adding lemma .*nondet_infinite_array | ||
-- | ||
Check that using the string dependence informations, no lemma involving arrays is added |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
CORE | ||
Test.class | ||
--function Test.nonDet --verbosity 10 --cover location --max-nondet-string-length 1000 | ||
^EXIT=0$ | ||
^SIGNAL=0$ | ||
coverage.* file Test.java line 27 .*: SATISFIED | ||
-- | ||
adding lemma .*nondet_infinite_array | ||
-- | ||
Check that using the string dependence informations, no lemma involving arrays is added |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,9 @@ | |
|
||
#include "string_constraint_generator.h" | ||
|
||
/// Get the valuation of the string, given a valuation | ||
/// Given a function `get_value` which gives a valuation to expressions, attempt | ||
/// to find the current value of the array `a`. If the valuation of some | ||
/// characters are missing, then return an empty optional. | ||
static optionalt<std::vector<mp_integer>> eval_string( | ||
const array_string_exprt &a, | ||
const std::function<exprt(const exprt &)> &get_value); | ||
|
@@ -187,6 +189,51 @@ exprt string_set_char_builtin_functiont::length_constraint() const | |
equal_exprt(return_code, return_value)); | ||
} | ||
|
||
static bool eval_is_upper_case(const mp_integer &c) | ||
{ | ||
// Characters between 'A' and 'Z' are upper-case | ||
// Characters between 0xc0 (latin capital A with grave) | ||
// and 0xd6 (latin capital O with diaeresis) are upper-case | ||
// Characters between 0xd8 (latin capital O with stroke) | ||
// and 0xde (latin capital thorn) are upper-case | ||
return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) || | ||
(0xd8 <= c && c <= 0xde); | ||
} | ||
|
||
optionalt<exprt> string_to_lower_case_builtin_functiont::eval( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm probably not paying attention, but why not just return the exprt anyway? This is how the rewriter works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the eval function is not successful, the solver will get the valuation in another way (calling get of the underlying solver and doing some post-processing). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK... is this called before or after solving? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's called from |
||
const std::function<exprt(const exprt &)> &get_value) const | ||
{ | ||
auto input_opt = eval_string(input, get_value); | ||
if(!input_opt) | ||
return {}; | ||
for(mp_integer &c : input_opt.value()) | ||
{ | ||
if(eval_is_upper_case(c)) | ||
c += 0x20; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does that always work? Does it work better than to_lower? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It mimics the behavior of the function specified by the constraints, which is not exactly the same as to_lower |
||
} | ||
const auto length = | ||
from_integer(input_opt.value().size(), result.length().type()); | ||
const array_typet type(result.type().subtype(), length); | ||
return make_string(input_opt.value(), type); | ||
} | ||
|
||
optionalt<exprt> string_to_upper_case_builtin_functiont::eval( | ||
const std::function<exprt(const exprt &)> &get_value) const | ||
{ | ||
auto input_opt = eval_string(input, get_value); | ||
if(!input_opt) | ||
return {}; | ||
for(mp_integer &c : input_opt.value()) | ||
{ | ||
if(eval_is_upper_case(c - 0x20)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same again. |
||
c -= 0x20; | ||
} | ||
const auto length = | ||
from_integer(input_opt.value().size(), result.length().type()); | ||
const array_typet type(result.type().subtype(), length); | ||
return make_string(input_opt.value(), type); | ||
} | ||
|
||
std::vector<mp_integer> string_insertion_builtin_functiont::eval( | ||
const std::vector<mp_integer> &input1_value, | ||
const std::vector<mp_integer> &input2_value, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,6 +175,12 @@ class string_constraint_generatort final | |
const array_string_exprt &str, | ||
const exprt &position, | ||
const exprt &character); | ||
exprt add_axioms_for_to_lower_case( | ||
const array_string_exprt &res, | ||
const array_string_exprt &str); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why exported? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are going to use it in the string_builtin_function object evaluation function |
||
exprt add_axioms_for_to_upper_case( | ||
const array_string_exprt &res, | ||
const array_string_exprt &expr); | ||
|
||
private: | ||
symbol_exprt fresh_boolean(const irep_idt &prefix); | ||
|
@@ -335,9 +341,6 @@ class string_constraint_generatort final | |
exprt add_axioms_for_substring(const function_application_exprt &f); | ||
exprt add_axioms_for_to_lower_case(const function_application_exprt &f); | ||
exprt add_axioms_for_to_upper_case(const function_application_exprt &f); | ||
exprt add_axioms_for_to_upper_case( | ||
const array_string_exprt &res, | ||
const array_string_exprt &expr); | ||
exprt add_axioms_for_trim(const function_application_exprt &f); | ||
|
||
exprt add_axioms_for_code_point( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -456,10 +456,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( | |
res=add_axioms_for_compare_to(expr); | ||
else if(id==ID_cprover_string_literal_func) | ||
res=add_axioms_from_literal(expr); | ||
else if(id==ID_cprover_string_concat_func) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo in commit message: redundant There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
res=add_axioms_for_concat(expr); | ||
else if(id==ID_cprover_string_concat_char_func) | ||
res=add_axioms_for_concat_char(expr); | ||
else if(id==ID_cprover_string_concat_code_point_func) | ||
res=add_axioms_for_concat_code_point(expr); | ||
else if(id==ID_cprover_string_insert_func) | ||
|
@@ -480,18 +476,10 @@ exprt string_constraint_generatort::add_axioms_for_function_application( | |
res=add_axioms_for_substring(expr); | ||
else if(id==ID_cprover_string_trim_func) | ||
res=add_axioms_for_trim(expr); | ||
else if(id==ID_cprover_string_to_lower_case_func) | ||
res=add_axioms_for_to_lower_case(expr); | ||
else if(id==ID_cprover_string_to_upper_case_func) | ||
res=add_axioms_for_to_upper_case(expr); | ||
else if(id==ID_cprover_string_char_set_func) | ||
res=add_axioms_for_char_set(expr); | ||
else if(id==ID_cprover_string_empty_string_func) | ||
res=add_axioms_for_empty_string(expr); | ||
else if(id==ID_cprover_string_copy_func) | ||
res=add_axioms_for_copy(expr); | ||
else if(id==ID_cprover_string_of_int_func) | ||
res=add_axioms_from_int(expr); | ||
else if(id==ID_cprover_string_of_int_hex_func) | ||
res=add_axioms_from_int_hex(expr); | ||
else if(id==ID_cprover_string_of_float_func) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do you have tests for the other letters too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, but we don't test the result here anyway, we are just testing that unnecessary constraints are not added. I didn't find a good way to test it here: we cannot add assertions because that could change the result and recovering the string from the trace is difficult.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A general, unhelpful point -- are we doing ASCII case conversion or Unicode case conversion, because the later, frankly, fills me with fear:
https://unicode.org/faq/casemap_charprop.html
https://www.unicode.org/reports/tr21/tr21-5.html
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I seem to recall seeing tests for upper-casing non-ascii characters...