Skip to content

Improvements in String.toLowerCase #1036

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions regression/strings-smoke-tests/java_case/test.desc
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
CORE
test_case.class
--refine-strings
^EXIT=0$
^EXIT=10$
^SIGNAL=0$
^VERIFICATION SUCCESSFUL$
assertion.* file test_case.java line 10 .* SUCCESS$
assertion.* file test_case.java line 11 .* SUCCESS$
assertion.* file test_case.java line 12 .* SUCCESS$
assertion.* file test_case.java line 16 .* FAILURE$
assertion.* file test_case.java line 20 .* SUCCESS$
assertion.* file test_case.java line 24 .* FAILURE$
--
Binary file modified regression/strings-smoke-tests/java_case/test_case.class
Binary file not shown.
33 changes: 24 additions & 9 deletions regression/strings-smoke-tests/java_case/test_case.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
public class test_case
{
public static void main(/*String[] argv*/)
{
String s = new String("Ab");
String l = s.toLowerCase();
String u = s.toUpperCase();
assert(l.equals("ab"));
assert(u.equals("AB"));
assert(s.equalsIgnoreCase("aB"));
}
public static void main(int i)
{
String s = new String("Ab");
String l = s.toLowerCase();
String u = s.toUpperCase();
if(i==1)
{
assert(l.equals("ab"));
assert(u.equals("AB"));
assert(s.equalsIgnoreCase("aB"));
}
else if(i==2)
{
assert(!u.equals("AB"));
}
else if(i==3)
{
assert("ÖÇ".toLowerCase().equals("öç"));
}
else
{
assert(!"ÖÇ".toLowerCase().equals("öç"));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -216,34 +216,48 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case(
string_exprt res=fresh_string(ref_type);
exprt char_a=constant_char('a', char_type);
exprt char_A=constant_char('A', char_type);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't these be const ?

exprt char_z=constant_char('z', char_type);
exprt char_Z=constant_char('Z', char_type);

// TODO: add support for locales using case mapping information
// from the UnicodeData file.
// TODO: for now, only characters in Basic Latin and Latin-1 supplement
// are supported (up to 0x100), we should add others using case mapping
// information from the UnicodeData file.

// We add axioms:
// a1 : |res| = |str|
// a2 : forall idx<str.length, 'A'<=str[idx]<='Z' => res[idx]=str[idx]+'a'-'A'
// a3 : forall idx<str.length, !('a'<=str[idx]<='z') => res[idx]=str[idx]
// forall idx<str.length,
// this[idx]='A'<=str[idx]<='Z' ? str[idx]+'a'-'A' : str[idx]
// a2 : forall idx<str.length,
// is_upper_case(str[idx])?
// res[idx]=str[idx]+'a'-'A' : res[idx]=str[idx]<0x100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the actual implementation is slightly different, its res[idx]=str[idx]+0x20, maybe replace by diff here and add where diff='a'-'A'=0x20


exprt a1=res.axiom_for_has_same_length_as(str);
axioms.push_back(a1);

symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type);
exprt is_upper_case=and_exprt(
exprt::operandst upper_case;
upper_case.push_back(and_exprt(
binary_relation_exprt(char_A, ID_le, str[idx]),
binary_relation_exprt(str[idx], ID_le, char_Z));
minus_exprt diff(char_a, char_A);
equal_exprt convert(res[idx], plus_exprt(str[idx], diff));
string_constraintt a2(idx, res.length(), is_upper_case, convert);
binary_relation_exprt(str[idx], ID_le, from_integer('Z', char_type))));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also define char_Z, too


upper_case.push_back(and_exprt(
binary_relation_exprt(from_integer(0xc0, char_type), ID_le, str[idx]),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a comment for what 0xc0 is

binary_relation_exprt(str[idx], ID_le, from_integer(0xd6, char_type))));

upper_case.push_back(and_exprt(
binary_relation_exprt(from_integer(0xd8, char_type), ID_le, str[idx]),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a comment for what 0xd8 is

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, name variables to hold these values, removing the need for comments.

binary_relation_exprt(str[idx], ID_le, from_integer(0xde, char_type))));

exprt is_upper_case=disjunction(upper_case);

// The difference between upper-case and lower-case for the basic latin and
// latin-1 supplement is 0x20.
exprt diff=from_integer(0x20, char_type);
equal_exprt converted(res[idx], plus_exprt(str[idx], diff));
and_exprt non_converted(
equal_exprt(res[idx], str[idx]),
binary_relation_exprt(str[idx], ID_lt, from_integer(0x100, char_type)));
if_exprt conditional_convert(is_upper_case, converted, non_converted);

string_constraintt a2(idx, res.length(), conditional_convert);
axioms.push_back(a2);

equal_exprt eq(res[idx], str[idx]);
string_constraintt a3(idx, res.length(), not_exprt(is_upper_case), eq);
axioms.push_back(a3);
return res;
}

Expand Down