Skip to content

Commit b370e7c

Browse files
author
Daniel Kroening
authored
Merge pull request #550 from smowton/better_java_string_uniquing
Switch to stateless Java string escaping
2 parents 05c8580 + 2fa53cf commit b370e7c

File tree

3 files changed

+25
-28
lines changed

3 files changed

+25
-28
lines changed

src/java_bytecode/java_bytecode_typecheck.cpp

-6
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,3 @@ bool java_bytecode_typecheck(
184184
// fail for now
185185
return true;
186186
}
187-
188-
// Static members of java_bytecode_typecheckt:
189-
std::map<irep_idt, irep_idt>
190-
java_bytecode_typecheckt::string_literal_to_symbol_name;
191-
std::map<irep_idt, size_t>
192-
java_bytecode_typecheckt::escaped_string_literal_count;

src/java_bytecode/java_bytecode_typecheck.h

-2
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,6 @@ class java_bytecode_typecheckt:public typecheckt
6464
virtual std::string to_string(const typet &type);
6565

6666
std::set<irep_idt> already_typechecked;
67-
static std::map<irep_idt, irep_idt> string_literal_to_symbol_name;
68-
static std::map<irep_idt, size_t> escaped_string_literal_count;
6967
};
7068

7169
#endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_TYPECHECK_H

src/java_bytecode/java_bytecode_typecheck_expr.cpp

+25-20
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ Author: Daniel Kroening, [email protected]
66
77
\*******************************************************************/
88

9+
#include <iomanip>
10+
911
#include <util/std_expr.h>
1012
#include <util/prefix.h>
1113

@@ -91,11 +93,23 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array(
9193
typecheck_type(type);
9294
}
9395

94-
static void escape_non_alnum(std::string &toescape)
96+
static std::string escape_non_alnum(const std::string &toescape)
9597
{
98+
std::ostringstream escaped;
9699
for(auto &ch : toescape)
97-
if(!isalnum(ch))
98-
ch='_';
100+
{
101+
if(ch=='_')
102+
escaped << "__";
103+
else if(isalnum(ch))
104+
escaped << ch;
105+
else
106+
escaped << '_'
107+
<< std::hex
108+
<< std::setfill('0')
109+
<< std::setw(2)
110+
<< (unsigned int)ch;
111+
}
112+
return escaped.str();
99113
}
100114

101115
/*******************************************************************\
@@ -115,29 +129,20 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
115129
const irep_idt value=expr.get(ID_value);
116130
const symbol_typet string_type("java::java.lang.String");
117131

118-
auto findit=string_literal_to_symbol_name.find(value);
119-
if(findit!=string_literal_to_symbol_name.end())
132+
std::string escaped_symbol_name=
133+
"java::java.lang.String.Literal.";
134+
escaped_symbol_name+=escape_non_alnum(id2string(value));
135+
136+
auto findit=symbol_table.symbols.find(escaped_symbol_name);
137+
if(findit!=symbol_table.symbols.end())
120138
{
121-
expr=symbol_exprt(findit->second, pointer_typet(string_type));
139+
expr=findit->second.symbol_expr();
122140
return;
123141
}
124142

125143
// Create a new symbol:
126-
std::ostringstream identifier_str;
127-
std::string escaped=id2string(value);
128-
escape_non_alnum(escaped);
129-
identifier_str << "java::java.lang.String.Literal." << escaped;
130-
// Avoid naming clashes by virtue of escaping:
131-
// NOTE this increments the count stored in the map.
132-
size_t unique_num=++(escaped_string_literal_count[identifier_str.str()]);
133-
if(unique_num!=1)
134-
identifier_str << unique_num;
135-
136-
irep_idt identifier_id=identifier_str.str();
137-
string_literal_to_symbol_name.insert(std::make_pair(value, identifier_id));
138-
139144
symbolt new_symbol;
140-
new_symbol.name=identifier_id;
145+
new_symbol.name=escaped_symbol_name;
141146
new_symbol.type=pointer_typet(string_type);
142147
new_symbol.base_name="Literal";
143148
new_symbol.pretty_name=value;

0 commit comments

Comments
 (0)