Skip to content

Commit d1cbaad

Browse files
committed
Rearchitect var args support (making it actually work)
Previously all the logic to access values of variable argument lists was put into our va_arg() implementation. This required guessing what the parameters could have been, and did not take into account the initial value passed to va_start. Now va_start triggers constructing an array of pointers to parameters, the first of which is the original argument to va_start. The va_list is then just a pointer to the first element of the array. va_arg just increments the pointer. goto_program conversion places a side_effect_exprt where va_start was, and goto-symex populates the array with the actual values applicable for a particular invocation of the function.
1 parent 7639110 commit d1cbaad

File tree

12 files changed

+138
-109
lines changed

12 files changed

+138
-109
lines changed

regression/cbmc/Variadic1/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FUTURE
1+
CORE
22
main.c
33

44
^EXIT=0$

regression/cbmc/va_list3/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE
22
main.c
33

44
^EXIT=0$

regression/cbmc/va_list3/windows-preprocessed.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE
22
windows-preprocessed.i
33
--winx64
44
^EXIT=0$

src/ansi-c/expr2c.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3664,8 +3664,8 @@ std::string expr2ct::convert_with_precedence(
36643664
return convert_prob_uniform(src, precedence=16);
36653665
else if(statement==ID_statement_expression)
36663666
return convert_statement_expression(src, precedence=15);
3667-
else if(statement==ID_gcc_builtin_va_arg_next)
3668-
return convert_function(src, "gcc_builtin_va_arg_next", precedence=16);
3667+
else if(statement == ID_va_start)
3668+
return convert_function(src, CPROVER_PREFIX "va_start", precedence = 16);
36693669
else
36703670
return convert_norep(src, precedence);
36713671
}

src/goto-instrument/goto_program2code.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ void goto_program2codet::scan_for_varargs()
105105
const exprt &l = target->get_assign().lhs();
106106
const exprt &r = target->get_assign().rhs();
107107

108-
// find va_arg_next
109-
if(r.id()==ID_side_effect &&
110-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
108+
// find va_start
109+
if(
110+
r.id() == ID_side_effect &&
111+
to_side_effect_expr(r).get_statement() == ID_va_start)
111112
{
112113
assert(r.has_operands());
113114
va_list_expr.insert(r.op0());
@@ -309,16 +310,17 @@ goto_programt::const_targett goto_program2codet::convert_assign_varargs(
309310

310311
dest.add(std::move(f));
311312
}
312-
else if(r.id()==ID_address_of)
313+
else if(
314+
r.id() == ID_side_effect &&
315+
to_side_effect_expr(r).get_statement() == ID_va_start)
313316
{
314317
code_function_callt f(
315-
symbol_exprt("va_start", code_typet({}, empty_typet())),
316-
{this_va_list_expr, to_address_of_expr(r).object()});
318+
symbol_exprt(ID_va_start, code_typet({}, empty_typet())),
319+
{this_va_list_expr, to_address_of_expr(skip_typecast(r.op0())).object()});
317320

318321
dest.add(std::move(f));
319322
}
320-
else if(r.id()==ID_side_effect &&
321-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
323+
else if(r.id() == ID_plus)
322324
{
323325
code_function_callt f(
324326
symbol_exprt("va_arg", code_typet({}, empty_typet())),

src/goto-programs/builtin_functions.cpp

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,10 +1085,10 @@ void goto_convertt::do_function_call_symbol(
10851085
else if(identifier==ID_gcc_builtin_va_arg)
10861086
{
10871087
// This does two things.
1088-
// 1) Move list pointer to next argument.
1089-
// Done by gcc_builtin_va_arg_next.
1090-
// 2) Return value of argument.
1088+
// 1) Return value of argument.
10911089
// This is just dereferencing.
1090+
// 2) Move list pointer to next argument.
1091+
// This is just an increment.
10921092

10931093
if(arguments.size()!=1)
10941094
{
@@ -1100,27 +1100,21 @@ void goto_convertt::do_function_call_symbol(
11001100

11011101
exprt list_arg=make_va_list(arguments[0]);
11021102

1103-
{
1104-
side_effect_exprt rhs(
1105-
ID_gcc_builtin_va_arg_next,
1106-
list_arg.type(),
1107-
function.source_location());
1108-
rhs.copy_to_operands(list_arg);
1109-
rhs.set(ID_C_va_arg_type, to_code_type(function.type()).return_type());
1110-
goto_programt::targett t1=dest.add_instruction(ASSIGN);
1111-
t1->source_location=function.source_location();
1112-
t1->code=code_assignt(list_arg, rhs);
1113-
}
1114-
11151103
if(lhs.is_not_nil())
11161104
{
11171105
typet t=pointer_type(lhs.type());
1118-
dereference_exprt rhs(typecast_exprt(list_arg, t), lhs.type());
1106+
dereference_exprt rhs{typecast_exprt(dereference_exprt{list_arg}, t)};
11191107
rhs.add_source_location()=function.source_location();
1120-
goto_programt::targett t2=dest.add_instruction(ASSIGN);
1121-
t2->source_location=function.source_location();
1122-
t2->code=code_assignt(lhs, rhs);
1108+
dest.add(goto_programt::make_assignment(
1109+
lhs, std::move(rhs), function.source_location()));
11231110
}
1111+
1112+
code_assignt assign{
1113+
list_arg, plus_exprt{list_arg, from_integer(1, pointer_diff_type())}};
1114+
assign.rhs().set(
1115+
ID_C_va_arg_type, to_code_type(function.type()).return_type());
1116+
dest.add(goto_programt::make_assignment(
1117+
std::move(assign), function.source_location()));
11241118
}
11251119
else if(identifier=="__builtin_va_copy")
11261120
{
@@ -1146,7 +1140,7 @@ void goto_convertt::do_function_call_symbol(
11461140
t->source_location=function.source_location();
11471141
t->code=code_assignt(dest_expr, src_expr);
11481142
}
1149-
else if(identifier=="__builtin_va_start")
1143+
else if(identifier == "__builtin_va_start" || identifier == "__va_start")
11501144
{
11511145
// Set the list argument to be the address of the
11521146
// parameter argument.
@@ -1159,8 +1153,6 @@ void goto_convertt::do_function_call_symbol(
11591153
}
11601154

11611155
exprt dest_expr=make_va_list(arguments[0]);
1162-
const typecast_exprt src_expr(
1163-
address_of_exprt(arguments[1]), dest_expr.type());
11641156

11651157
if(!is_lvalue(dest_expr))
11661158
{
@@ -1169,9 +1161,13 @@ void goto_convertt::do_function_call_symbol(
11691161
throw 0;
11701162
}
11711163

1172-
goto_programt::targett t=dest.add_instruction(ASSIGN);
1173-
t->source_location=function.source_location();
1174-
t->code=code_assignt(dest_expr, src_expr);
1164+
side_effect_exprt rhs{
1165+
ID_va_start, dest_expr.type(), function.source_location()};
1166+
rhs.add_to_operands(
1167+
typecast_exprt{address_of_exprt{arguments[1]}, dest_expr.type()});
1168+
1169+
dest.add(goto_programt::make_assignment(
1170+
std::move(dest_expr), std::move(rhs), function.source_location()));
11751171
}
11761172
else if(identifier=="__builtin_va_end")
11771173
{

src/goto-symex/goto_symex.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,8 +374,8 @@ class goto_symext
374374
/// \return The resulting expression
375375
static exprt add_to_lhs(const exprt &lhs, const exprt &what);
376376

377-
virtual void symex_gcc_builtin_va_arg_next(
378-
statet &, const exprt &lhs, const side_effect_exprt &);
377+
virtual void
378+
symex_va_start(statet &, const exprt &lhs, const side_effect_exprt &);
379379
virtual void symex_allocate(
380380
statet &, const exprt &lhs, const side_effect_exprt &);
381381
virtual void symex_cpp_delete(statet &, const codet &);

src/goto-symex/goto_symex_state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ struct framet
9090
irep_idt function_identifier;
9191
std::map<goto_programt::const_targett, goto_state_listt> goto_state_map;
9292
symex_targett::sourcet calling_location;
93+
std::vector<irep_idt> parameter_names;
9394

9495
goto_programt::const_targett end_of_function;
9596
exprt return_value = nil_exprt();

src/goto-symex/symex_assign.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ void goto_symext::symex_assign(
5151
PRECONDITION(lhs.is_nil());
5252
symex_printf(state, side_effect_expr);
5353
}
54-
else if(statement==ID_gcc_builtin_va_arg_next)
55-
symex_gcc_builtin_va_arg_next(state, lhs, side_effect_expr);
54+
else if(statement == ID_va_start)
55+
symex_va_start(state, lhs, side_effect_expr);
5656
else
5757
UNREACHABLE;
5858
}

src/goto-symex/symex_builtin_functions.cpp

Lines changed: 81 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Author: Daniel Kroening, [email protected]
1414
#include <util/arith_tools.h>
1515
#include <util/c_types.h>
1616
#include <util/expr_initializer.h>
17+
#include <util/expr_util.h>
18+
#include <util/fresh_symbol.h>
1719
#include <util/invariant_utils.h>
1820
#include <util/optional.h>
1921
#include <util/pointer_offset_size.h>
@@ -203,24 +205,51 @@ void goto_symext::symex_allocate(
203205
code_assignt(lhs, typecast_exprt::conditional_cast(rhs, lhs.type())));
204206
}
205207

206-
irep_idt get_symbol(const exprt &src)
208+
/// Construct an entry for the var args array. Visual Studio complicates this as
209+
/// we need to put immediate values or pointers in there, depending on the size
210+
/// of the parameter.
211+
static exprt va_list_entry(
212+
const irep_idt &parameter,
213+
const pointer_typet &lhs_type,
214+
const namespacet &ns)
207215
{
208-
if(src.id()==ID_typecast)
209-
return get_symbol(to_typecast_expr(src).op());
210-
else if(src.id()==ID_address_of)
216+
static const pointer_typet void_ptr_type = pointer_type(empty_typet{});
217+
218+
symbol_exprt parameter_expr = ns.lookup(parameter).symbol_expr();
219+
220+
// Visual Studio has va_list == char* and stores parameters of size no
221+
// greater than the size of a pointer as immediate values
222+
if(lhs_type.subtype().id() != ID_pointer)
211223
{
212-
exprt op=to_address_of_expr(src).object();
213-
if(op.id()==ID_symbol &&
214-
op.get_bool(ID_C_SSA_symbol))
215-
return to_ssa_expr(op).get_object_name();
216-
else
217-
return irep_idt();
224+
exprt parameter_size = size_of_expr(parameter_expr.type(), ns);
225+
CHECK_RETURN(parameter_size.is_not_nil());
226+
227+
binary_predicate_exprt fits_slot{
228+
parameter_size,
229+
ID_le,
230+
from_integer(lhs_type.get_width(), parameter_size.type())};
231+
232+
// our back-ends don't do type casts from float to pointers, so put an
233+
// unsignedbv in the middle
234+
exprt float_cast = parameter_expr;
235+
if(parameter_expr.type().id() == ID_floatbv)
236+
float_cast =
237+
typecast_exprt{parameter_expr, unsignedbv_typet{lhs_type.get_width()}};
238+
239+
return if_exprt{
240+
fits_slot,
241+
typecast_exprt::conditional_cast(float_cast, void_ptr_type),
242+
typecast_exprt::conditional_cast(
243+
address_of_exprt{std::move(parameter_expr)}, void_ptr_type)};
218244
}
219245
else
220-
return irep_idt();
246+
{
247+
return typecast_exprt::conditional_cast(
248+
address_of_exprt{std::move(parameter_expr)}, void_ptr_type);
249+
}
221250
}
222251

223-
void goto_symext::symex_gcc_builtin_va_arg_next(
252+
void goto_symext::symex_va_start(
224253
statet &state,
225254
const exprt &lhs,
226255
const side_effect_exprt &code)
@@ -230,42 +259,51 @@ void goto_symext::symex_gcc_builtin_va_arg_next(
230259
if(lhs.is_nil())
231260
return; // ignore
232261

233-
exprt tmp=code.op0();
234-
state.rename(tmp, ns); // to allow constant propagation
235-
do_simplify(tmp);
236-
irep_idt id=get_symbol(tmp);
237-
238-
const auto zero = zero_initializer(lhs.type(), code.source_location(), ns);
239-
CHECK_RETURN(zero.has_value());
240-
exprt rhs(*zero);
262+
// create an array holding pointers to the parameters, starting after the
263+
// parameter that the operand points to
264+
const exprt &op = skip_typecast(code.op0());
265+
// this must be the address of a symbol
266+
const irep_idt start_parameter =
267+
to_ssa_expr(to_address_of_expr(op).object()).get_object_name();
241268

242-
if(!id.empty())
269+
exprt::operandst va_arg_operands;
270+
bool parameter_id_found = false;
271+
for(auto &parameter : state.top().parameter_names)
243272
{
244-
// strip last name off id to get function name
245-
std::size_t pos=id2string(id).rfind("::");
246-
if(pos!=std::string::npos)
273+
if(!parameter_id_found)
247274
{
248-
irep_idt function_identifier=std::string(id2string(id), 0, pos);
249-
std::string base=id2string(function_identifier)+"::va_arg";
250-
251-
if(has_prefix(id2string(id), base))
252-
id=base+std::to_string(
253-
safe_string2unsigned(
254-
std::string(id2string(id), base.size(), std::string::npos))+1);
255-
else
256-
id=base+"0";
257-
258-
const symbolt *symbol;
259-
if(!ns.lookup(id, symbol))
260-
{
261-
const symbol_exprt symbol_expr(symbol->name, symbol->type);
262-
rhs = typecast_exprt::conditional_cast(
263-
address_of_exprt(symbol_expr), lhs.type());
264-
}
275+
parameter_id_found = parameter == start_parameter;
276+
continue;
265277
}
266-
}
267278

268-
symex_assign(state, code_assignt(lhs, rhs));
279+
va_arg_operands.push_back(
280+
va_list_entry(parameter, to_pointer_type(lhs.type()), ns));
281+
}
282+
const std::size_t va_arg_size = va_arg_operands.size();
283+
array_exprt array{std::move(va_arg_operands),
284+
array_typet{pointer_type(empty_typet{}),
285+
from_integer(va_arg_size, size_type())}};
286+
287+
symbolt &va_array = get_fresh_aux_symbol(
288+
array.type(),
289+
id2string(state.source.function_id),
290+
"va_args",
291+
state.source.pc->source_location,
292+
ns.lookup(state.source.function_id).mode,
293+
state.symbol_table);
294+
va_array.value = array;
295+
296+
clean_expr(array, state, false);
297+
state.rename(array, ns);
298+
do_simplify(array);
299+
symex_assign(state, code_assignt{va_array.symbol_expr(), std::move(array)});
300+
301+
address_of_exprt rhs{
302+
index_exprt{va_array.symbol_expr(), from_integer(0, index_type())}};
303+
state.rename(rhs, ns);
304+
symex_assign(
305+
state,
306+
code_assignt{lhs, typecast_exprt::conditional_cast(rhs, lhs.type())});
269307
}
270308

271309
irep_idt get_string_argument_rec(const exprt &src)

src/goto-symex/symex_function_call.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
1616
#include <util/byte_operators.h>
1717
#include <util/c_types.h>
1818
#include <util/exception_utils.h>
19+
#include <util/fresh_symbol.h>
1920
#include <util/invariant.h>
2021

2122
static void locality(
@@ -60,6 +61,7 @@ void goto_symext::parameter_assignments(
6061
INVARIANT(
6162
!identifier.empty(),
6263
"function pointer parameter must have an identifier");
64+
state.top().parameter_names.push_back(identifier);
6365

6466
const symbolt &symbol=ns.lookup(identifier);
6567
symbol_exprt lhs=symbol.symbol_expr();
@@ -156,30 +158,20 @@ void goto_symext::parameter_assignments(
156158
if(function_type.has_ellipsis())
157159
{
158160
// These are va_arg arguments; their types may differ from call to call
159-
std::size_t va_count=0;
160-
const symbolt *va_sym=nullptr;
161-
while(!ns.lookup(
162-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count),
163-
va_sym))
164-
++va_count;
165-
166-
for( ; it1!=arguments.end(); it1++, va_count++)
161+
for(; it1 != arguments.end(); it1++)
167162
{
168-
irep_idt id=
169-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count);
170-
171-
// add to symbol table
172-
symbolt symbol;
173-
symbol.name=id;
174-
symbol.base_name="va_arg"+std::to_string(va_count);
175-
symbol.mode=ID_C;
176-
symbol.type=it1->type();
177-
178-
state.symbol_table.insert(std::move(symbol));
179-
180-
symbol_exprt lhs=symbol_exprt(id, it1->type());
181-
182-
symex_assign(state, code_assignt(lhs, *it1));
163+
symbolt &va_arg = get_fresh_aux_symbol(
164+
it1->type(),
165+
id2string(function_identifier),
166+
"va_arg",
167+
state.source.pc->source_location,
168+
ns.lookup(function_identifier).mode,
169+
state.symbol_table);
170+
va_arg.is_parameter = true;
171+
172+
state.top().parameter_names.push_back(va_arg.name);
173+
174+
symex_assign(state, code_assignt{va_arg.symbol_expr(), *it1});
183175
}
184176
}
185177
else if(it1!=arguments.end())

0 commit comments

Comments
 (0)