diff --git a/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.c b/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.c new file mode 100644 index 00000000000..5d06a69c547 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.c @@ -0,0 +1,16 @@ +#include + +void my_f(void *) { } +void my_h(void *) { } +void *my_g(void) { return malloc(1); } + +void my_function() +{ + void *o; + + my_f(o); // T1 source + my_h(o); // T1,T2 sink + + o=my_g(); // T2 source + my_h(o); // T1,T2 sink +} diff --git a/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.o b/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.o new file mode 100644 index 00000000000..eef88d71225 Binary files /dev/null and b/regression/goto-analyzer-taint-ansi-c/taint-basic1/main.o differ diff --git a/regression/goto-analyzer-taint-ansi-c/taint-basic1/taint.json b/regression/goto-analyzer-taint-ansi-c/taint-basic1/taint.json new file mode 100644 index 00000000000..dc9129a43a8 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-basic1/taint.json @@ -0,0 +1,6 @@ +[ +{ "id": "my_f", "kind": "source", "where": "parameter1", "taint": "T1", "function": "my_f" }, +{ "id": "my_g", "kind": "source", "where": "return_value", "taint": "T2", "function": "my_g" }, +{ "id": "my_h1", "kind": "sink", "where": "parameter1", "taint": "T1", "function": "my_h", "message": "There is a T1 flow" }, +{ "id": "my_h2", "kind": "sink", "where": "parameter1", "taint": "T2", "function": "my_h", "message": "There is a T2 flow" } +] diff --git a/regression/goto-analyzer-taint-ansi-c/taint-basic1/test.desc b/regression/goto-analyzer-taint-ansi-c/taint-basic1/test.desc new file mode 100644 index 00000000000..519166e0034 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-basic1/test.desc @@ -0,0 +1,10 @@ +CORE +main.o +--taint taint.json +^EXIT=0$ +^SIGNAL=0$ +^file main.c line 12( function .*)?: There is a T1 flow \(taint rule my_h1\)$ +^file main.c line 15( function .*)?: There is a T2 flow \(taint rule my_h2\)$ +-- +^file main.c line 12( function .*)?: There is a T2 flow \(.*\)$ +^file main.c line 15( function .*)?: There is a T1 flow \(.*\)$ diff --git a/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.c b/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.c new file mode 100644 index 00000000000..65b8ab36cf4 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.c @@ -0,0 +1,13 @@ +void my_f(void *) { } +void my_h(void *) { } + +void my_function() +{ + void *o1; + my_f(o1); // T1 source + + void *o2; + o2=o1; + + my_h(o2); // T1 sink +} diff --git a/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.o b/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.o new file mode 100644 index 00000000000..67438a63d97 Binary files /dev/null and b/regression/goto-analyzer-taint-ansi-c/taint-copy1/main.o differ diff --git a/regression/goto-analyzer-taint-ansi-c/taint-copy1/taint.json b/regression/goto-analyzer-taint-ansi-c/taint-copy1/taint.json new file mode 100644 index 00000000000..44a841f0cc4 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-copy1/taint.json @@ -0,0 +1,4 @@ +[ +{ "id": "my_f", "kind": "source", "where": "parameter1", "taint": "T1", "function": "my_f" }, +{ "id": "my_h", "kind": "sink", "where": "parameter1", "taint": "T1", "function": "my_h", "message": "There is a T1 flow" } +] diff --git a/regression/goto-analyzer-taint-ansi-c/taint-copy1/test.desc b/regression/goto-analyzer-taint-ansi-c/taint-copy1/test.desc new file mode 100644 index 00000000000..af2d62eb456 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-copy1/test.desc @@ -0,0 +1,7 @@ +CORE +main.o +--taint taint.json +^EXIT=0$ +^SIGNAL=0$ +^file main.c line 12( function .*)?: There is a T1 flow \(taint rule my_h\)$ +-- diff --git a/regression/goto-analyzer-taint-ansi-c/taint-member1/main.c b/regression/goto-analyzer-taint-ansi-c/taint-member1/main.c new file mode 100644 index 00000000000..5c372777154 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-member1/main.c @@ -0,0 +1,24 @@ +#include + +void my_f(void *) { } +void my_h(void *) { } + +void my_function() +{ + struct some_struct + { + void *data; + } whatnot; + + my_f(whatnot.data); // T1 source + my_h(whatnot.data); // T1 sink + + // via a copy + void *o=whatnot.data; + my_h(o); // T1 sink + + // copy entire struct + struct some_struct whatelse; + whatelse=whatnot; + my_h(whatelse.data); // T1 sink +} diff --git a/regression/goto-analyzer-taint-ansi-c/taint-member1/main.o b/regression/goto-analyzer-taint-ansi-c/taint-member1/main.o new file mode 100644 index 00000000000..209a3a0924a Binary files /dev/null and b/regression/goto-analyzer-taint-ansi-c/taint-member1/main.o differ diff --git a/regression/goto-analyzer-taint-ansi-c/taint-member1/taint.json b/regression/goto-analyzer-taint-ansi-c/taint-member1/taint.json new file mode 100644 index 00000000000..44a841f0cc4 --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-member1/taint.json @@ -0,0 +1,4 @@ +[ +{ "id": "my_f", "kind": "source", "where": "parameter1", "taint": "T1", "function": "my_f" }, +{ "id": "my_h", "kind": "sink", "where": "parameter1", "taint": "T1", "function": "my_h", "message": "There is a T1 flow" } +] diff --git a/regression/goto-analyzer-taint-ansi-c/taint-member1/test.desc b/regression/goto-analyzer-taint-ansi-c/taint-member1/test.desc new file mode 100644 index 00000000000..0c71bfa735d --- /dev/null +++ b/regression/goto-analyzer-taint-ansi-c/taint-member1/test.desc @@ -0,0 +1,9 @@ +CORE +main.o +--taint taint.json +^EXIT=0$ +^SIGNAL=0$ +^file main.c line 14( function .*)?: There is a T1 flow \(taint rule my_h\)$ +^file main.c line 18( function .*)?: There is a T1 flow \(taint rule my_h\)$ +^file main.c line 23( function .*)?: There is a T1 flow \(taint rule my_h\)$ +-- diff --git a/src/analyses/custom_bitvector_analysis.cpp b/src/analyses/custom_bitvector_analysis.cpp index fb675b77b20..a0d835dd865 100644 --- a/src/analyses/custom_bitvector_analysis.cpp +++ b/src/analyses/custom_bitvector_analysis.cpp @@ -84,6 +84,19 @@ irep_idt custom_bitvector_domaint::object2id(const exprt &src) return '*'+id2string(op_id); } } + else if(src.id()==ID_member) + { + const auto &m=to_member_expr(src); + const exprt &op=m.compound(); + + irep_idt op_id=object2id(op); + + if(op_id.empty()) + return irep_idt(); + else + return id2string(op_id)+'.'+ + id2string(m.get_component_name()); + } else if(src.id()==ID_typecast) return object2id(to_typecast_expr(src).op()); else @@ -209,6 +222,49 @@ std::set custom_bitvector_analysist::aliases( return std::set(); } +void custom_bitvector_domaint::assign_struct_rec( + locationt from, + const exprt &lhs, + const exprt &rhs, + custom_bitvector_analysist &cba, + const namespacet &ns) +{ + if(ns.follow(lhs.type()).id()==ID_struct) + { + const struct_typet &struct_type= + to_struct_type(ns.follow(lhs.type())); + + // assign member-by-member + for(const auto &c : struct_type.components()) + { + member_exprt lhs_member(lhs, c), + rhs_member(rhs, c); + assign_struct_rec(from, lhs_member, rhs_member, cba, ns); + } + } + else + { + // may alias other stuff + std::set lhs_set=cba.aliases(lhs, from); + + vectorst rhs_vectors=get_rhs(rhs); + + for(const auto &lhs_alias : lhs_set) + { + assign_lhs(lhs_alias, rhs_vectors); + } + + // is it a pointer? + if(lhs.type().id()==ID_pointer) + { + dereference_exprt lhs_deref(lhs); + dereference_exprt rhs_deref(rhs); + vectorst rhs_vectors=get_rhs(rhs_deref); + assign_lhs(lhs_deref, rhs_vectors); + } + } +} + void custom_bitvector_domaint::transform( locationt from, locationt to, @@ -226,25 +282,7 @@ void custom_bitvector_domaint::transform( case ASSIGN: { const code_assignt &code_assign=to_code_assign(instruction.code); - - // may alias other stuff - std::set lhs_set=cba.aliases(code_assign.lhs(), from); - - vectorst rhs_vectors=get_rhs(code_assign.rhs()); - - for(const auto &lhs : lhs_set) - { - assign_lhs(lhs, rhs_vectors); - } - - // is it a pointer? - if(code_assign.lhs().type().id()==ID_pointer) - { - dereference_exprt lhs_deref(code_assign.lhs()); - dereference_exprt rhs_deref(code_assign.rhs()); - vectorst rhs_vectors=get_rhs(rhs_deref); - assign_lhs(lhs_deref, rhs_vectors); - } + assign_struct_rec(from, code_assign.lhs(), code_assign.rhs(), cba, ns); } break; diff --git a/src/analyses/custom_bitvector_analysis.h b/src/analyses/custom_bitvector_analysis.h index 55d5d444af8..4896d768cb7 100644 --- a/src/analyses/custom_bitvector_analysis.h +++ b/src/analyses/custom_bitvector_analysis.h @@ -96,6 +96,13 @@ class custom_bitvector_domaint:public ai_domain_baset bitst may_bits, must_bits; + void assign_struct_rec( + locationt from, + const exprt &lhs, + const exprt &rhs, + custom_bitvector_analysist &, + const namespacet &); + void assign_lhs(const exprt &, const vectorst &); void assign_lhs(const irep_idt &, const vectorst &); vectorst get_rhs(const exprt &) const; diff --git a/src/goto-analyzer/taint_analysis.cpp b/src/goto-analyzer/taint_analysis.cpp index 212f8aab981..44dd30a0c72 100644 --- a/src/goto-analyzer/taint_analysis.cpp +++ b/src/goto-analyzer/taint_analysis.cpp @@ -67,7 +67,7 @@ void taint_analysist::instrument( { const goto_programt::instructiont &instruction=*it; - goto_programt tmp; + goto_programt insert_before, insert_after; switch(instruction.type) { @@ -164,7 +164,7 @@ void taint_analysist::instrument( code_set_may.op0()=where; code_set_may.op1()= address_of_exprt(string_constantt(rule.taint)); - goto_programt::targett t=tmp.add_instruction(); + goto_programt::targett t=insert_after.add_instruction(); t->make_other(code_set_may); t->source_location=instruction.source_location; } @@ -172,7 +172,7 @@ void taint_analysist::instrument( case taint_parse_treet::rulet::SINK: { - goto_programt::targett t=tmp.add_instruction(); + goto_programt::targett t=insert_before.add_instruction(); binary_predicate_exprt get_may("get_may"); get_may.op0()=where; get_may.op1()=address_of_exprt(string_constantt(rule.taint)); @@ -191,7 +191,7 @@ void taint_analysist::instrument( code_clear_may.op0()=where; code_clear_may.op1()= address_of_exprt(string_constantt(rule.taint)); - goto_programt::targett t=tmp.add_instruction(); + goto_programt::targett t=insert_after.add_instruction(); t->make_other(code_clear_may); t->source_location=instruction.source_location; } @@ -208,11 +208,17 @@ void taint_analysist::instrument( } } - if(!tmp.empty()) + if(!insert_before.empty()) { - goto_programt::targett next=it; - next++; - goto_function.body.destructive_insert(next, tmp); + goto_function.body.insert_before_swap(it, insert_before); + // advance until we get back to the call + while(!it->is_function_call()) ++it; + } + + if(!insert_after.empty()) + { + goto_function.body.destructive_insert( + std::next(it), insert_after); } } }