From e860c6df86ef5c7e47878b6095bd462a60561cd4 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 13 Dec 2016 16:15:16 +0000 Subject: [PATCH 1/8] Implement local variable live range merging Previously one entry in the local variable table meant one declaration at runtime. We now merge live ranges when it appears they refer to the same variable due to control flow from one live range to another. Variable declarations are placed at a program point that dominates all users. This is currently unused, but will be plumbed in in a subsequent commit. --- .../java_bytecode_convert_method.cpp | 458 +++++++++++++++++- 1 file changed, 456 insertions(+), 2 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 8ee9ba3db46..e9248c18440 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -6,8 +6,6 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ -#define DEBUG - #ifdef DEBUG #include #endif @@ -21,12 +19,17 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include +#include + #include "java_bytecode_convert_method.h" #include "bytecode_info.h" #include "java_types.h" #include #include +#include +#include class patternt { @@ -65,6 +68,8 @@ class java_bytecode_convert_methodt:public messaget typedef java_bytecode_parse_treet::methodt methodt; typedef java_bytecode_parse_treet::instructiont instructiont; typedef methodt::instructionst instructionst; + typedef methodt::local_variable_tablet local_variable_tablet; + typedef methodt::local_variablet local_variablet; void operator()(const symbolt &class_symbol, const methodt &method) { @@ -72,11 +77,27 @@ class java_bytecode_convert_methodt:public messaget } protected: + irep_idt method_id; symbol_tablet &symbol_table; irep_idt current_method; typet method_return_type; +public: + struct holet { + unsigned start_pc; + unsigned length; + }; + + struct local_variable_with_holest + { + local_variablet var; + std::vector holes; + }; + + typedef std::vector local_variable_table_with_holest; + +protected: class variablet { public: @@ -84,6 +105,8 @@ class java_bytecode_convert_methodt:public messaget size_t start_pc; size_t length; bool is_parameter; + std::vector holes; + variablet() : symbol_expr(), is_parameter(false) {} }; typedef std::vector variablest; @@ -239,7 +262,24 @@ class java_bytecode_convert_methodt:public messaget bool done; }; +public: typedef std::map address_mapt; + typedef cfg_dominators_templatet java_cfg_dominatorst; + +protected: + + void find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void setup_local_variables(const methodt& m, const address_mapt& amap); struct block_tree_nodet { @@ -505,6 +545,420 @@ void java_bytecode_convert_methodt::convert( symbol_table.add(method_symbol); } +typedef java_bytecode_convert_methodt::holet holet; +typedef java_bytecode_convert_methodt::local_variable_with_holest local_variable_with_holest; +typedef java_bytecode_convert_methodt::local_variable_table_with_holest local_variable_table_with_holest; + +namespace { + bool lt_index(const local_variable_with_holest& a, + const local_variable_with_holest& b) + { + return a.var.indexvar.start_pcvar.start_pc; + } +} + +// Convert the address-map to a cfg-graph so we can compute dominators: + +typedef java_bytecode_convert_methodt::address_mapt address_mapt; +typedef java_bytecode_convert_methodt::java_cfg_dominatorst java_cfg_dominatorst; + +template +struct procedure_local_cfg_baset : + public graph > +{ + typedef std::map entry_mapt; + entry_mapt entry_map; + + procedure_local_cfg_baset() {} + + void operator()(const address_mapt& amap) + { + for(const auto& inst : amap) + { + // Map instruction PCs onto node indices: + entry_map[inst.first]=this->add_node(); + // Map back: + (*this)[entry_map[inst.first]].PC=inst.first; + } + for(const auto& inst : amap) + { + for(auto succ : inst.second.successors) + this->add_edge(entry_map.at(inst.first),entry_map.at(succ)); + } + } + + unsigned get_first_node(const address_mapt& amap) const { return amap.begin()->first; } + unsigned get_last_node(const address_mapt& amap) const { return (--amap.end())->first; } + unsigned nodes_empty(const address_mapt& amap) const { return amap.empty(); } + +}; + +typedef java_bytecode_convert_methodt::local_variablet local_variablet; +typedef std::map > predecessor_mapt; + +struct is_predecessor_of +{ + const predecessor_mapt& order; + + is_predecessor_of(const predecessor_mapt& _order) : order(_order) {} + + bool operator()(local_variable_with_holest* a, + local_variable_with_holest* b) const + { + auto findit=order.find(a); + if(findit==order.end()) + return false; + return findit->second.count(b)>0; + } +}; + +static void gather_transitive_predecessors( + local_variable_with_holest* start, + const predecessor_mapt& predecessor_map, + std::set& result) +{ + if(!result.insert(start).second) + return; + auto findit=predecessor_map.find(start); + if(findit==predecessor_map.end()) + return; + for(const auto pred : findit->second) + gather_transitive_predecessors(pred,predecessor_map,result); +} + +static bool is_store_to_slot( + const java_bytecode_convert_methodt::instructiont& inst, + unsigned slotidx) +{ + const std::string prevstatement=id2string(inst.statement); + if(!(prevstatement.size()>=1 && prevstatement.substr(1,5)=="store")) + return false; + + std::string storeslot; + if(inst.args.size()==1) + { + const auto& arg=inst.args[0]; + storeslot=id2string(to_constant_expr(arg).get_value()); + } + else + { + assert(prevstatement[6]=='_' && prevstatement.size()==8); + storeslot=prevstatement[7]; + assert(isdigit(storeslot[0])); + } + auto storeslotidx=safe_string2unsigned(storeslot); + return storeslotidx==slotidx; +} + +static void maybe_add_hole( + local_variable_with_holest& var, + unsigned from, + unsigned to) +{ + assert(to>=from); + if(to!=from) + var.holes.push_back({from,to-from}); +} + +void java_bytecode_convert_methodt::find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& doms) +{ + // Build a simple map from instruction PC to the variable live in this slot at that PC, + // and a map from each variable to variables that flow into it: + + std::vector var_map; + predecessor_mapt predecessor_map; + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + if(it->var.start_pc+it->var.length > var_map.size()) + var_map.resize(it->var.start_pc+it->var.length); + + for(unsigned idx=it->var.start_pc, + idxlim=it->var.start_pc+it->var.length; + idx!=idxlim; ++idx) + { + assert((!var_map[idx]) && "Local variable table clash?"); + var_map[idx]=&*it; + } + } + + // Now find variables that flow together by walking backwards to find initialisers + // or branches from other live ranges: + + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + // Don't alter parameters: + if(it->var.start_pc==0) + continue; + + // Find the last instruction within the live range: + unsigned end_pc=it->var.start_pc+it->var.length; + auto amapit=amap.find(end_pc); + auto old_amapit=amapit; + --amapit; + if(old_amapit==amap.end()) + assert(end_pc>amapit->first && + "Instruction live range doesn't align to instruction boundary?"); + + // Find vartable entries that flow into this one: + unsigned new_start_pc=it->var.start_pc; + for(; amapit->first >= it->var.start_pc; --amapit) + { + for(auto pred : amapit->second.predecessors) + { + auto pred_var=var_map[pred]; + if(pred_var==&*it) + { + // Flow from within same live range? + continue; + } + else if(!pred_var) + { + // Flow from out of range? + // Check if this is an initialiser, and if so expand the live range + // to include it, but don't check its predecessors: + auto inst_before_this=amapit; + --inst_before_this; + if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) + throw "Local variable table: unexpected flow from out of range"; + if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) + throw "Local variable table: didn't find expected initialising store"; + new_start_pc=pred; + } + else { + if(pred_var->var.name!=it->var.name || pred_var->var.signature!=it->var.signature) + throw "Local variable table: flow from variable with clashing name or signature"; + // OK, this is a flow from a similar but distinct entry in the local var table. + predecessor_map[&*it].insert(pred_var); + } + } + } + + // If a simple pre-block initialiser was found, add it to the live range now: + it->var.length+=(it->var.start_pc-new_start_pc); + it->var.start_pc=new_start_pc; + + } + + // OK, we've established the flows all seem sensible. Now merge vartable entries + // according to the predecessor_map: + + // Top-sort so that we get the bottom variables first: + is_predecessor_of comp(predecessor_map); + std::vector topsorted_vars; + for(auto it=firstvar,itend=varlimit; it!=itend; ++it) + topsorted_vars.push_back(&*it); + + std::sort(topsorted_vars.begin(),topsorted_vars.end(),comp); + + // Now merge the entries: + for(auto merge_into : topsorted_vars) + { + // Already merged into another variable? + if(merge_into->var.length==0) + continue; + + std::set merge_vars; + gather_transitive_predecessors(merge_into,predecessor_map,merge_vars); + if(merge_vars.size()==1) + continue; + + // Because we need a lexically-scoped declaration, we must have the merged variable + // enter scope both in a block that dominates all entries, and which + // precedes them in program order. + unsigned first_pc=var_map.size(); + unsigned last_pc=0; + for(auto v : merge_vars) + { + if(v->var.start_pc < first_pc) + first_pc = v->var.start_pc; + if(v->var.start_pc+v->var.length > last_pc) + last_pc=v->var.start_pc+v->var.length; + } + + std::vector candidate_dominators; + for(auto v : merge_vars) + { + const auto& this_var_doms=doms.cfg[doms.cfg.entry_map.at(v->var.start_pc)].dominators; + for(const auto this_var_dom : this_var_doms) + if(this_var_dom <= first_pc) + candidate_dominators.push_back(this_var_dom); + } + std::sort(candidate_dominators.begin(),candidate_dominators.end()); + + // Working from the back, simply find the first PC that occurs merge_vars.size() + // times and therefore dominates all vars we seek to merge: + + unsigned found_dominator=var_map.size(); + for(auto domit=candidate_dominators.rbegin(), domitend=candidate_dominators.rend(); + domit != domitend && found_dominator==var_map.size(); /* Don't increment here */) + { + unsigned repeats=0; + auto dom=*domit; + while(domit!=domitend && *domit==dom) + { + ++domit; + ++repeats; + } + assert(repeats<=merge_vars.size()); + if(repeats==merge_vars.size()) + found_dominator=dom; + } + + if(found_dominator==var_map.size()) + throw "Variable live ranges with no common dominator?"; + + // Populate the holes in the live range (addresses where the new variable + // will be in scope, but references to this stack slot should not resolve to it + // as it was not visible in the original local variable table) + std::vector sorted_by_startpc( + merge_vars.begin(),merge_vars.end()); + std::sort(sorted_by_startpc.begin(),sorted_by_startpc.end(),lt_startpc); + + maybe_add_hole(*merge_into,found_dominator,sorted_by_startpc[0]->var.start_pc); + for(unsigned idx=0; idxvar.start_pc + sorted_by_startpc[idx]->var.length, + sorted_by_startpc[idx+1]->var.start_pc); + + // Apply the changes: + merge_into->var.start_pc=found_dominator; + merge_into->var.length=last_pc-found_dominator; + + #ifdef DEBUG + status() << "Merging " << merge_vars.size() << " variables named " << merge_into->var.name << + "; new live range " << merge_into->var.start_pc << "-" << + merge_into->var.start_pc + merge_into->var.length << eom; + #endif + + // Nuke the now-subsumed var-table entries: + for(auto& v : merge_vars) + if(v!=merge_into) + v->var.length=0; + + } + +} + +static void walk_to_next_index( + local_variable_table_with_holest::iterator& it1, + local_variable_table_with_holest::iterator& it2, + local_variable_table_with_holest::iterator itend) +{ + if(it2==itend) + { + it1=itend; + return; + } + + auto old_it2=it2; + auto index=it2->var.index; + while(it2!=itend && it2->var.index==index) + ++it2; + it1=old_it2; +} + +void java_bytecode_convert_methodt::find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& doms) +{ + // Handle one stack slot at a time: + std::sort(vars.begin(),vars.end(),lt_index); + + auto it1=vars.begin(); + auto it2=it1; + auto itend=vars.end(); + walk_to_next_index(it1,it2,itend); + for(; it1!=itend; walk_to_next_index(it1,it2,itend)) + find_initialisers_for_slot(it1,it2,amap,doms); +} + +void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, + const address_mapt& amap) +{ + // Compute CFG dominator tree + java_cfg_dominatorst doms; + doms(amap); + + // Find out which local variable table entries should be merged: + // Wrap each entry so we have somewhere to record live ranges with holes: + std::vector vars_with_holes; + for(const auto& v : m.local_variable_table) + vars_with_holes.push_back({v, {}}); + + find_initialisers(vars_with_holes,amap,doms); + + // Clean up removed records from the variable table: + + size_t toremove=0; + for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) + { + auto& v=vars_with_holes[i]; + if(v.var.length==0) + { + // Move to end; consider the new element we've swapped in: + ++toremove; + if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? + std::swap(v,vars_with_holes[vars_with_holes.size()-toremove]); + --i; + } + } + + // Remove un-needed entries. + vars_with_holes.resize(vars_with_holes.size()-toremove); + + // Do the locals and parameters in the variable table, which is available when + // compiled with -g or for methods with many local variables in the latter + // case, different variables can have the same index, depending on the + // context. + // + // to calculate which variable to use, one uses the address of the instruction + // that uses the variable, the size of the instruction and the start_pc / + // length values in the local variable table + for(const auto & v : vars_with_holes) + { + if(v.var.start_pc==0) // Parameter? + continue; + + typet t=java_type_from_string(v.var.signature); + std::ostringstream id_oss; + id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; + irep_idt identifier(id_oss.str()); + symbol_exprt result(identifier, t); + result.set(ID_C_base_name, v.var.name); + + variables[v.var.index].push_back(variablet()); + auto& newv=variables[v.var.index].back(); + newv.symbol_expr = result; + newv.start_pc = v.var.start_pc; + newv.length = v.var.length; + newv.holes=std::move(v.holes); + + symbolt new_symbol; + new_symbol.name=identifier; + new_symbol.type=t; + new_symbol.base_name=v.var.name; + new_symbol.pretty_name=id2string(identifier).substr(6, std::string::npos); + new_symbol.mode=ID_java; + new_symbol.is_type=false; + new_symbol.is_file_local=true; + new_symbol.is_thread_local=true; + new_symbol.is_lvalue=true; + symbol_table.add(new_symbol); + } + +} + /*******************************************************************\ Function: java_bytecode_convert_methodt::get_bytecode_info From 6bb8f0c263129e3621b3019db866114860c822f3 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 19 Jan 2017 10:38:19 +0000 Subject: [PATCH 2/8] Replace old variable table implementation This uses the local variable table implementation introduced in the previous commit, replacing the existing simple variable table lookup code that could inadvertently split live ranges when a variable's live range occupies multiple disjoint address ranges and so is declared using multiple entries in the local variable table. --- .../java_bytecode_convert_method.cpp | 91 ++++++++++--------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index e9248c18440..4e0d9473725 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -120,18 +120,27 @@ class java_bytecode_convert_methodt:public messaget } instruction_sizet; // return corresponding reference of variable - const variablet &find_variable_for_slot( - size_t address, - variablest &var_list, - instruction_sizet inst_size) + const variablet &find_variable_for_slot(size_t address, variablest &var_list) { for(const variablet &var : var_list) { size_t start_pc=var.start_pc; size_t length=var.length; - if(address+(size_t)inst_size>=start_pc && - address=start_pc && address<(start_pc+length)) + { + bool found_hole=false; + for(auto &hole : var.holes) + { + if(address>=hole.start_pc && address<(hole.start_pc-hole.length)) + { + found_hole=true; + break; + } + } + if(found_hole) + continue; return var; + } } // add unnamed local variable to end of list at this index // with scope from 0 to INT_MAX @@ -155,7 +164,6 @@ class java_bytecode_convert_methodt:public messaget const exprt &arg, char type_char, size_t address, - instruction_sizet inst_size, variable_cast_argumentt do_cast) { irep_idt number=to_constant_expr(arg).get_value(); @@ -166,7 +174,7 @@ class java_bytecode_convert_methodt:public messaget // search variable in list for correct frame / address if necessary const variablet &var= - find_variable_for_slot(address, var_list, inst_size); + find_variable_for_slot(address, var_list); if(var.symbol_expr.get_identifier().empty()) { @@ -317,7 +325,7 @@ class java_bytecode_convert_methodt:public messaget void convert(const instructiont &); codet convert_instructions( - const instructionst &, const code_typet &); + const methodt &, const code_typet &); const bytecode_infot &get_bytecode_info(const irep_idt &statement); }; @@ -382,6 +390,7 @@ void java_bytecode_convert_methodt::convert( const irep_idt method_identifier= id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; + method_id=method_identifier; code_typet &code_type=to_code_type(member_type); method_return_type=code_type.return_type(); @@ -400,38 +409,24 @@ void java_bytecode_convert_methodt::convert( variables.clear(); - // Do the parameters and locals in the variable table, which is available when - // compiled with -g or for methods with many local variables in the latter - // case, different variables can have the same index, depending on the - // context. - // - // to calculate which variable to use, one uses the address of the instruction - // that uses the variable, the size of the instruction and the start_pc / - // length values in the local variable table + // find parameter names in the local variable table: for(const auto &v : m.local_variable_table) { + if(v.start_pc!=0) // Local? + continue; + typet t=java_type_from_string(v.signature); std::ostringstream id_oss; - id_oss << method_identifier << "::" << v.start_pc << "::" << v.name; + id_oss << method_id << "::" << v.name; irep_idt identifier(id_oss.str()); symbol_exprt result(identifier, t); result.set(ID_C_base_name, v.name); - size_t number_index_entries=variables[v.index].size(); - variables[v.index].resize(number_index_entries+1); - variables[v.index][number_index_entries].symbol_expr=result; - variables[v.index][number_index_entries].start_pc=v.start_pc; - variables[v.index][number_index_entries].length=v.length; - symbolt new_symbol; - new_symbol.name=identifier; - new_symbol.type=t; - new_symbol.base_name=v.name; - new_symbol.pretty_name=strip_java_namespace_prefix(identifier); - new_symbol.mode=ID_java; - new_symbol.is_type=false; - new_symbol.is_file_local=true; - new_symbol.is_thread_local=true; - new_symbol.is_lvalue=true; - symbol_table.add(new_symbol); + + variables[v.index].push_back(variablet()); + auto &newv=variables[v.index].back(); + newv.symbol_expr = result; + newv.start_pc = v.start_pc; + newv.length = v.length; } // set up variables array @@ -533,9 +528,8 @@ void java_bytecode_convert_methodt::convert( method_has_this=code_type.has_this(); tmp_vars.clear(); - if((!m.is_abstract) && (!m.is_native)) - method_symbol.value=convert_instructions(m.instructions, code_type); + method_symbol.value=convert_instructions(m, code_type); // do we have the method symbol already? const auto s_it=symbol_table.symbols.find(method.get_name()); @@ -1309,9 +1303,12 @@ static unsigned get_bytecode_type_width(const typet &ty) } codet java_bytecode_convert_methodt::convert_instructions( - const instructionst &instructions, + const methodt &method, const code_typet &method_type) { + + const instructionst& instructions=method.instructions; + // Run a worklist algorithm, assuming that the bytecode has not // been tampered with. See "Leroy, X. (2003). Java bytecode // verification: algorithms and formalizations. Journal of Automated @@ -1423,6 +1420,10 @@ codet java_bytecode_convert_methodt::convert_instructions( } } + // Now that the control flow graph is built, set up our local variables + // (these require the graph to determine live ranges) + setup_local_variables(method,address_map); + std::set working_set; if(!instructions.empty()) working_set.insert(instructions.front().address); @@ -1652,7 +1653,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // store value into some local variable assert(op.size()==1 && results.empty()); - exprt var=variable(arg0, statement[0], i_it->address, INST_INDEX, NO_CAST); + exprt var=variable(arg0, statement[0], i_it->address, NO_CAST); exprt toassign=op[0]; if('a'==statement[0] && toassign.type()!=var.type()) @@ -1683,7 +1684,7 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement==patternt("?load")) { // load a value from a local variable - results[0]=variable(arg0, statement[0], i_it->address, INST_INDEX, CAST_AS_NEEDED); + results[0]=variable(arg0, statement[0], i_it->address, CAST_AS_NEEDED); } else if(statement=="ldc" || statement=="ldc_w" || statement=="ldc2" || statement=="ldc2_w") @@ -1751,7 +1752,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ... assert(op.empty() && results.empty()); code_blockt branches; - auto retvar=variable(arg0, 'a', i_it->address, INST_INDEX, NO_CAST); + auto retvar=variable(arg0, 'a', i_it->address, NO_CAST); assert(!jsr_ret_targets.empty()); for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx) { @@ -1889,9 +1890,9 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement=="iinc") { code_assignt code_assign; - code_assign.lhs()=variable(arg0, 'i', i_it->address, INST_INDEX_CONST, NO_CAST); + code_assign.lhs()=variable(arg0, 'i', i_it->address, NO_CAST); code_assign.rhs()=plus_exprt( - variable(arg0, 'i', i_it->address, INST_INDEX_CONST, CAST_AS_NEEDED), + variable(arg0, 'i', i_it->address, CAST_AS_NEEDED), typecast_exprt(arg1, java_int_type())); c=code_assign; } @@ -2424,10 +2425,12 @@ codet java_bytecode_convert_methodt::convert_instructions( // Try to recover block structure as indicated in the local variable table: // The block tree node mirrors the block structure of root_block, - // indexing the Java PCs were each subblock starts and ends. + // indexing the Java PCs where each subblock starts and ends. block_tree_nodet root; code_blockt root_block; + // First create a simple flat list of basic blocks. We'll add lexical nesting + // constructs as variable live-ranges require next. bool start_new_block=true; for(const auto &address_pair : address_map) { @@ -2492,13 +2495,13 @@ codet java_bytecode_convert_methodt::convert_instructions( // Skip anonymous variables: if(v.symbol_expr.get_identifier()==irep_idt()) continue; - code_declt d(v.symbol_expr); auto &block=get_block_for_pcrange( root, root_block, v.start_pc, v.start_pc+v.length, std::numeric_limits::max()); + code_declt d(v.symbol_expr); block.operands().insert(block.operands().begin(), d); } } From 2f8213aa687218f88cdd55c3458a1bc90a239a27 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 19 Jan 2017 10:53:24 +0000 Subject: [PATCH 3/8] Factor the jbcm type and LVT functions into individual files --- src/java_bytecode/Makefile | 2 +- .../java_bytecode_convert_method.cpp | 787 +++--------------- .../java_bytecode_convert_method_class.h | 182 ++++ .../java_local_variable_table.cpp | 514 ++++++++++++ 4 files changed, 794 insertions(+), 691 deletions(-) create mode 100644 src/java_bytecode/java_bytecode_convert_method_class.h create mode 100644 src/java_bytecode/java_local_variable_table.cpp diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index 5a9626b1a6d..f13c327ef77 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -5,7 +5,7 @@ SRC = java_bytecode_language.cpp java_bytecode_parse_tree.cpp \ java_bytecode_typecheck_type.cpp java_bytecode_internal_additions.cpp \ java_root_class.cpp java_bytecode_parser.cpp bytecode_info.cpp \ java_class_loader.cpp jar_file.cpp java_object_factory.cpp \ - java_bytecode_convert_method.cpp + java_bytecode_convert_method.cpp java_local_variable_table.cpp INCLUDES= -I .. diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 4e0d9473725..2ad56bc9230 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -23,6 +23,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_convert_method.h" +#include "java_bytecode_convert_method_class.h" #include "bytecode_info.h" #include "java_types.h" @@ -54,282 +55,6 @@ class patternt const char *p; }; -class java_bytecode_convert_methodt:public messaget -{ -public: - java_bytecode_convert_methodt( - symbol_tablet &_symbol_table, - message_handlert &_message_handler): - messaget(_message_handler), - symbol_table(_symbol_table) - { - } - - typedef java_bytecode_parse_treet::methodt methodt; - typedef java_bytecode_parse_treet::instructiont instructiont; - typedef methodt::instructionst instructionst; - typedef methodt::local_variable_tablet local_variable_tablet; - typedef methodt::local_variablet local_variablet; - - void operator()(const symbolt &class_symbol, const methodt &method) - { - convert(class_symbol, method); - } - -protected: - irep_idt method_id; - symbol_tablet &symbol_table; - - irep_idt current_method; - typet method_return_type; - -public: - struct holet { - unsigned start_pc; - unsigned length; - }; - - struct local_variable_with_holest - { - local_variablet var; - std::vector holes; - }; - - typedef std::vector local_variable_table_with_holest; - -protected: - class variablet - { - public: - symbol_exprt symbol_expr; - size_t start_pc; - size_t length; - bool is_parameter; - std::vector holes; - variablet() : symbol_expr(), is_parameter(false) {} - }; - - typedef std::vector variablest; - expanding_vector variables; - std::set used_local_names; - bool method_has_this; - - typedef enum instruction_sizet - { - INST_INDEX=2, INST_INDEX_CONST=3 - } instruction_sizet; - - // return corresponding reference of variable - const variablet &find_variable_for_slot(size_t address, variablest &var_list) - { - for(const variablet &var : var_list) - { - size_t start_pc=var.start_pc; - size_t length=var.length; - if(address>=start_pc && address<(start_pc+length)) - { - bool found_hole=false; - for(auto &hole : var.holes) - { - if(address>=hole.start_pc && address<(hole.start_pc-hole.length)) - { - found_hole=true; - break; - } - } - if(found_hole) - continue; - return var; - } - } - // add unnamed local variable to end of list at this index - // with scope from 0 to INT_MAX - // as it is at the end of the vector, it will only be taken into account - // if no other variable is valid - size_t list_length=var_list.size(); - var_list.resize(list_length+1); - var_list[list_length].start_pc=0; - var_list[list_length].length=std::numeric_limits::max(); - return var_list[list_length]; - } - - // JVM local variables - enum variable_cast_argumentt - { - CAST_AS_NEEDED, - NO_CAST - }; - - const exprt variable( - const exprt &arg, - char type_char, - size_t address, - variable_cast_argumentt do_cast) - { - irep_idt number=to_constant_expr(arg).get_value(); - - std::size_t number_int=safe_string2size_t(id2string(number)); - typet t=java_type_from_char(type_char); - variablest &var_list=variables[number_int]; - - // search variable in list for correct frame / address if necessary - const variablet &var= - find_variable_for_slot(address, var_list); - - if(var.symbol_expr.get_identifier().empty()) - { - // an un-named local variable - irep_idt base_name="anonlocal::"+id2string(number)+type_char; - irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); - - symbol_exprt result(identifier, t); - result.set(ID_C_base_name, base_name); - used_local_names.insert(result); - - return result; - } - else - { - exprt result=var.symbol_expr; - if(do_cast==CAST_AS_NEEDED && t!=result.type()) - result=typecast_exprt(result, t); - return result; - } - } - - // temporary variables - std::list tmp_vars; - - symbol_exprt tmp_variable(const std::string &prefix, const typet &type) - { - irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size()); - irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); - - auxiliary_symbolt tmp_symbol; - tmp_symbol.base_name=base_name; - tmp_symbol.is_static_lifetime=false; - tmp_symbol.mode=ID_java; - tmp_symbol.name=identifier; - tmp_symbol.type=type; - symbol_table.add(tmp_symbol); - - symbol_exprt result(identifier, type); - result.set(ID_C_base_name, base_name); - tmp_vars.push_back(result); - - return result; - } - - // JVM program locations - irep_idt label(const irep_idt &address) - { - return "pc"+id2string(address); - } - - // JVM Stack - typedef std::vector stackt; - stackt stack; - - exprt::operandst pop(std::size_t n) - { - if(stack.size() successors; - std::set predecessors; - codet code; - stackt stack; - bool done; - }; - -public: - typedef std::map address_mapt; - typedef cfg_dominators_templatet java_cfg_dominatorst; - -protected: - - void find_initialisers( - local_variable_table_with_holest& vars, - const address_mapt& amap, - const java_cfg_dominatorst& doms); - - void find_initialisers_for_slot( - local_variable_table_with_holest::iterator firstvar, - local_variable_table_with_holest::iterator varlimit, - const address_mapt& amap, - const java_cfg_dominatorst& doms); - - void setup_local_variables(const methodt& m, const address_mapt& amap); - - struct block_tree_nodet - { - bool leaf; - std::vector branch_addresses; - std::vector branch; - block_tree_nodet():leaf(false) {} - explicit block_tree_nodet(bool l):leaf(l) {} - static block_tree_nodet get_leaf() { return block_tree_nodet(true); } - }; - - static void replace_goto_target( - codet &repl, - const irep_idt &old_label, - const irep_idt &new_label); - - code_blockt &get_block_for_pcrange( - block_tree_nodet &tree, - code_blockt &this_block, - unsigned address_start, - unsigned address_limit, - unsigned next_block_start_address); - - code_blockt &get_or_create_block_for_pcrange( - block_tree_nodet &tree, - code_blockt &this_block, - unsigned address_start, - unsigned address_limit, - unsigned next_block_start_address, - const address_mapt &amap, - bool allow_merge=true); - - // conversion - void convert(const symbolt &class_symbol, const methodt &); - void convert(const instructiont &); - - codet convert_instructions( - const methodt &, const code_typet &); - - const bytecode_infot &get_bytecode_info(const irep_idt &statement); -}; - const size_t SLOTS_PER_INTEGER(1u); const size_t INTEGER_WIDTH(64u); static size_t count_slots( @@ -368,6 +93,102 @@ static irep_idt strip_java_namespace_prefix(const irep_idt to_strip) return to_strip_str.substr(6, std::string::npos); } +java_bytecode_convert_methodt::java_bytecode_convert_methodt( + symbol_tablet &_symbol_table, + message_handlert &_message_handler) : + messaget(_message_handler), + symbol_table(_symbol_table) +{ +} + +const exprt java_bytecode_convert_methodt::variable( + const exprt &arg, + char type_char, + size_t address, + variable_cast_argumentt do_cast) +{ + irep_idt number=to_constant_expr(arg).get_value(); + + std::size_t number_int=safe_string2size_t(id2string(number)); + typet t=java_type_from_char(type_char); + variablest &var_list=variables[number_int]; + + // search variable in list for correct frame / address if necessary + const variablet &var=find_variable_for_slot(address, var_list); + + if(var.symbol_expr.get_identifier().empty()) + { + // an un-named local variable + irep_idt base_name="anonlocal::"+id2string(number)+type_char; + irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); + + symbol_exprt result(identifier, t); + result.set(ID_C_base_name, base_name); + used_local_names.insert(result); + return result; + } + else + { + exprt result=var.symbol_expr; + if(do_cast==CAST_AS_NEEDED && t!=result.type()) + result=typecast_exprt(result, t); + return result; + } +} + +symbol_exprt java_bytecode_convert_methodt::tmp_variable( + const std::string &prefix, const typet &type) +{ + irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size()); + irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); + + auxiliary_symbolt tmp_symbol; + tmp_symbol.base_name=base_name; + tmp_symbol.is_static_lifetime=false; + tmp_symbol.mode=ID_java; + tmp_symbol.name=identifier; + tmp_symbol.type=type; + symbol_table.add(tmp_symbol); + + symbol_exprt result(identifier, type); + result.set(ID_C_base_name, base_name); + tmp_vars.push_back(result); + + return result; +} + +irep_idt java_bytecode_convert_methodt::label(const irep_idt &address) +{ + return "pc"+id2string(address); +} + +exprt::operandst java_bytecode_convert_methodt::pop(std::size_t n) +{ + if(stack.size()var.start_pcvar.start_pc; - } -} - -// Convert the address-map to a cfg-graph so we can compute dominators: - -typedef java_bytecode_convert_methodt::address_mapt address_mapt; -typedef java_bytecode_convert_methodt::java_cfg_dominatorst java_cfg_dominatorst; - -template -struct procedure_local_cfg_baset : - public graph > -{ - typedef std::map entry_mapt; - entry_mapt entry_map; - - procedure_local_cfg_baset() {} - - void operator()(const address_mapt& amap) - { - for(const auto& inst : amap) - { - // Map instruction PCs onto node indices: - entry_map[inst.first]=this->add_node(); - // Map back: - (*this)[entry_map[inst.first]].PC=inst.first; - } - for(const auto& inst : amap) - { - for(auto succ : inst.second.successors) - this->add_edge(entry_map.at(inst.first),entry_map.at(succ)); - } - } - - unsigned get_first_node(const address_mapt& amap) const { return amap.begin()->first; } - unsigned get_last_node(const address_mapt& amap) const { return (--amap.end())->first; } - unsigned nodes_empty(const address_mapt& amap) const { return amap.empty(); } - -}; - -typedef java_bytecode_convert_methodt::local_variablet local_variablet; -typedef std::map > predecessor_mapt; - -struct is_predecessor_of -{ - const predecessor_mapt& order; - - is_predecessor_of(const predecessor_mapt& _order) : order(_order) {} - - bool operator()(local_variable_with_holest* a, - local_variable_with_holest* b) const - { - auto findit=order.find(a); - if(findit==order.end()) - return false; - return findit->second.count(b)>0; - } -}; - -static void gather_transitive_predecessors( - local_variable_with_holest* start, - const predecessor_mapt& predecessor_map, - std::set& result) -{ - if(!result.insert(start).second) - return; - auto findit=predecessor_map.find(start); - if(findit==predecessor_map.end()) - return; - for(const auto pred : findit->second) - gather_transitive_predecessors(pred,predecessor_map,result); -} - -static bool is_store_to_slot( - const java_bytecode_convert_methodt::instructiont& inst, - unsigned slotidx) -{ - const std::string prevstatement=id2string(inst.statement); - if(!(prevstatement.size()>=1 && prevstatement.substr(1,5)=="store")) - return false; - - std::string storeslot; - if(inst.args.size()==1) - { - const auto& arg=inst.args[0]; - storeslot=id2string(to_constant_expr(arg).get_value()); - } - else - { - assert(prevstatement[6]=='_' && prevstatement.size()==8); - storeslot=prevstatement[7]; - assert(isdigit(storeslot[0])); - } - auto storeslotidx=safe_string2unsigned(storeslot); - return storeslotidx==slotidx; -} - -static void maybe_add_hole( - local_variable_with_holest& var, - unsigned from, - unsigned to) -{ - assert(to>=from); - if(to!=from) - var.holes.push_back({from,to-from}); -} - -void java_bytecode_convert_methodt::find_initialisers_for_slot( - local_variable_table_with_holest::iterator firstvar, - local_variable_table_with_holest::iterator varlimit, - const address_mapt& amap, - const java_cfg_dominatorst& doms) -{ - // Build a simple map from instruction PC to the variable live in this slot at that PC, - // and a map from each variable to variables that flow into it: - - std::vector var_map; - predecessor_mapt predecessor_map; - for(auto it=firstvar, itend=varlimit; it!=itend; ++it) - { - if(it->var.start_pc+it->var.length > var_map.size()) - var_map.resize(it->var.start_pc+it->var.length); - - for(unsigned idx=it->var.start_pc, - idxlim=it->var.start_pc+it->var.length; - idx!=idxlim; ++idx) - { - assert((!var_map[idx]) && "Local variable table clash?"); - var_map[idx]=&*it; - } - } - - // Now find variables that flow together by walking backwards to find initialisers - // or branches from other live ranges: - - for(auto it=firstvar, itend=varlimit; it!=itend; ++it) - { - // Don't alter parameters: - if(it->var.start_pc==0) - continue; - - // Find the last instruction within the live range: - unsigned end_pc=it->var.start_pc+it->var.length; - auto amapit=amap.find(end_pc); - auto old_amapit=amapit; - --amapit; - if(old_amapit==amap.end()) - assert(end_pc>amapit->first && - "Instruction live range doesn't align to instruction boundary?"); - - // Find vartable entries that flow into this one: - unsigned new_start_pc=it->var.start_pc; - for(; amapit->first >= it->var.start_pc; --amapit) - { - for(auto pred : amapit->second.predecessors) - { - auto pred_var=var_map[pred]; - if(pred_var==&*it) - { - // Flow from within same live range? - continue; - } - else if(!pred_var) - { - // Flow from out of range? - // Check if this is an initialiser, and if so expand the live range - // to include it, but don't check its predecessors: - auto inst_before_this=amapit; - --inst_before_this; - if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) - throw "Local variable table: unexpected flow from out of range"; - if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) - throw "Local variable table: didn't find expected initialising store"; - new_start_pc=pred; - } - else { - if(pred_var->var.name!=it->var.name || pred_var->var.signature!=it->var.signature) - throw "Local variable table: flow from variable with clashing name or signature"; - // OK, this is a flow from a similar but distinct entry in the local var table. - predecessor_map[&*it].insert(pred_var); - } - } - } - - // If a simple pre-block initialiser was found, add it to the live range now: - it->var.length+=(it->var.start_pc-new_start_pc); - it->var.start_pc=new_start_pc; - - } - - // OK, we've established the flows all seem sensible. Now merge vartable entries - // according to the predecessor_map: - - // Top-sort so that we get the bottom variables first: - is_predecessor_of comp(predecessor_map); - std::vector topsorted_vars; - for(auto it=firstvar,itend=varlimit; it!=itend; ++it) - topsorted_vars.push_back(&*it); - - std::sort(topsorted_vars.begin(),topsorted_vars.end(),comp); - - // Now merge the entries: - for(auto merge_into : topsorted_vars) - { - // Already merged into another variable? - if(merge_into->var.length==0) - continue; - - std::set merge_vars; - gather_transitive_predecessors(merge_into,predecessor_map,merge_vars); - if(merge_vars.size()==1) - continue; - - // Because we need a lexically-scoped declaration, we must have the merged variable - // enter scope both in a block that dominates all entries, and which - // precedes them in program order. - unsigned first_pc=var_map.size(); - unsigned last_pc=0; - for(auto v : merge_vars) - { - if(v->var.start_pc < first_pc) - first_pc = v->var.start_pc; - if(v->var.start_pc+v->var.length > last_pc) - last_pc=v->var.start_pc+v->var.length; - } - - std::vector candidate_dominators; - for(auto v : merge_vars) - { - const auto& this_var_doms=doms.cfg[doms.cfg.entry_map.at(v->var.start_pc)].dominators; - for(const auto this_var_dom : this_var_doms) - if(this_var_dom <= first_pc) - candidate_dominators.push_back(this_var_dom); - } - std::sort(candidate_dominators.begin(),candidate_dominators.end()); - - // Working from the back, simply find the first PC that occurs merge_vars.size() - // times and therefore dominates all vars we seek to merge: - - unsigned found_dominator=var_map.size(); - for(auto domit=candidate_dominators.rbegin(), domitend=candidate_dominators.rend(); - domit != domitend && found_dominator==var_map.size(); /* Don't increment here */) - { - unsigned repeats=0; - auto dom=*domit; - while(domit!=domitend && *domit==dom) - { - ++domit; - ++repeats; - } - assert(repeats<=merge_vars.size()); - if(repeats==merge_vars.size()) - found_dominator=dom; - } - - if(found_dominator==var_map.size()) - throw "Variable live ranges with no common dominator?"; - - // Populate the holes in the live range (addresses where the new variable - // will be in scope, but references to this stack slot should not resolve to it - // as it was not visible in the original local variable table) - std::vector sorted_by_startpc( - merge_vars.begin(),merge_vars.end()); - std::sort(sorted_by_startpc.begin(),sorted_by_startpc.end(),lt_startpc); - - maybe_add_hole(*merge_into,found_dominator,sorted_by_startpc[0]->var.start_pc); - for(unsigned idx=0; idxvar.start_pc + sorted_by_startpc[idx]->var.length, - sorted_by_startpc[idx+1]->var.start_pc); - - // Apply the changes: - merge_into->var.start_pc=found_dominator; - merge_into->var.length=last_pc-found_dominator; - - #ifdef DEBUG - status() << "Merging " << merge_vars.size() << " variables named " << merge_into->var.name << - "; new live range " << merge_into->var.start_pc << "-" << - merge_into->var.start_pc + merge_into->var.length << eom; - #endif - - // Nuke the now-subsumed var-table entries: - for(auto& v : merge_vars) - if(v!=merge_into) - v->var.length=0; - - } - -} - -static void walk_to_next_index( - local_variable_table_with_holest::iterator& it1, - local_variable_table_with_holest::iterator& it2, - local_variable_table_with_holest::iterator itend) -{ - if(it2==itend) - { - it1=itend; - return; - } - - auto old_it2=it2; - auto index=it2->var.index; - while(it2!=itend && it2->var.index==index) - ++it2; - it1=old_it2; -} - -void java_bytecode_convert_methodt::find_initialisers( - local_variable_table_with_holest& vars, - const address_mapt& amap, - const java_cfg_dominatorst& doms) -{ - // Handle one stack slot at a time: - std::sort(vars.begin(),vars.end(),lt_index); - - auto it1=vars.begin(); - auto it2=it1; - auto itend=vars.end(); - walk_to_next_index(it1,it2,itend); - for(; it1!=itend; walk_to_next_index(it1,it2,itend)) - find_initialisers_for_slot(it1,it2,amap,doms); -} - -void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, - const address_mapt& amap) -{ - // Compute CFG dominator tree - java_cfg_dominatorst doms; - doms(amap); - - // Find out which local variable table entries should be merged: - // Wrap each entry so we have somewhere to record live ranges with holes: - std::vector vars_with_holes; - for(const auto& v : m.local_variable_table) - vars_with_holes.push_back({v, {}}); - - find_initialisers(vars_with_holes,amap,doms); - - // Clean up removed records from the variable table: - - size_t toremove=0; - for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) - { - auto& v=vars_with_holes[i]; - if(v.var.length==0) - { - // Move to end; consider the new element we've swapped in: - ++toremove; - if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? - std::swap(v,vars_with_holes[vars_with_holes.size()-toremove]); - --i; - } - } - - // Remove un-needed entries. - vars_with_holes.resize(vars_with_holes.size()-toremove); - - // Do the locals and parameters in the variable table, which is available when - // compiled with -g or for methods with many local variables in the latter - // case, different variables can have the same index, depending on the - // context. - // - // to calculate which variable to use, one uses the address of the instruction - // that uses the variable, the size of the instruction and the start_pc / - // length values in the local variable table - for(const auto & v : vars_with_holes) - { - if(v.var.start_pc==0) // Parameter? - continue; - - typet t=java_type_from_string(v.var.signature); - std::ostringstream id_oss; - id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; - irep_idt identifier(id_oss.str()); - symbol_exprt result(identifier, t); - result.set(ID_C_base_name, v.var.name); - - variables[v.var.index].push_back(variablet()); - auto& newv=variables[v.var.index].back(); - newv.symbol_expr = result; - newv.start_pc = v.var.start_pc; - newv.length = v.var.length; - newv.holes=std::move(v.holes); - - symbolt new_symbol; - new_symbol.name=identifier; - new_symbol.type=t; - new_symbol.base_name=v.var.name; - new_symbol.pretty_name=id2string(identifier).substr(6, std::string::npos); - new_symbol.mode=ID_java; - new_symbol.is_type=false; - new_symbol.is_file_local=true; - new_symbol.is_thread_local=true; - new_symbol.is_lvalue=true; - symbol_table.add(new_symbol); - } - -} - /*******************************************************************\ Function: java_bytecode_convert_methodt::get_bytecode_info diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h new file mode 100644 index 00000000000..643b913a71a --- /dev/null +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -0,0 +1,182 @@ + +#ifndef JBCM_CLASS_H +#define JBCM_CLASS_H + +#include +#include +#include +#include +#include +#include "java_bytecode_parse_tree.h" + +#include +#include + +class symbol_tablet; +class symbolt; + +class java_bytecode_convert_methodt:public messaget +{ +public: + java_bytecode_convert_methodt( + symbol_tablet &_symbol_table, + message_handlert &_message_handler); + + typedef java_bytecode_parse_treet::methodt methodt; + typedef java_bytecode_parse_treet::instructiont instructiont; + typedef methodt::instructionst instructionst; + typedef methodt::local_variable_tablet local_variable_tablet; + typedef methodt::local_variablet local_variablet; + + void operator()(const symbolt &class_symbol, const methodt &method) + { + convert(class_symbol, method); + } + +protected: + irep_idt method_id; + symbol_tablet &symbol_table; + + irep_idt current_method; + typet method_return_type; + +public: + struct holet { + unsigned start_pc; + unsigned length; + }; + + struct local_variable_with_holest + { + local_variablet var; + std::vector holes; + }; + + typedef std::vector local_variable_table_with_holest; + +protected: + class variablet + { + public: + symbol_exprt symbol_expr; + size_t start_pc; + size_t length; + bool is_parameter; + std::vector holes; + variablet() : symbol_expr(), is_parameter(false) {} + }; + + typedef std::vector variablest; + expanding_vector variables; + std::set used_local_names; + bool method_has_this; + + // return corresponding reference of variable + const variablet &find_variable_for_slot(size_t address, variablest &var_list); + + // JVM local variables + enum variable_cast_argumentt + { + CAST_AS_NEEDED, + NO_CAST + }; + + const exprt variable( + const exprt &arg, + char type_char, + size_t address, + variable_cast_argumentt do_cast); + + // temporary variables + std::list tmp_vars; + + symbol_exprt tmp_variable(const std::string &prefix, const typet &type); + + // JVM program locations + irep_idt label(const irep_idt &address); + + // JVM Stack + typedef std::vector stackt; + stackt stack; + + exprt::operandst pop(std::size_t n); + + void push(const exprt::operandst &o); + + struct converted_instructiont + { + converted_instructiont( + const instructionst::const_iterator &it, + const codet &_code):source(it), code(_code), done(false) + {} + + instructionst::const_iterator source; + std::list successors; + std::set predecessors; + codet code; + stackt stack; + bool done; + }; + +public: + typedef std::map address_mapt; + typedef cfg_dominators_templatet java_cfg_dominatorst; + +protected: + + void find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void setup_local_variables(const methodt& m, const address_mapt& amap); + + struct block_tree_nodet + { + bool leaf; + std::vector branch_addresses; + std::vector branch; + block_tree_nodet():leaf(false) {} + explicit block_tree_nodet(bool l):leaf(l) {} + static block_tree_nodet get_leaf() { return block_tree_nodet(true); } + }; + + static void replace_goto_target( + codet &repl, + const irep_idt &old_label, + const irep_idt &new_label); + + code_blockt &get_block_for_pcrange( + block_tree_nodet &tree, + code_blockt &this_block, + unsigned address_start, + unsigned address_limit, + unsigned next_block_start_address); + + code_blockt &get_or_create_block_for_pcrange( + block_tree_nodet &tree, + code_blockt &this_block, + unsigned address_start, + unsigned address_limit, + unsigned next_block_start_address, + const address_mapt &amap, + bool allow_merge=true); + + // conversion + void convert(const symbolt &class_symbol, const methodt &); + void convert(const instructiont &); + + codet convert_instructions( + const methodt &, + const code_typet &); + + const bytecode_infot &get_bytecode_info(const irep_idt &statement); +}; + +#endif diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp new file mode 100644 index 00000000000..6145a723f39 --- /dev/null +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -0,0 +1,514 @@ + +#include "java_bytecode_convert_method_class.h" + +#include +#include "java_types.h" + +#include + +// Specialise the CFG representation to work over Java instead of GOTO programs. +// This must be done at global scope due to template resolution rules. + +template +struct procedure_local_cfg_baset : + public graph > +{ + typedef java_bytecode_convert_methodt::address_mapt address_mapt; + typedef std::map entry_mapt; + entry_mapt entry_map; + + procedure_local_cfg_baset() {} + + void operator()(const address_mapt& amap) + { + for(const auto& inst : amap) + { + // Map instruction PCs onto node indices: + entry_map[inst.first]=this->add_node(); + // Map back: + (*this)[entry_map[inst.first]].PC=inst.first; + } + for(const auto& inst : amap) + { + for(auto succ : inst.second.successors) + this->add_edge(entry_map.at(inst.first),entry_map.at(succ)); + } + } + + unsigned get_first_node(const address_mapt& amap) const { return amap.begin()->first; } + unsigned get_last_node(const address_mapt& amap) const { return (--amap.end())->first; } + unsigned nodes_empty(const address_mapt& amap) const { return amap.empty(); } + +}; + +namespace { + +typedef java_bytecode_convert_methodt::holet holet; +typedef java_bytecode_convert_methodt::local_variable_with_holest local_variable_with_holest; +typedef java_bytecode_convert_methodt::local_variable_table_with_holest local_variable_table_with_holest; +typedef java_bytecode_convert_methodt::address_mapt address_mapt; +typedef java_bytecode_convert_methodt::java_cfg_dominatorst java_cfg_dominatorst; + +// Comparators for local variables: + +static bool lt_index(const local_variable_with_holest& a, + const local_variable_with_holest& b) +{ + return a.var.indexvar.start_pcvar.start_pc; +} + +// The predecessor map, and a top-sorting comparator: + +typedef std::map > predecessor_mapt; + +struct is_predecessor_of +{ + const predecessor_mapt& order; + + is_predecessor_of(const predecessor_mapt& _order) : order(_order) {} + + bool operator()(local_variable_with_holest* a, + local_variable_with_holest* b) const + { + auto findit=order.find(a); + if(findit==order.end()) + return false; + return findit->second.count(b)>0; + } +}; + +// Helper routines for the find-initialisers code below: + +static void gather_transitive_predecessors( + local_variable_with_holest* start, + const predecessor_mapt& predecessor_map, + std::set& result) +{ + if(!result.insert(start).second) + return; + auto findit=predecessor_map.find(start); + if(findit==predecessor_map.end()) + return; + for(const auto pred : findit->second) + gather_transitive_predecessors(pred,predecessor_map,result); +} + +static bool is_store_to_slot( + const java_bytecode_convert_methodt::instructiont& inst, + unsigned slotidx) +{ + const std::string prevstatement=id2string(inst.statement); + if(!(prevstatement.size()>=1 && prevstatement.substr(1,5)=="store")) + return false; + + std::string storeslot; + if(inst.args.size()==1) + { + const auto& arg=inst.args[0]; + storeslot=id2string(to_constant_expr(arg).get_value()); + } + else + { + assert(prevstatement[6]=='_' && prevstatement.size()==8); + storeslot=prevstatement[7]; + assert(isdigit(storeslot[0])); + } + auto storeslotidx=safe_string2unsigned(storeslot); + return storeslotidx==slotidx; +} + +static void maybe_add_hole( + local_variable_with_holest& var, + unsigned from, + unsigned to) +{ + assert(to>=from); + if(to!=from) + var.holes.push_back({from,to-from}); +} + +static void populate_variable_address_map( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + std::vector& live_variable_at_address) +{ + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + if(it->var.start_pc+it->var.length > live_variable_at_address.size()) + live_variable_at_address.resize(it->var.start_pc+it->var.length); + + for(unsigned idx=it->var.start_pc, + idxlim=it->var.start_pc+it->var.length; + idx!=idxlim; ++idx) + { + assert((!live_variable_at_address[idx]) && "Local variable table clash?"); + live_variable_at_address[idx]=&*it; + } + } + +} + +static void populate_predecessor_map( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const std::vector& live_variable_at_address, + const address_mapt& amap, + predecessor_mapt& predecessor_map) +{ + + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + // Parameters are irrelevant to us and shouldn't be changed: + if(it->var.start_pc==0) + continue; + + // Find the last instruction within the live range: + unsigned end_pc=it->var.start_pc+it->var.length; + auto amapit=amap.find(end_pc); + auto old_amapit=amapit; + --amapit; + if(old_amapit==amap.end()) + assert(end_pc>amapit->first && + "Instruction live range doesn't align to instruction boundary?"); + + // Find vartable entries that flow into this one: + unsigned new_start_pc=it->var.start_pc; + for(; amapit->first >= it->var.start_pc; --amapit) + { + for(auto pred : amapit->second.predecessors) + { + auto pred_var=live_variable_at_address[pred]; + if(pred_var==&*it) + { + // Flow from within same live range? + continue; + } + else if(!pred_var) + { + // Flow from out of range? + // Check if this is an initialiser, and if so expand the live range + // to include it, but don't check its predecessors: + auto inst_before_this=amapit; + --inst_before_this; + if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) + throw "Local variable table: unexpected flow from out of range"; + if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) + throw "Local variable table: didn't find expected initialising store"; + new_start_pc=pred; + } + else { + if(pred_var->var.name!=it->var.name || pred_var->var.signature!=it->var.signature) + throw "Local variable table: flow from variable with clashing name or signature"; + // OK, this is a flow from a similar but distinct entry in the local var table. + predecessor_map[&*it].insert(pred_var); + } + } + } + + // If a simple pre-block initialiser was found, add it to the live range now: + it->var.length+=(it->var.start_pc-new_start_pc); + it->var.start_pc=new_start_pc; + + } + +} + +static unsigned get_common_dominator( + const std::set& merge_vars, + const java_cfg_dominatorst& dominator_analysis) +{ + assert(merge_vars.size()!=0); + + unsigned first_pc=UINT_MAX; + for(auto v : merge_vars) + { + if(v->var.start_pc < first_pc) + first_pc = v->var.start_pc; + } + + std::vector candidate_dominators; + for(auto v : merge_vars) + { + const auto& dominator_nodeidx=dominator_analysis.cfg.entry_map.at(v->var.start_pc); + const auto& this_var_doms=dominator_analysis.cfg[dominator_nodeidx].dominators; + for(const auto this_var_dom : this_var_doms) + if(this_var_dom <= first_pc) + candidate_dominators.push_back(this_var_dom); + } + std::sort(candidate_dominators.begin(),candidate_dominators.end()); + + // Working from the back, simply find the first PC that occurs merge_vars.size() + // times and therefore dominates all vars we seek to merge: + for(auto domit=candidate_dominators.rbegin(), domitend=candidate_dominators.rend(); + domit != domitend; /* Don't increment here */) + { + unsigned repeats=0; + auto dom=*domit; + while(domit!=domitend && *domit==dom) + { + ++domit; + ++repeats; + } + assert(repeats<=merge_vars.size()); + if(repeats==merge_vars.size()) + return dom; + } + + throw "Variable live ranges with no common dominator?"; +} + +static void populate_live_range_holes( + local_variable_with_holest& merge_into, + const std::set& merge_vars, + unsigned expanded_live_range_start) +{ + std::vector sorted_by_startpc( + merge_vars.begin(),merge_vars.end()); + std::sort(sorted_by_startpc.begin(),sorted_by_startpc.end(),lt_startpc); + + maybe_add_hole(merge_into,expanded_live_range_start,sorted_by_startpc[0]->var.start_pc); + for(unsigned idx=0; idxvar.start_pc + sorted_by_startpc[idx]->var.length, + sorted_by_startpc[idx+1]->var.start_pc); +} + +static void merge_variable_table_entries( + local_variable_with_holest& merge_into, + const std::set& merge_vars, + const java_cfg_dominatorst& dominator_analysis, + std::ostream& debug_out) +{ + // Because we need a lexically-scoped declaration, we must have the merged variable + // enter scope both in a block that dominates all entries, and which + // precedes them in program order. + unsigned found_dominator= + get_common_dominator(merge_vars,dominator_analysis); + + // Populate the holes in the live range (addresses where the new variable + // will be in scope, but references to this stack slot should not resolve to it + // as it was not visible in the original local variable table) + populate_live_range_holes(merge_into,merge_vars,found_dominator); + + unsigned last_pc=0; + for(auto v : merge_vars) + if(v->var.start_pc+v->var.length > last_pc) + last_pc=v->var.start_pc+v->var.length; + + // Apply the changes: + merge_into.var.start_pc=found_dominator; + merge_into.var.length=last_pc-found_dominator; + +#ifdef DEBUG + debug_out << "Merged " << merge_vars.size() << " variables named " << merge_into.var.name << + "; new live range " << merge_into.var.start_pc << "-" << + merge_into.var.start_pc + merge_into.var.length << messaget::eom; +#endif + + // Nuke the now-subsumed var-table entries: + for(auto& v : merge_vars) + if(v!=&merge_into) + v->var.length=0; +} + +} // End anonymous namespace + +void java_bytecode_convert_methodt::find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& dominator_analysis) +{ + // Build a simple map from instruction PC to the variable live in this slot at that PC, + // and a map from each variable to variables that flow into it: + std::vector live_variable_at_address; + populate_variable_address_map(firstvar,varlimit,live_variable_at_address); + + // Now find variables that flow together by walking backwards to find initialisers + // or branches from other live ranges: + predecessor_mapt predecessor_map; + populate_predecessor_map(firstvar,varlimit,live_variable_at_address,amap,predecessor_map); + + // OK, we've established the flows all seem sensible. Now merge vartable entries + // according to the predecessor_map: + + // Top-sort so that we get the bottom variables first: + is_predecessor_of comp(predecessor_map); + std::vector topsorted_vars; + for(auto it=firstvar,itend=varlimit; it!=itend; ++it) + topsorted_vars.push_back(&*it); + + std::sort(topsorted_vars.begin(),topsorted_vars.end(),comp); + + // Now merge the entries: + for(auto merge_into : topsorted_vars) + { + // Already merged into another variable? + if(merge_into->var.length==0) + continue; + + std::set merge_vars; + gather_transitive_predecessors(merge_into,predecessor_map,merge_vars); + // Nothing to do? + if(merge_vars.size()==1) + continue; + + merge_variable_table_entries(*merge_into,merge_vars,dominator_analysis,status()); + } + +} + +static void walk_to_next_index( + local_variable_table_with_holest::iterator& it1, + local_variable_table_with_holest::iterator& it2, + local_variable_table_with_holest::iterator itend) +{ + if(it2==itend) + { + it1=itend; + return; + } + + auto old_it2=it2; + auto index=it2->var.index; + while(it2!=itend && it2->var.index==index) + ++it2; + it1=old_it2; +} + +void java_bytecode_convert_methodt::find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& dominator_analysis) +{ + // Handle one stack slot at a time: + std::sort(vars.begin(),vars.end(),lt_index); + + auto it1=vars.begin(); + auto it2=it1; + auto itend=vars.end(); + walk_to_next_index(it1,it2,itend); + for(; it1!=itend; walk_to_next_index(it1,it2,itend)) + find_initialisers_for_slot(it1,it2,amap,dominator_analysis); +} + +static void cleanup_var_table(std::vector& vars_with_holes) +{ + size_t toremove=0; + for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) + { + auto& v=vars_with_holes[i]; + if(v.var.length==0) + { + // Move to end; consider the new element we've swapped in: + ++toremove; + if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? + std::swap(v,vars_with_holes[vars_with_holes.size()-toremove]); + --i; + } + } + + // Remove un-needed entries. + vars_with_holes.resize(vars_with_holes.size()-toremove); +} + +void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, + const address_mapt& amap) +{ + // Compute CFG dominator tree + java_cfg_dominatorst dominator_analysis; + dominator_analysis(amap); + + // Find out which local variable table entries should be merged: + // Wrap each entry so we have somewhere to record live ranges with holes: + std::vector vars_with_holes; + for(const auto& v : m.local_variable_table) + vars_with_holes.push_back({v, {}}); + + // Merge variable records: + find_initialisers(vars_with_holes,amap,dominator_analysis); + + // Clean up removed records from the variable table: + cleanup_var_table(vars_with_holes); + + // Do the locals and parameters in the variable table, which is available when + // compiled with -g or for methods with many local variables in the latter + // case, different variables can have the same index, depending on the + // context. + // + // to calculate which variable to use, one uses the address of the instruction + // that uses the variable, the size of the instruction and the start_pc / + // length values in the local variable table + for(const auto & v : vars_with_holes) + { + if(v.var.start_pc==0) // Parameter? + continue; + + typet t=java_type_from_string(v.var.signature); + std::ostringstream id_oss; + id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; + irep_idt identifier(id_oss.str()); + symbol_exprt result(identifier, t); + result.set(ID_C_base_name, v.var.name); + + variables[v.var.index].push_back(variablet()); + auto& newv=variables[v.var.index].back(); + newv.symbol_expr = result; + newv.start_pc = v.var.start_pc; + newv.length = v.var.length; + newv.holes=std::move(v.holes); + + symbolt new_symbol; + new_symbol.name=identifier; + new_symbol.type=t; + new_symbol.base_name=v.var.name; + new_symbol.pretty_name=id2string(identifier).substr(6, std::string::npos); + new_symbol.mode=ID_java; + new_symbol.is_type=false; + new_symbol.is_file_local=true; + new_symbol.is_thread_local=true; + new_symbol.is_lvalue=true; + symbol_table.add(new_symbol); + } + +} + +const java_bytecode_convert_methodt::variablet & +java_bytecode_convert_methodt::find_variable_for_slot( + size_t address, + variablest &var_list) +{ + for(const variablet &var : var_list) + { + size_t start_pc = var.start_pc; + size_t length = var.length; + if (address>=start_pc && address<(start_pc+length)) + { + bool found_hole=false; + for(auto &hole : var.holes) + if(address>=hole.start_pc && address<(hole.start_pc-hole.length)) + { + found_hole=true; + break; + } + if(found_hole) + continue; + return var; + } + } + // add unnamed local variable to end of list at this index + // with scope from 0 to INT_MAX + // as it is at the end of the vector, it will only be taken into account + // if no other variable is valid + size_t list_length = var_list.size(); + var_list.resize(list_length+1); + var_list[list_length].start_pc=0; + var_list[list_length].length=std::numeric_limits::max(); + return var_list[list_length]; +} From adecf6db1f7adac58384a8cdcfbebbbd90b04da6 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 26 Oct 2016 17:29:15 +0100 Subject: [PATCH 4/8] Use a total order when top-sorting variables --- .../java_local_variable_table.cpp | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index 6145a723f39..430593bef26 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -5,6 +5,7 @@ #include "java_types.h" #include +#include // Specialise the CFG representation to work over Java instead of GOTO programs. // This must be done at global scope due to template resolution rules. @@ -159,9 +160,10 @@ static void populate_predecessor_map( local_variable_table_with_holest::iterator varlimit, const std::vector& live_variable_at_address, const address_mapt& amap, - predecessor_mapt& predecessor_map) + predecessor_mapt& predecessor_map, + message_handlert& msg_handler) { - + messaget msg(msg_handler); for(auto it=firstvar, itend=varlimit; it!=itend; ++it) { // Parameters are irrelevant to us and shouldn't be changed: @@ -197,7 +199,11 @@ static void populate_predecessor_map( auto inst_before_this=amapit; --inst_before_this; if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) - throw "Local variable table: unexpected flow from out of range"; + { + msg.warning() << "Local variable table: ignoring flow from out of range for " << + it->var.name << " " << pred << " -> " << amapit->first << "\n"; + continue; + } if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) throw "Local variable table: didn't find expected initialising store"; new_start_pc=pred; @@ -333,11 +339,25 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( // Now find variables that flow together by walking backwards to find initialisers // or branches from other live ranges: predecessor_mapt predecessor_map; - populate_predecessor_map(firstvar,varlimit,live_variable_at_address,amap,predecessor_map); + populate_predecessor_map( + firstvar, + varlimit, + live_variable_at_address, + amap, + predecessor_map, + get_message_handler()); // OK, we've established the flows all seem sensible. Now merge vartable entries // according to the predecessor_map: + // Take the transitive closure of the predecessor map: + for(auto& kv : predecessor_map) + { + std::set closed_preds; + gather_transitive_predecessors(kv.first,predecessor_map,closed_preds); + kv.second=std::move(closed_preds); + } + // Top-sort so that we get the bottom variables first: is_predecessor_of comp(predecessor_map); std::vector topsorted_vars; @@ -353,12 +373,14 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( if(merge_into->var.length==0) continue; - std::set merge_vars; - gather_transitive_predecessors(merge_into,predecessor_map,merge_vars); + auto findit=predecessor_map.find(merge_into); // Nothing to do? - if(merge_vars.size()==1) + if(findit==predecessor_map.end()) continue; + const auto& merge_vars=findit->second; + assert(merge_vars.size()>=2); + merge_variable_table_entries(*merge_into,merge_vars,dominator_analysis,status()); } From 69281b188ee44f4717bc9d8dfe17bee8304fab5a Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 1 Dec 2016 16:12:30 +0000 Subject: [PATCH 5/8] Local variable table: tolerate impossible flows Flows from out of range or from a clashing variable declaration can happen because jsr / ret control flow is imprecisely modelled, so control flow edges are present which are in practice infeasible, hence the compiler generating code which naively appears to change a stack slot's name or type without re-initialising it. Consequently, demote these to warnings, and perhaps in the future do proper control-flow analysis to determine which callsites can really reach which jsr subroutines. --- src/java_bytecode/java_local_variable_table.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index 430593bef26..b1ac6ed1a66 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -185,7 +185,7 @@ static void populate_predecessor_map( { for(auto pred : amapit->second.predecessors) { - auto pred_var=live_variable_at_address[pred]; + auto pred_var=(pred < live_variable_at_address.size() ? live_variable_at_address[pred] : 0); if(pred_var==&*it) { // Flow from within same live range? @@ -200,8 +200,10 @@ static void populate_predecessor_map( --inst_before_this; if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) { + // These sorts of infeasible edges can occur because jsr handling is presently vague + // (any subroutine is assumed to be able to return to any callsite) msg.warning() << "Local variable table: ignoring flow from out of range for " << - it->var.name << " " << pred << " -> " << amapit->first << "\n"; + it->var.name << " " << pred << " -> " << amapit->first << messaget::eom; continue; } if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) @@ -210,7 +212,13 @@ static void populate_predecessor_map( } else { if(pred_var->var.name!=it->var.name || pred_var->var.signature!=it->var.signature) - throw "Local variable table: flow from variable with clashing name or signature"; + { + // These sorts of infeasible edges can occur because jsr handling is presently vague + // (any subroutine is assumed to be able to return to any callsite) + msg.warning() << "Local variable table: ignoring flow from clashing variable for " << + it->var.name << " " << pred << " -> " << amapit->first << messaget::eom; + continue; + } // OK, this is a flow from a similar but distinct entry in the local var table. predecessor_map[&*it].insert(pred_var); } From 4ed03d7ca426590302de9b104cb36b3a6c49ce68 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 5 Dec 2016 16:58:01 +0000 Subject: [PATCH 6/8] Fix hole-in-variable-live-range test --- src/java_bytecode/java_local_variable_table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index b1ac6ed1a66..92f4941c915 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -522,7 +522,7 @@ java_bytecode_convert_methodt::find_variable_for_slot( { bool found_hole=false; for(auto &hole : var.holes) - if(address>=hole.start_pc && address<(hole.start_pc-hole.length)) + if(address>=hole.start_pc && address<(hole.start_pc+hole.length)) { found_hole=true; break; From 28a8289da0ce72c8251c24c823029765a05f9ddf Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 19 Jan 2017 11:32:18 +0000 Subject: [PATCH 7/8] Style fixes --- .../java_bytecode_convert_method.cpp | 12 +- .../java_bytecode_convert_method_class.h | 21 +- .../java_local_variable_table.cpp | 284 +++++++++++------- 3 files changed, 196 insertions(+), 121 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 2ad56bc9230..854d50ac9ba 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -82,7 +82,8 @@ static void cast_if_necessary(binary_relation_exprt &condition) exprt &lhs(condition.lhs()); exprt &rhs(condition.rhs()); const typet &lhs_type(lhs.type()); - if(lhs_type==rhs.type()) return; + if(lhs_type==rhs.type()) + return; rhs=typecast_exprt(rhs, lhs_type); } @@ -137,7 +138,8 @@ const exprt java_bytecode_convert_methodt::variable( } symbol_exprt java_bytecode_convert_methodt::tmp_variable( - const std::string &prefix, const typet &type) + const std::string &prefix, + const typet &type) { irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size()); irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); @@ -245,9 +247,9 @@ void java_bytecode_convert_methodt::convert( variables[v.index].push_back(variablet()); auto &newv=variables[v.index].back(); - newv.symbol_expr = result; - newv.start_pc = v.start_pc; - newv.length = v.length; + newv.symbol_expr=result; + newv.start_pc=v.start_pc; + newv.length=v.length; } // set up variables array diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 643b913a71a..8a456d0a99a 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -1,6 +1,13 @@ +/*******************************************************************\ -#ifndef JBCM_CLASS_H -#define JBCM_CLASS_H +Module: JAVA Bytecode Language Conversion + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_CLASS_H +#define CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_CLASS_H #include #include @@ -41,7 +48,8 @@ class java_bytecode_convert_methodt:public messaget typet method_return_type; public: - struct holet { + struct holet + { unsigned start_pc; unsigned length; }; @@ -52,7 +60,8 @@ class java_bytecode_convert_methodt:public messaget std::vector holes; }; - typedef std::vector local_variable_table_with_holest; + typedef std::vector + local_variable_table_with_holest; protected: class variablet @@ -120,10 +129,10 @@ class java_bytecode_convert_methodt:public messaget public: typedef std::map address_mapt; - typedef cfg_dominators_templatet java_cfg_dominatorst; + typedef cfg_dominators_templatet + java_cfg_dominatorst; protected: - void find_initialisers( local_variable_table_with_holest& vars, const address_mapt& amap, diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index 92f4941c915..fa566318d24 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -1,8 +1,15 @@ +/*******************************************************************\ + +Module: Java local variable table processing + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ #include "java_bytecode_convert_method_class.h" +#include "java_types.h" #include -#include "java_types.h" #include #include @@ -11,8 +18,11 @@ // This must be done at global scope due to template resolution rules. template -struct procedure_local_cfg_baset : - public graph > +struct procedure_local_cfg_baset< + T, + const java_bytecode_convert_methodt::address_mapt, + unsigned> : + public graph > { typedef java_bytecode_convert_methodt::address_mapt address_mapt; typedef std::map entry_mapt; @@ -32,50 +42,66 @@ struct procedure_local_cfg_basetadd_edge(entry_map.at(inst.first),entry_map.at(succ)); + this->add_edge(entry_map.at(inst.first), entry_map.at(succ)); } } - unsigned get_first_node(const address_mapt& amap) const { return amap.begin()->first; } - unsigned get_last_node(const address_mapt& amap) const { return (--amap.end())->first; } - unsigned nodes_empty(const address_mapt& amap) const { return amap.empty(); } - + unsigned get_first_node(const address_mapt& amap) const + { + return amap.begin()->first; + } + unsigned get_last_node(const address_mapt& amap) const + { + return (--amap.end())->first; + } + unsigned nodes_empty(const address_mapt& amap) const + { + return amap.empty(); + } }; -namespace { - -typedef java_bytecode_convert_methodt::holet holet; -typedef java_bytecode_convert_methodt::local_variable_with_holest local_variable_with_holest; -typedef java_bytecode_convert_methodt::local_variable_table_with_holest local_variable_table_with_holest; -typedef java_bytecode_convert_methodt::address_mapt address_mapt; -typedef java_bytecode_convert_methodt::java_cfg_dominatorst java_cfg_dominatorst; +typedef java_bytecode_convert_methodt::holet + holet; +typedef java_bytecode_convert_methodt::local_variable_with_holest + local_variable_with_holest; +typedef java_bytecode_convert_methodt::local_variable_table_with_holest + local_variable_table_with_holest; +typedef java_bytecode_convert_methodt::address_mapt + address_mapt; +typedef java_bytecode_convert_methodt::java_cfg_dominatorst + java_cfg_dominatorst; // Comparators for local variables: -static bool lt_index(const local_variable_with_holest& a, - const local_variable_with_holest& b) +static bool lt_index( + const local_variable_with_holest& a, + const local_variable_with_holest& b) { return a.var.indexvar.start_pcvar.start_pc; } // The predecessor map, and a top-sorting comparator: -typedef std::map > predecessor_mapt; +typedef std::map< + local_variable_with_holest*, + std::set > + predecessor_mapt; -struct is_predecessor_of +struct is_predecessor_oft { const predecessor_mapt& order; - is_predecessor_of(const predecessor_mapt& _order) : order(_order) {} + explicit is_predecessor_oft(const predecessor_mapt& _order) : order(_order) {} - bool operator()(local_variable_with_holest* a, - local_variable_with_holest* b) const + bool operator()( + local_variable_with_holest* a, + local_variable_with_holest* b) const { auto findit=order.find(a); if(findit==order.end()) @@ -97,7 +123,7 @@ static void gather_transitive_predecessors( if(findit==predecessor_map.end()) return; for(const auto pred : findit->second) - gather_transitive_predecessors(pred,predecessor_map,result); + gather_transitive_predecessors(pred, predecessor_map, result); } static bool is_store_to_slot( @@ -105,7 +131,7 @@ static bool is_store_to_slot( unsigned slotidx) { const std::string prevstatement=id2string(inst.statement); - if(!(prevstatement.size()>=1 && prevstatement.substr(1,5)=="store")) + if(!(prevstatement.size()>=1 && prevstatement.substr(1, 5)=="store")) return false; std::string storeslot; @@ -131,7 +157,7 @@ static void maybe_add_hole( { assert(to>=from); if(to!=from) - var.holes.push_back({from,to-from}); + var.holes.push_back({from, to-from}); } static void populate_variable_address_map( @@ -141,7 +167,7 @@ static void populate_variable_address_map( { for(auto it=firstvar, itend=varlimit; it!=itend; ++it) { - if(it->var.start_pc+it->var.length > live_variable_at_address.size()) + if(it->var.start_pc+it->var.length>live_variable_at_address.size()) live_variable_at_address.resize(it->var.start_pc+it->var.length); for(unsigned idx=it->var.start_pc, @@ -152,7 +178,6 @@ static void populate_variable_address_map( live_variable_at_address[idx]=&*it; } } - } static void populate_predecessor_map( @@ -176,16 +201,22 @@ static void populate_predecessor_map( auto old_amapit=amapit; --amapit; if(old_amapit==amap.end()) - assert(end_pc>amapit->first && - "Instruction live range doesn't align to instruction boundary?"); + { + assert( + end_pc>amapit->first && + "Instruction live range doesn't align to instruction boundary?"); + } // Find vartable entries that flow into this one: unsigned new_start_pc=it->var.start_pc; - for(; amapit->first >= it->var.start_pc; --amapit) + for(; amapit->first>=it->var.start_pc; --amapit) { for(auto pred : amapit->second.predecessors) { - auto pred_var=(pred < live_variable_at_address.size() ? live_variable_at_address[pred] : 0); + auto pred_var= + (predfirst!=it->var.start_pc || inst_before_this->first!=pred) { - // These sorts of infeasible edges can occur because jsr handling is presently vague - // (any subroutine is assumed to be able to return to any callsite) - msg.warning() << "Local variable table: ignoring flow from out of range for " << - it->var.name << " " << pred << " -> " << amapit->first << messaget::eom; + // These sorts of infeasible edges can occur because jsr + // handling is presently vague (any subroutine is assumed to + // be able to return to any callsite) + msg.warning() << "Local variable table: ignoring flow from " + << "out of range for " << it->var.name << " " + << pred << " -> " << amapit->first + << messaget::eom; continue; } - if(!is_store_to_slot(*(inst_before_this->second.source),it->var.index)) - throw "Local variable table: didn't find expected initialising store"; + if(!is_store_to_slot( + *(inst_before_this->second.source), + it->var.index)) + { + throw "local variable table: didn't find initialising store"; + } new_start_pc=pred; } - else { - if(pred_var->var.name!=it->var.name || pred_var->var.signature!=it->var.signature) + else + { + if(pred_var->var.name!=it->var.name || + pred_var->var.signature!=it->var.signature) { - // These sorts of infeasible edges can occur because jsr handling is presently vague - // (any subroutine is assumed to be able to return to any callsite) - msg.warning() << "Local variable table: ignoring flow from clashing variable for " << - it->var.name << " " << pred << " -> " << amapit->first << messaget::eom; + // These sorts of infeasible edges can occur because + // jsr handling is presently vague (any subroutine is + // assumed to be able to return to any callsite) + msg.warning() << "Local variable table: ignoring flow from " + << "clashing variable for " + << it->var.name << " " << pred << " -> " + << amapit->first << messaget::eom; continue; } - // OK, this is a flow from a similar but distinct entry in the local var table. + // OK, this is a flow from a similar but + // distinct entry in the local var table. predecessor_map[&*it].insert(pred_var); } } } - // If a simple pre-block initialiser was found, add it to the live range now: + // If a simple pre-block initialiser was found, + // add it to the live range now: it->var.length+=(it->var.start_pc-new_start_pc); it->var.start_pc=new_start_pc; - } - } static unsigned get_common_dominator( @@ -242,25 +285,30 @@ static unsigned get_common_dominator( unsigned first_pc=UINT_MAX; for(auto v : merge_vars) { - if(v->var.start_pc < first_pc) - first_pc = v->var.start_pc; + if(v->var.start_pcvar.start_pc; } std::vector candidate_dominators; for(auto v : merge_vars) { - const auto& dominator_nodeidx=dominator_analysis.cfg.entry_map.at(v->var.start_pc); - const auto& this_var_doms=dominator_analysis.cfg[dominator_nodeidx].dominators; + const auto& dominator_nodeidx= + dominator_analysis.cfg.entry_map.at(v->var.start_pc); + const auto& this_var_doms= + dominator_analysis.cfg[dominator_nodeidx].dominators; for(const auto this_var_dom : this_var_doms) - if(this_var_dom <= first_pc) + if(this_var_dom<=first_pc) candidate_dominators.push_back(this_var_dom); } - std::sort(candidate_dominators.begin(),candidate_dominators.end()); - - // Working from the back, simply find the first PC that occurs merge_vars.size() - // times and therefore dominates all vars we seek to merge: - for(auto domit=candidate_dominators.rbegin(), domitend=candidate_dominators.rend(); - domit != domitend; /* Don't increment here */) + std::sort(candidate_dominators.begin(), candidate_dominators.end()); + + // Working from the back, simply find the first PC + // that occurs merge_vars.size() times and therefore + // dominates all vars we seek to merge: + for(auto domit=candidate_dominators.rbegin(), + domitend=candidate_dominators.rend(); + domit!=domitend; + /* Don't increment here */) { unsigned repeats=0; auto dom=*domit; @@ -274,7 +322,7 @@ static unsigned get_common_dominator( return dom; } - throw "Variable live ranges with no common dominator?"; + throw "variable live ranges with no common dominator?"; } static void populate_live_range_holes( @@ -283,14 +331,20 @@ static void populate_live_range_holes( unsigned expanded_live_range_start) { std::vector sorted_by_startpc( - merge_vars.begin(),merge_vars.end()); - std::sort(sorted_by_startpc.begin(),sorted_by_startpc.end(),lt_startpc); + merge_vars.begin(), merge_vars.end()); + std::sort(sorted_by_startpc.begin(), sorted_by_startpc.end(), lt_startpc); - maybe_add_hole(merge_into,expanded_live_range_start,sorted_by_startpc[0]->var.start_pc); + maybe_add_hole( + merge_into, + expanded_live_range_start, + sorted_by_startpc[0]->var.start_pc); for(unsigned idx=0; idxvar.start_pc + sorted_by_startpc[idx]->var.length, - sorted_by_startpc[idx+1]->var.start_pc); + { + maybe_add_hole( + merge_into, + sorted_by_startpc[idx]->var.start_pc+sorted_by_startpc[idx]->var.length, + sorted_by_startpc[idx+1]->var.start_pc); + } } static void merge_variable_table_entries( @@ -299,30 +353,36 @@ static void merge_variable_table_entries( const java_cfg_dominatorst& dominator_analysis, std::ostream& debug_out) { - // Because we need a lexically-scoped declaration, we must have the merged variable + // Because we need a lexically-scoped declaration, + // we must have the merged variable // enter scope both in a block that dominates all entries, and which // precedes them in program order. unsigned found_dominator= - get_common_dominator(merge_vars,dominator_analysis); + get_common_dominator(merge_vars, dominator_analysis); - // Populate the holes in the live range (addresses where the new variable - // will be in scope, but references to this stack slot should not resolve to it + // Populate the holes in the live range + // (addresses where the new variable will be in scope, + // but references to this stack slot should not resolve to it // as it was not visible in the original local variable table) - populate_live_range_holes(merge_into,merge_vars,found_dominator); + populate_live_range_holes(merge_into, merge_vars, found_dominator); unsigned last_pc=0; for(auto v : merge_vars) - if(v->var.start_pc+v->var.length > last_pc) + { + if(v->var.start_pc+v->var.length>last_pc) last_pc=v->var.start_pc+v->var.length; + } // Apply the changes: merge_into.var.start_pc=found_dominator; merge_into.var.length=last_pc-found_dominator; #ifdef DEBUG - debug_out << "Merged " << merge_vars.size() << " variables named " << merge_into.var.name << - "; new live range " << merge_into.var.start_pc << "-" << - merge_into.var.start_pc + merge_into.var.length << messaget::eom; + debug_out << "Merged " << merge_vars.size() << " variables named " + << merge_into.var.name << "; new live range " + << merge_into.var.start_pc << "-" + << merge_into.var.start_pc + merge_into.var.length + << messaget::eom; #endif // Nuke the now-subsumed var-table entries: @@ -331,20 +391,20 @@ static void merge_variable_table_entries( v->var.length=0; } -} // End anonymous namespace - void java_bytecode_convert_methodt::find_initialisers_for_slot( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, const address_mapt& amap, const java_cfg_dominatorst& dominator_analysis) { - // Build a simple map from instruction PC to the variable live in this slot at that PC, - // and a map from each variable to variables that flow into it: + // Build a simple map from instruction PC to the variable + // live in this slot at that PC, and a map from each variable + // to variables that flow into it: std::vector live_variable_at_address; - populate_variable_address_map(firstvar,varlimit,live_variable_at_address); + populate_variable_address_map(firstvar, varlimit, live_variable_at_address); - // Now find variables that flow together by walking backwards to find initialisers + // Now find variables that flow together by + // walking backwards to find initialisers // or branches from other live ranges: predecessor_mapt predecessor_map; populate_predecessor_map( @@ -355,24 +415,24 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( predecessor_map, get_message_handler()); - // OK, we've established the flows all seem sensible. Now merge vartable entries - // according to the predecessor_map: + // OK, we've established the flows all seem sensible. + // Now merge vartable entries according to the predecessor_map: // Take the transitive closure of the predecessor map: for(auto& kv : predecessor_map) { std::set closed_preds; - gather_transitive_predecessors(kv.first,predecessor_map,closed_preds); + gather_transitive_predecessors(kv.first, predecessor_map, closed_preds); kv.second=std::move(closed_preds); } // Top-sort so that we get the bottom variables first: - is_predecessor_of comp(predecessor_map); + is_predecessor_oft comp(predecessor_map); std::vector topsorted_vars; - for(auto it=firstvar,itend=varlimit; it!=itend; ++it) + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) topsorted_vars.push_back(&*it); - std::sort(topsorted_vars.begin(),topsorted_vars.end(),comp); + std::sort(topsorted_vars.begin(), topsorted_vars.end(), comp); // Now merge the entries: for(auto merge_into : topsorted_vars) @@ -389,9 +449,12 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( const auto& merge_vars=findit->second; assert(merge_vars.size()>=2); - merge_variable_table_entries(*merge_into,merge_vars,dominator_analysis,status()); + merge_variable_table_entries( + *merge_into, + merge_vars, + dominator_analysis, + status()); } - } static void walk_to_next_index( @@ -418,28 +481,29 @@ void java_bytecode_convert_methodt::find_initialisers( const java_cfg_dominatorst& dominator_analysis) { // Handle one stack slot at a time: - std::sort(vars.begin(),vars.end(),lt_index); + std::sort(vars.begin(), vars.end(), lt_index); auto it1=vars.begin(); auto it2=it1; auto itend=vars.end(); - walk_to_next_index(it1,it2,itend); - for(; it1!=itend; walk_to_next_index(it1,it2,itend)) - find_initialisers_for_slot(it1,it2,amap,dominator_analysis); + walk_to_next_index(it1, it2, itend); + for(; it1!=itend; walk_to_next_index(it1, it2, itend)) + find_initialisers_for_slot(it1, it2, amap, dominator_analysis); } -static void cleanup_var_table(std::vector& vars_with_holes) +static void cleanup_var_table( + std::vector& vars_with_holes) { size_t toremove=0; for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) { - auto& v=vars_with_holes[i]; + auto &v=vars_with_holes[i]; if(v.var.length==0) { // Move to end; consider the new element we've swapped in: ++toremove; if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? - std::swap(v,vars_with_holes[vars_with_holes.size()-toremove]); + std::swap(v, vars_with_holes[vars_with_holes.size()-toremove]); --i; } } @@ -448,8 +512,9 @@ static void cleanup_var_table(std::vector& vars_with vars_with_holes.resize(vars_with_holes.size()-toremove); } -void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, - const address_mapt& amap) +void java_bytecode_convert_methodt::setup_local_variables( + const methodt& m, + const address_mapt& amap) { // Compute CFG dominator tree java_cfg_dominatorst dominator_analysis; @@ -462,7 +527,7 @@ void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, vars_with_holes.push_back({v, {}}); // Merge variable records: - find_initialisers(vars_with_holes,amap,dominator_analysis); + find_initialisers(vars_with_holes, amap, dominator_analysis); // Clean up removed records from the variable table: cleanup_var_table(vars_with_holes); @@ -488,10 +553,10 @@ void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, result.set(ID_C_base_name, v.var.name); variables[v.var.index].push_back(variablet()); - auto& newv=variables[v.var.index].back(); - newv.symbol_expr = result; - newv.start_pc = v.var.start_pc; - newv.length = v.var.length; + auto &newv=variables[v.var.index].back(); + newv.symbol_expr=result; + newv.start_pc=v.var.start_pc; + newv.length=v.var.length; newv.holes=std::move(v.holes); symbolt new_symbol; @@ -506,7 +571,6 @@ void java_bytecode_convert_methodt::setup_local_variables(const methodt& m, new_symbol.is_lvalue=true; symbol_table.add(new_symbol); } - } const java_bytecode_convert_methodt::variablet & @@ -516,9 +580,9 @@ java_bytecode_convert_methodt::find_variable_for_slot( { for(const variablet &var : var_list) { - size_t start_pc = var.start_pc; - size_t length = var.length; - if (address>=start_pc && address<(start_pc+length)) + size_t start_pc=var.start_pc; + size_t length=var.length; + if(address>=start_pc && address<(start_pc+length)) { bool found_hole=false; for(auto &hole : var.holes) @@ -536,7 +600,7 @@ java_bytecode_convert_methodt::find_variable_for_slot( // with scope from 0 to INT_MAX // as it is at the end of the vector, it will only be taken into account // if no other variable is valid - size_t list_length = var_list.size(); + size_t list_length=var_list.size(); var_list.resize(list_length+1); var_list[list_length].start_pc=0; var_list[list_length].length=std::numeric_limits::max(); From e438bb3c7d639d8b941bcc0328dd9d111f21e051 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 19 Jan 2017 12:49:05 +0000 Subject: [PATCH 8/8] Add documentation --- .../java_local_variable_table.cpp | 252 +++++++++++++++++- 1 file changed, 247 insertions(+), 5 deletions(-) diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index fa566318d24..d70fd18264e 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -60,6 +60,7 @@ struct procedure_local_cfg_baset< } }; +// Grab some class typdefs for brevity: typedef java_bytecode_convert_methodt::holet holet; typedef java_bytecode_convert_methodt::local_variable_with_holest @@ -112,6 +113,20 @@ struct is_predecessor_oft // Helper routines for the find-initialisers code below: +/*******************************************************************\ + +Function: gather_transitive_predecessors + + Inputs: `start`: Variable to find the predecessors of + `predecessor_map`: Map from local variables + to sets of predecessors + Outputs: `result`: populated with all transitive predecessors of + `start` found in `predecessor_map` + + Purpose: See above + +\*******************************************************************/ + static void gather_transitive_predecessors( local_variable_with_holest* start, const predecessor_mapt& predecessor_map, @@ -126,6 +141,20 @@ static void gather_transitive_predecessors( gather_transitive_predecessors(pred, predecessor_map, result); } +/*******************************************************************\ + +Function: is_store_to_slot + + Inputs: `inst`: Java bytecode instruction + `slotidx`: local variable slot number + + Outputs: Returns true if `inst` is any form of store instruction + targeting slot `slotidx` + + Purpose: See above + +\*******************************************************************/ + static bool is_store_to_slot( const java_bytecode_convert_methodt::instructiont& inst, unsigned slotidx) @@ -137,11 +166,13 @@ static bool is_store_to_slot( std::string storeslot; if(inst.args.size()==1) { + // Store with an argument: const auto& arg=inst.args[0]; storeslot=id2string(to_constant_expr(arg).get_value()); } else { + // Store shorthands, like "store_0", "store_1" assert(prevstatement[6]=='_' && prevstatement.size()==8); storeslot=prevstatement[7]; assert(isdigit(storeslot[0])); @@ -150,6 +181,19 @@ static bool is_store_to_slot( return storeslotidx==slotidx; } +/*******************************************************************\ + +Function: maybe_add_hole + + Inputs: `from`, `to`: bounds of a gap in `var`'s live range, + inclusive and exclusive respectively + + Outputs: Adds a hole to `var`, unless it would be of zero size. + + Purpose: See above + +\*******************************************************************/ + static void maybe_add_hole( local_variable_with_holest& var, unsigned from, @@ -160,6 +204,22 @@ static void maybe_add_hole( var.holes.push_back({from, to-from}); } +/*******************************************************************\ + +Function: populate_variable_address_map + + Inputs: `firstvar`-`varlimit`: range of local variable table + entries to consider + + Outputs: `live_variable_at_address` is populated with a sequence of + local variable table entry pointers, such that + `live_variable_at_address[addr]` yields the unique table + entry covering that address. Asserts if entries overlap. + + Purpose: See above + +\*******************************************************************/ + static void populate_variable_address_map( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, @@ -172,7 +232,8 @@ static void populate_variable_address_map( for(unsigned idx=it->var.start_pc, idxlim=it->var.start_pc+it->var.length; - idx!=idxlim; ++idx) + idx!=idxlim; + ++idx) { assert((!live_variable_at_address[idx]) && "Local variable table clash?"); live_variable_at_address[idx]=&*it; @@ -180,6 +241,32 @@ static void populate_variable_address_map( } } +/*******************************************************************\ + +Function: populate_predecessor_map + + Inputs: `firstvar`-`varlimit`: range of local variable table + entries to consider + `live_variable_at_address`: map from bytecode address + to table entry (drawn from firstvar-varlimit) live + at that address + `amap`: map from bytecode address to instructions + + Outputs: Populates `predecessor_map` with a graph from local variable + table entries to their predecessors (table entries which + may flow together and thus may be considered the same live + range). + + Purpose: Usually a live variable range begins with a store + instruction initialising the relevant local variable slot, + but instead of or in addition to this, control flow edges + may exist from bytecode addresses that fall under a + table entry which differs, but which has the same variable + name and type descriptor. This indicates a split live + range, and will be recorded in the predecessor map. + +\*******************************************************************/ + static void populate_predecessor_map( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, @@ -198,6 +285,7 @@ static void populate_predecessor_map( // Find the last instruction within the live range: unsigned end_pc=it->var.start_pc+it->var.length; auto amapit=amap.find(end_pc); + assert(amapit!=amap.begin()); auto old_amapit=amapit; --amapit; if(old_amapit==amap.end()) @@ -228,6 +316,7 @@ static void populate_predecessor_map( // Check if this is an initialiser, and if so expand the live range // to include it, but don't check its predecessors: auto inst_before_this=amapit; + assert(inst_before_this!=amap.begin()); --inst_before_this; if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) { @@ -276,11 +365,29 @@ static void populate_predecessor_map( } } +/*******************************************************************\ + +Function: get_common_dominator + + Inputs: `merge_vars`: Set of variables we want the common dominator + for. + `dominator_analysis`: Already-initialised dominator tree + + Outputs: Returns the bytecode address of the closest common + dominator of all given variable table entries. + In the worst case the function entry point should always + satisfy this criterion. + + Purpose: Used to find out where to put a variable declaration that + subsumes several variable live ranges. + +\*******************************************************************/ + static unsigned get_common_dominator( const std::set& merge_vars, const java_cfg_dominatorst& dominator_analysis) { - assert(merge_vars.size()!=0); + assert(!merge_vars.empty()); unsigned first_pc=UINT_MAX; for(auto v : merge_vars) @@ -325,6 +432,24 @@ static unsigned get_common_dominator( throw "variable live ranges with no common dominator?"; } +/*******************************************************************\ + +Function: populate_live_range_holes + + Inputs: `merge_vars`: a set of 2+ variable table entries to merge + `expanded_live_range_start`: address where the merged + variable will be declared + + Outputs: Adds holes to `merge_into`, indicating where gaps in the + variable's live range fall. For example, if the + declaration happens at address 10 and the entries in + `merge_into` have live ranges [(20-30), (40-50)] then + holes will be added at (10-20) and (30-40). + + Purpose: See above + +\*******************************************************************/ + static void populate_live_range_holes( local_variable_with_holest& merge_into, const std::set& merge_vars, @@ -347,6 +472,22 @@ static void populate_live_range_holes( } } +/*******************************************************************\ + +Function: merge_variable_table_entries + + Inputs: `merge_vars`: a set of 2+ variable table entries to merge + `dominator_analysis`: already-calculated dominator tree + + Outputs: Populates `merge_into` as a combined variable table entry, + with live range holes if the `merge_vars` entries do not + cover a contiguous address range, beginning the combined + live range at the common dominator of all `merge_vars`. + + Purpose: See above + +\*******************************************************************/ + static void merge_variable_table_entries( local_variable_with_holest& merge_into, const std::set& merge_vars, @@ -391,6 +532,28 @@ static void merge_variable_table_entries( v->var.length=0; } +/*******************************************************************\ + +Function: find_initialisers_for_slot + + Inputs: `firstvar`-`varlimit`: sequence of variable table entries, + all of which should concern the same slot index. + `amap`: Map from bytecode address to instruction + + Outputs: Side-effect: merges variable table entries which flow into + one another (e.g. there are branches from one live range + to another without re-initialising the local slot). + + Purpose: Given a sequence of users of the same local variable slot, + this figures out which ones are related by control flow, + and combines them into a single entry with holes, such that + after combination we can create a single + declaration per variable table entry, + placed at the live range's start address, which may + be moved back so that the declaration dominates all uses. + +\*******************************************************************/ + void java_bytecode_convert_methodt::find_initialisers_for_slot( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, @@ -457,6 +620,23 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( } } +/*******************************************************************\ + +Function: walk_to_next_index + + Inputs: `it1` and `it2`, which are iterators into the same vector, + of which `itend` is the end() iterator. + + Outputs: Moves `it1` and `it2` to delimit a sequence of variable + table entries with slot index equal to it2->var.index + on entering this function, or to both equal itend if + it2==itend on entry. + + Purpose: Walk a vector, a contiguous block of entries with equal + slot index at a time. + +\*******************************************************************/ + static void walk_to_next_index( local_variable_table_with_holest::iterator& it1, local_variable_table_with_holest::iterator& it2, @@ -475,14 +655,31 @@ static void walk_to_next_index( it1=old_it2; } +/*******************************************************************\ + +Function: find_initialisers + + Inputs: `vars`: Local variable table + `amap`: Map from bytecode index to instruction + `dominator_analysis`: Already computed dominator tree for + the Java function described by `amap` + + Outputs: Combines entries in `vars` which flow together + + Purpose: See `find_initialisers_for_slot` above for more detail. + +\*******************************************************************/ + void java_bytecode_convert_methodt::find_initialisers( local_variable_table_with_holest& vars, const address_mapt& amap, const java_cfg_dominatorst& dominator_analysis) { - // Handle one stack slot at a time: + // Sort table entries by local slot index: std::sort(vars.begin(), vars.end(), lt_index); + // For each block of entries using a particular index, + // try to combine them: auto it1=vars.begin(); auto it2=it1; auto itend=vars.end(); @@ -491,6 +688,18 @@ void java_bytecode_convert_methodt::find_initialisers( find_initialisers_for_slot(it1, it2, amap, dominator_analysis); } +/*******************************************************************\ + +Function: cleanup_var_table + + Inputs: `vars_with_holes`: variable table + + Outputs: Removes zero-size entries from `vars_with_holes` + + Purpose: See above + +\*******************************************************************/ + static void cleanup_var_table( std::vector& vars_with_holes) { @@ -504,7 +713,8 @@ static void cleanup_var_table( ++toremove; if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? std::swap(v, vars_with_holes[vars_with_holes.size()-toremove]); - --i; + --i; // May make i (size_t)-1, but it will immediately be + // re-incremented as the loop iterates. } } @@ -512,6 +722,22 @@ static void cleanup_var_table( vars_with_holes.resize(vars_with_holes.size()-toremove); } +/*******************************************************************\ + +Function: setup_local_variables + + Inputs: `m`: Java method + `amap`: Map from bytecode indices to instructions in `m` + + Outputs: Populates `this->vars_with_holes` equal to + `this->local_variable_table`, only with variable table + entries that flow together combined. + Also symbol-table registers all locals. + + Purpose: See `find_initialisers_for_slot` above for more detail. + +\*******************************************************************/ + void java_bytecode_convert_methodt::setup_local_variables( const methodt& m, const address_mapt& amap) @@ -573,6 +799,22 @@ void java_bytecode_convert_methodt::setup_local_variables( } } +/*******************************************************************\ + +Function: find_variable_for_slot + + Inputs: `address`: Address to find a variable table entry for + `var_list`: List of candidates that use the slot we're + interested in + + Outputs: Returns the list entry covering this address (taking live + range holes into account), or creates/returns an anonymous + variable entry if nothing covers `address`. + + Purpose: See above + +\*******************************************************************/ + const java_bytecode_convert_methodt::variablet & java_bytecode_convert_methodt::find_variable_for_slot( size_t address, @@ -597,7 +839,7 @@ java_bytecode_convert_methodt::find_variable_for_slot( } } // add unnamed local variable to end of list at this index - // with scope from 0 to INT_MAX + // with scope from 0 to SIZE_T_MAX // as it is at the end of the vector, it will only be taken into account // if no other variable is valid size_t list_length=var_list.size();