diff --git a/src/java_bytecode/CMakeLists.txt b/src/java_bytecode/CMakeLists.txt index 60abcd475f0..2adca01e36e 100644 --- a/src/java_bytecode/CMakeLists.txt +++ b/src/java_bytecode/CMakeLists.txt @@ -7,7 +7,6 @@ add_subdirectory(library) add_library(java_bytecode ${sources} ${headers} ) add_dependencies(java_bytecode core_models_files) -target_sources(java_bytecode PUBLIC ${sources} ${headers}) generic_includes(java_bytecode) diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index 1ff17b8e587..bcf7fd1e5b2 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -23,6 +23,7 @@ SRC = bytecode_info.cpp \ java_object_factory.cpp \ java_pointer_casts.cpp \ java_root_class.cpp \ + java_static_initializers.cpp \ java_string_library_preprocess.cpp \ java_string_literals.cpp \ java_types.cpp \ diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 7ebdb84221e..8ce9b93dd35 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -111,9 +111,9 @@ bool ci_lazy_methodst::operator()( std::set needed_classes; { - std::vector needed_clinits; + std::vector initial_needed_methods; ci_lazy_methods_neededt initial_lazy_methods( - needed_clinits, + initial_needed_methods, needed_classes, symbol_table); initialize_needed_classes( @@ -122,8 +122,8 @@ bool ci_lazy_methodst::operator()( initial_lazy_methods); method_worklist2.insert( method_worklist2.end(), - needed_clinits.begin(), - needed_clinits.end()); + initial_needed_methods.begin(), + initial_needed_methods.end()); } std::set methods_already_populated; diff --git a/src/java_bytecode/ci_lazy_methods_needed.cpp b/src/java_bytecode/ci_lazy_methods_needed.cpp index ee20b8a1d17..4c6dfb0239d 100644 --- a/src/java_bytecode/ci_lazy_methods_needed.cpp +++ b/src/java_bytecode/ci_lazy_methods_needed.cpp @@ -34,14 +34,12 @@ bool ci_lazy_methods_neededt::add_needed_class( { if(!needed_classes.insert(class_symbol_name).second) return false; - const std::string &class_name_string = id2string(class_symbol_name); - const irep_idt clinit_name(class_name_string + ".:()V"); - if(symbol_table.symbols.count(clinit_name)) - add_needed_method(clinit_name); + const std::string &class_name_string = id2string(class_symbol_name); const irep_idt cprover_validate( class_name_string + ".cproverNondetInitialize:()V"); if(symbol_table.symbols.count(cprover_validate)) add_needed_method(cprover_validate); + return true; } diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index c1ee3a0bc3b..b834efc4d5c 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -16,6 +16,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_convert_method.h" #include "java_bytecode_convert_method_class.h" #include "bytecode_info.h" +#include "java_static_initializers.h" #include "java_string_library_preprocess.h" #include "java_types.h" #include "java_utils.h" @@ -930,156 +931,6 @@ void java_bytecode_convert_methodt::check_static_field_stub( } } -/// Determine whether a `new` or static access against `classname` should be -/// prefixed with a static initialization check. -/// \param classname: Class name -/// \return Returns true if the given class or one of its parents has a static -/// initializer -bool java_bytecode_convert_methodt::class_needs_clinit( - const irep_idt &classname) -{ - auto findit_any=any_superclass_has_clinit_method.insert({classname, false}); - if(!findit_any.second) - return findit_any.first->second; - - auto findit_here=class_has_clinit_method.insert({classname, false}); - if(findit_here.second) - { - const irep_idt &clinit_name=id2string(classname)+".:()V"; - findit_here.first->second=symbol_table.symbols.count(clinit_name); - } - if(findit_here.first->second) - { - findit_any.first->second=true; - return true; - } - const auto maybe_symbol=symbol_table.lookup(classname); - // Stub class? - if(!maybe_symbol) - { - warning() << "SKIPPED: " << classname << eom; - return false; - } - const symbolt &class_symbol=*maybe_symbol; - for(const auto &base : to_class_type(class_symbol.type).bases()) - { - if(class_needs_clinit(to_symbol_type(base.type()).get_identifier())) - { - findit_any.first->second=true; - return true; - } - } - return false; -} - -/// Create a ::clinit_wrapper the first time a static initializer might be -/// called. The wrapper method checks whether static init has already taken -/// place, calls the actual method if not, and initializes super- -/// classes and interfaces. -/// \param classname: Class name -/// \return Returns a symbol_exprt pointing to the given class' clinit wrapper -/// if one is required, or nil otherwise. -exprt java_bytecode_convert_methodt::get_or_create_clinit_wrapper( - const irep_idt &classname) -{ - if(!class_needs_clinit(classname)) - return static_cast(get_nil_irep()); - - // if the symbol table already contains the clinit_wrapper() function, return - // it - const irep_idt &clinit_wrapper_name= - id2string(classname)+"::clinit_wrapper"; - auto findit=symbol_table.symbols.find(clinit_wrapper_name); - if(findit!=symbol_table.symbols.end()) - return findit->second.symbol_expr(); - - // Otherwise, assume that class C extends class C' and implements interfaces - // I1, ..., In. We now create the following function (possibly recursively - // creating the clinit_wrapper functions for C' and I1, ..., In): - // - // java::C::clinit_wrapper() - // { - // if (java::C::clinit_already_run == false) - // { - // java::C::clinit_already_run = true; // before recursive calls! - // - // java::C'::clinit_wrapper(); - // java::I1::clinit_wrapper(); - // java::I2::clinit_wrapper(); - // // ... - // java::In::clinit_wrapper(); - // - // java::C::(); - // } - // } - const irep_idt &already_run_name= - id2string(classname)+"::clinit_already_run"; - symbolt already_run_symbol; - already_run_symbol.name=already_run_name; - already_run_symbol.pretty_name=already_run_name; - already_run_symbol.base_name="clinit_already_run"; - already_run_symbol.type=bool_typet(); - already_run_symbol.value=false_exprt(); - already_run_symbol.is_lvalue=true; - already_run_symbol.is_state_var=true; - already_run_symbol.is_static_lifetime=true; - already_run_symbol.mode=ID_java; - symbol_table.add(already_run_symbol); - - equal_exprt check_already_run( - already_run_symbol.symbol_expr(), - false_exprt()); - - // the entire body of the function is an if-then-else - code_ifthenelset wrapper_body; - - // add the condition to the if - wrapper_body.cond()=check_already_run; - - // add the "already-run = false" statement - code_blockt init_body; - code_assignt set_already_run(already_run_symbol.symbol_expr(), true_exprt()); - init_body.move_to_operands(set_already_run); - - // iterate through the base types and add recursive calls to the - // clinit_wrapper() - const symbolt &class_symbol=*symbol_table.lookup(classname); - for(const auto &base : to_class_type(class_symbol.type).bases()) - { - const auto base_name=to_symbol_type(base.type()).get_identifier(); - exprt base_init_routine=get_or_create_clinit_wrapper(base_name); - if(base_init_routine.is_nil()) - continue; - code_function_callt call_base; - call_base.function()=base_init_routine; - init_body.move_to_operands(call_base); - } - - // call java::C::(), if the class has one static initializer - const irep_idt &real_clinit_name=id2string(classname)+".:()V"; - auto find_sym_it=symbol_table.symbols.find(real_clinit_name); - if(find_sym_it!=symbol_table.symbols.end()) - { - code_function_callt call_real_init; - call_real_init.function()=find_sym_it->second.symbol_expr(); - init_body.move_to_operands(call_real_init); - } - wrapper_body.then_case()=init_body; - - // insert symbol in the symbol table - symbolt wrapper_method_symbol; - code_typet wrapper_method_type; - wrapper_method_type.return_type()=void_typet(); - wrapper_method_symbol.name=clinit_wrapper_name; - wrapper_method_symbol.pretty_name=clinit_wrapper_name; - wrapper_method_symbol.base_name="clinit_wrapper"; - wrapper_method_symbol.type=wrapper_method_type; - wrapper_method_symbol.value=wrapper_body; - wrapper_method_symbol.mode=ID_java; - symbol_table.add(wrapper_method_symbol); - return wrapper_method_symbol.symbol_expr(); -} - /// Each static access to classname should be prefixed with a check for /// necessary static init; this returns a call implementing that check. /// \param classname: Class name @@ -1088,12 +939,17 @@ exprt java_bytecode_convert_methodt::get_or_create_clinit_wrapper( codet java_bytecode_convert_methodt::get_clinit_call( const irep_idt &classname) { - exprt callee=get_or_create_clinit_wrapper(classname); - if(callee.is_nil()) + auto findit = symbol_table.symbols.find(clinit_wrapper_name(classname)); + if(findit == symbol_table.symbols.end()) return code_skipt(); - code_function_callt ret; - ret.function()=callee; - return ret; + else + { + code_function_callt ret; + ret.function() = findit->second.symbol_expr(); + if(needed_lazy_methods) + needed_lazy_methods->add_needed_method(findit->second.name); + return ret; + } } static unsigned get_bytecode_type_width(const typet &ty) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 9475860a00a..0400d10ed08 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -31,6 +31,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parser.h" #include "java_class_loader.h" #include "java_string_literals.h" +#include "java_static_initializers.h" #include "java_utils.h" #include #include @@ -476,6 +477,11 @@ bool java_bytecode_languaget::typecheck( << " String or Class constant symbols" << messaget::eom; + // For each class that will require a static initializer wrapper, create a + // function named package.classname::clinit_wrapper, and a corresponding + // global tracking whether it has run or not: + create_static_initializer_wrappers(symbol_table); + // Now incrementally elaborate methods // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) @@ -488,6 +494,25 @@ bool java_bytecode_languaget::typecheck( { journalling_symbol_tablet journalling_symbol_table = journalling_symbol_tablet::wrap(symbol_table); + + { + // Convert all static initialisers: + std::vector static_initialisers; + + // Careful not to add symbols while iterating over the symbol table! + for(const auto &symbol : symbol_table.symbols) + { + if(is_clinit_wrapper_function(symbol.second.name)) + static_initialisers.push_back(symbol.second.name); + } + + for(const auto &static_init_wrapper_name : static_initialisers) + { + convert_single_method( + static_init_wrapper_name, journalling_symbol_table); + } + } + // Convert all methods for which we have bytecode now for(const auto &method_sig : method_bytecode) { @@ -633,6 +658,39 @@ void java_bytecode_languaget::convert_lazy_method( symbol_table, get_message_handler(), string_refinement_enabled); } +/// Notify ci_lazy_methods, if present, of any static function calls made by +/// the given function body. +/// \param function_body: function body code +/// \param needed_lazy_methods: optional ci_lazy_method_neededt interface. If +/// not set, this is a no-op; otherwise, its add_needed_method function will +/// be called for each function call in `function_body`. +static void notify_static_method_calls( + const exprt &function_body, + optionalt needed_lazy_methods) +{ + if(needed_lazy_methods) + { + for(const_depth_iteratort it = function_body.depth_cbegin(); + it != function_body.depth_cend(); + ++it) + { + if(it->id() == ID_code) + { + const auto fn_call = expr_try_dynamic_cast(*it); + if(!fn_call) + continue; + // Only support non-virtual function calls for now, if string solver + // starts to introduce virtual function calls then we will need to + // duplicate the behavior of java_bytecode_convert_method where it + // handles the invokevirtual instruction + const symbol_exprt &fn_sym = + expr_dynamic_cast(fn_call->function()); + needed_lazy_methods->add_needed_method(fn_sym.get_identifier()); + } + } + } +} + /// \brief Convert a method (one whose type is known but whose body hasn't /// been converted) but don't run typecheck, etc /// \remarks Amends the symbol table entry for function `function_id`, which @@ -673,32 +731,21 @@ bool java_bytecode_languaget::convert_single_method( generated_code.is_not_nil(), "Couldn't retrieve code for string method"); // String solver can make calls to functions that haven't yet been seen. // Add these to the needed_lazy_methods collection - if(needed_lazy_methods) - { - for(const_depth_iteratort it = generated_code.depth_cbegin(); - it != generated_code.depth_cend(); - ++it) - { - if(it->id() == ID_code) - { - const auto fn_call = expr_try_dynamic_cast(*it); - if(!fn_call) - continue; - // Only support non-virtual function calls for now, if string solver - // starts to introduce virtual function calls then we will need to - // duplicate the behavior of java_bytecode_convert_method where it - // handles the invokevirtual instruction - const symbol_exprt &fn_sym = - expr_dynamic_cast(fn_call->function()); - needed_lazy_methods->add_needed_method(fn_sym.get_identifier()); - } - } - } + notify_static_method_calls(generated_code, needed_lazy_methods); symbol.value = generated_code; return false; } + else if(is_clinit_wrapper_function(function_id)) + { + symbolt &symbol = symbol_table.get_writeable_ref(function_id); + symbol.value = get_clinit_wrapper_body(function_id, symbol_table); + // Notify lazy methods of other static init functions called: + notify_static_method_calls(symbol.value, needed_lazy_methods); + return false; + } - // No string solver implementation, check if have bytecode for it + // No string solver or static init wrapper implementation; + // check if have bytecode for it if(cmb) { java_bytecode_convert_method( diff --git a/src/java_bytecode/java_static_initializers.cpp b/src/java_bytecode/java_static_initializers.cpp new file mode 100644 index 00000000000..e5739c1de09 --- /dev/null +++ b/src/java_bytecode/java_static_initializers.cpp @@ -0,0 +1,223 @@ +/*******************************************************************\ + +Module: Java Static Initializers + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#include "java_static_initializers.h" +#include +#include +#include +#include +#include + +// Disable linter here to allow a std::string constant, since that holds +// a length, whereas a cstr would require strlen every time. +const std::string clinit_wrapper_suffix = "::clinit_wrapper"; // NOLINT(*) + +/// Get the Java static initializer wrapper name for a given class (the wrapper +/// checks if static initialization has already been done before invoking the +/// real static initializer if not). +/// Doesn't check whether the symbol actually exists +/// \param class_name: class symbol name +/// \return static initializer wrapper name +irep_idt clinit_wrapper_name(const irep_idt &class_name) +{ + return id2string(class_name) + clinit_wrapper_suffix; +} + +/// Get the class name from a clinit wrapper name. This is the opposite of +/// `clinit_wrapper_name`. +/// \param wrapper_name: clinit wrapper name +/// \return class name +static irep_idt clinit_wrapper_name_to_class_name(const irep_idt &wrapper_name) +{ + const std::string &wrapper_str = id2string(wrapper_name); + PRECONDITION(wrapper_str.size() > clinit_wrapper_suffix.size()); + return wrapper_str.substr( + 0, wrapper_str.size() - clinit_wrapper_suffix.size()); +} + +/// Check if function_id is a clinit wrapper +/// \param function_id: some function identifier +/// \return true if the passed identifier is a clinit wrapper +bool is_clinit_wrapper_function(const irep_idt &function_id) +{ + return has_suffix(id2string(function_id), clinit_wrapper_suffix); +} + +/// Get name of the static-initialization-already-done global variable for a +/// given class. +/// \param class_name: class symbol name +/// \return static initializer wrapper-already run global name +static irep_idt clinit_already_run_variable_name(const irep_idt &class_name) +{ + return id2string(class_name) + "::clinit_already_run"; +} + +/// Get name of the real static initializer for a given class. Doesn't check +/// if a static initializer actually exists. +/// \param class_name: class symbol name +/// \return Static initializer symbol name +static irep_idt clinit_function_name(const irep_idt &class_name) +{ + return id2string(class_name) + ".:()V"; +} + +/// Checks whether a static initializer wrapper is needed for a given class, +/// i.e. if the given class or any superclass has a static initializer. +/// \param class_name: class symbol name +/// \param symbol_table: global symbol table +/// \return true if a static initializer wrapper is needed +static bool needs_clinit_wrapper( + const irep_idt &class_name, const symbol_tablet &symbol_table) +{ + if(symbol_table.has_symbol(clinit_function_name(class_name))) + return true; + + const class_typet &class_type = + to_class_type(symbol_table.lookup_ref(class_name).type); + + for(const class_typet::baset &base : class_type.bases()) + { + if(symbol_table.has_symbol( + clinit_wrapper_name(to_symbol_type(base.type()).get_identifier()))) + { + return true; + } + } + + return false; +} + +/// Creates a static initializer wrapper symbol for the given class, along with +/// a global boolean that tracks if it has been run already. +/// \param class_name: class symbol name +/// \param symbol_table: global symbol table; will gain a clinit_wrapper symbol +/// and a corresponding has-run-already global. +static void create_clinit_wrapper_symbols( + const irep_idt &class_name, symbol_tablet &symbol_table) +{ + const irep_idt &already_run_name = + clinit_already_run_variable_name(class_name); + symbolt already_run_symbol; + already_run_symbol.name = already_run_name; + already_run_symbol.pretty_name = already_run_name; + already_run_symbol.base_name = "clinit_already_run"; + already_run_symbol.type = bool_typet(); + already_run_symbol.value = false_exprt(); + already_run_symbol.is_lvalue = true; + already_run_symbol.is_state_var = true; + already_run_symbol.is_static_lifetime = true; + already_run_symbol.mode = ID_java; + bool failed = symbol_table.add(already_run_symbol); + INVARIANT(!failed, "clinit-already-run symbol should be fresh"); + + symbolt wrapper_method_symbol; + code_typet wrapper_method_type; + wrapper_method_type.return_type() = void_typet(); + wrapper_method_symbol.name = clinit_wrapper_name(class_name); + wrapper_method_symbol.pretty_name = wrapper_method_symbol.name; + wrapper_method_symbol.base_name = "clinit_wrapper"; + wrapper_method_symbol.type = wrapper_method_type; + wrapper_method_symbol.mode = ID_java; + failed = symbol_table.add(wrapper_method_symbol); + INVARIANT(!failed, "clinit-wrapper symbol should be fresh"); +} + +/// Produces the static initialiser wrapper body for the given function. +/// \param function_id: clinit wrapper function id (the wrapper_method_symbol +/// name created by `create_clinit_wrapper_symbols`) +/// \param symbol_table: global symbol table +/// \return the body of the static initialiser wrapper +codet get_clinit_wrapper_body( + const irep_idt &function_id, const symbol_tablet &symbol_table) +{ + // Assume that class C extends class C' and implements interfaces + // I1, ..., In. We now create the following function (possibly recursively + // creating the clinit_wrapper functions for C' and I1, ..., In): + // + // java::C::clinit_wrapper() + // { + // if (java::C::clinit_already_run == false) + // { + // java::C::clinit_already_run = true; // before recursive calls! + // + // java::C'::clinit_wrapper(); + // java::I1::clinit_wrapper(); + // java::I2::clinit_wrapper(); + // // ... + // java::In::clinit_wrapper(); + // + // java::C::(); + // } + // } + irep_idt class_name = clinit_wrapper_name_to_class_name(function_id); + const symbolt &already_run_symbol = + symbol_table.lookup_ref(clinit_already_run_variable_name(class_name)); + + equal_exprt check_already_run( + already_run_symbol.symbol_expr(), + false_exprt()); + + // the entire body of the function is an if-then-else + code_ifthenelset wrapper_body; + + // add the condition to the if + wrapper_body.cond()=check_already_run; + + // add the "already-run = false" statement + code_blockt init_body; + code_assignt set_already_run(already_run_symbol.symbol_expr(), true_exprt()); + init_body.move_to_operands(set_already_run); + + // iterate through the base types and add recursive calls to the + // clinit_wrapper() + const symbolt &class_symbol = symbol_table.lookup_ref(class_name); + for(const auto &base : to_class_type(class_symbol.type).bases()) + { + const auto base_name = to_symbol_type(base.type()).get_identifier(); + irep_idt base_init_routine = clinit_wrapper_name(base_name); + auto findit = symbol_table.symbols.find(base_init_routine); + if(findit == symbol_table.symbols.end()) + continue; + code_function_callt call_base; + call_base.function() = findit->second.symbol_expr(); + init_body.move_to_operands(call_base); + } + + // call java::C::(), if the class has one static initializer + const irep_idt &real_clinit_name = clinit_function_name(class_name); + auto find_sym_it = symbol_table.symbols.find(real_clinit_name); + if(find_sym_it!=symbol_table.symbols.end()) + { + code_function_callt call_real_init; + call_real_init.function()=find_sym_it->second.symbol_expr(); + init_body.move_to_operands(call_real_init); + } + wrapper_body.then_case()=init_body; + + return wrapper_body; +} + +/// Create static initializer wrappers for all classes that need them. +/// \param symbol_table: global symbol table +void create_static_initializer_wrappers(symbol_tablet &symbol_table) +{ + // Top-sort the class hierarchy, such that we visit parents before children, + // and can so identify parents that need static initialisation by whether we + // have made them a clinit_wrapper method. + class_hierarchy_grapht class_graph; + class_graph.populate(symbol_table); + std::list topsorted_nodes = + class_graph.topsort(); + + for(const auto node : topsorted_nodes) + { + const irep_idt &class_identifier = class_graph[node].class_identifier; + if(needs_clinit_wrapper(class_identifier, symbol_table)) + create_clinit_wrapper_symbols(class_identifier, symbol_table); + } +} diff --git a/src/java_bytecode/java_static_initializers.h b/src/java_bytecode/java_static_initializers.h new file mode 100644 index 00000000000..12f537d0c90 --- /dev/null +++ b/src/java_bytecode/java_static_initializers.h @@ -0,0 +1,24 @@ +/*******************************************************************\ + +Module: Java Static Initializers + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_STATIC_INITIALIZERS_H +#define CPROVER_JAVA_BYTECODE_JAVA_STATIC_INITIALIZERS_H + +#include +#include + +irep_idt clinit_wrapper_name(const irep_idt &class_name); + +bool is_clinit_wrapper_function(const irep_idt &function_id); + +void create_static_initializer_wrappers(symbol_tablet &symbol_table); + +codet get_clinit_wrapper_body( + const irep_idt &function_id, const symbol_tablet &symbol_table); + +#endif