From c69b00de70fb8d80c6323aa36a40fe0971f2823b Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Wed, 29 Nov 2017 11:13:50 +0000 Subject: [PATCH 1/7] Rename things to better reflect true meaning --- src/java_bytecode/ci_lazy_methods.cpp | 68 +++++++++---------- src/java_bytecode/ci_lazy_methods.h | 25 ++++--- .../java_bytecode_convert_class.cpp | 24 +++---- .../java_bytecode_convert_class.h | 2 +- .../java_bytecode_convert_method.cpp | 26 +++---- .../java_bytecode_convert_method.h | 2 +- .../java_bytecode_convert_method_class.h | 20 +++--- src/java_bytecode/java_bytecode_language.cpp | 58 ++++++++-------- src/java_bytecode/java_bytecode_language.h | 20 +++--- src/util/language.h | 5 +- src/util/language_file.cpp | 2 +- 11 files changed, 126 insertions(+), 126 deletions(-) diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 14e51502868..3c727f0f913 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -61,13 +61,13 @@ ci_lazy_methodst::ci_lazy_methodst( /// from the main entry point (usually provided with the --function command- /// line option /// \param symbol_table: global symbol table -/// \param [out] lazy_methods: map from method names to relevant symbol and +/// \param [out] method_bytecode: map from method names to relevant symbol and /// parsed-method objects. /// \param method_converter: Function for converting methods on demand. /// \return Returns false on success bool ci_lazy_methodst::operator()( symbol_tablet &symbol_table, - lazy_methodst &lazy_methods, + method_bytecodet &method_bytecode, method_convertert method_converter) { std::vector method_worklist1; @@ -141,8 +141,8 @@ bool ci_lazy_methodst::operator()( { if(!methods_already_populated.insert(mname).second) continue; - auto findit=lazy_methods.find(mname); - if(findit==lazy_methods.end()) + auto findit = method_bytecode.find(mname); + if(findit == method_bytecode.end()) { debug() << "Skip " << mname << eom; continue; @@ -191,8 +191,9 @@ bool ci_lazy_methodst::operator()( { if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && - !methods_already_populated.count(sym.first)) + if( + method_bytecode.count(sym.first) && + !methods_already_populated.count(sym.first)) { continue; } @@ -263,13 +264,13 @@ void ci_lazy_methodst::resolve_method_names( /// \param entry_points: list of fully-qualified function names that /// we should assume are reachable /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points`. +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points`. void ci_lazy_methodst::initialize_needed_classes( const std::vector &entry_points, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { for(const auto &mname : entry_points) { @@ -281,7 +282,7 @@ void ci_lazy_methodst::initialize_needed_classes( { const pointer_typet &original_pointer=to_pointer_type(param.type()); initialize_all_needed_classes_from_pointer( - original_pointer, ns, lazy_methods); + original_pointer, ns, needed_lazy_methods); } } } @@ -289,13 +290,13 @@ void ci_lazy_methodst::initialize_needed_classes( // Also add classes whose instances are magically // created by the JVM and so won't be spotted by // looking for constructors and calls as usual: - lazy_methods.add_needed_class("java::java.lang.String"); - lazy_methods.add_needed_class("java::java.lang.Class"); - lazy_methods.add_needed_class("java::java.lang.Object"); + needed_lazy_methods.add_needed_class("java::java.lang.String"); + needed_lazy_methods.add_needed_class("java::java.lang.Class"); + needed_lazy_methods.add_needed_class("java::java.lang.Object"); // As in class_loader, ensure these classes stay available for(const auto &id : extra_needed_classes) - lazy_methods.add_needed_class("java::" + id2string(id)); + needed_lazy_methods.add_needed_class("java::" + id2string(id)); } /// Build up list of methods for types for a pointer and any types it @@ -303,16 +304,15 @@ void ci_lazy_methodst::initialize_needed_classes( /// `initialize_needed_classes` for more details. /// \param pointer_type: The type to gather methods for. /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points` void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { - initialize_needed_classes_from_pointer( - pointer_type, ns, lazy_methods); + initialize_needed_classes_from_pointer(pointer_type, ns, needed_lazy_methods); const pointer_typet &subbed_pointer_type= pointer_type_selector.convert_pointer_type(pointer_type, ns); @@ -320,7 +320,7 @@ void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( if(subbed_pointer_type!=pointer_type) { initialize_needed_classes_from_pointer( - subbed_pointer_type, ns, lazy_methods); + subbed_pointer_type, ns, needed_lazy_methods); } } @@ -328,20 +328,20 @@ void ci_lazy_methodst::initialize_all_needed_classes_from_pointer( /// `initialize_needed_classes` for more details. /// \param pointer_type: The type to gather methods for. /// \param ns: global namespace -/// \param [out] lazy_methods: Populated with all Java reference types whose -/// references may be passed, directly or indirectly, to any of the functions -/// in `entry_points +/// \param [out] needed_lazy_methods: Populated with all Java reference types +/// whose references may be passed, directly or indirectly, to any of the +/// functions in `entry_points` void ci_lazy_methodst::initialize_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { const symbol_typet &class_type=to_symbol_type(pointer_type.subtype()); const auto ¶m_classid=class_type.get_identifier(); - if(lazy_methods.add_needed_class(param_classid)) + if(needed_lazy_methods.add_needed_class(param_classid)) { - gather_field_types(pointer_type.subtype(), ns, lazy_methods); + gather_field_types(pointer_type.subtype(), ns, needed_lazy_methods); } } @@ -462,30 +462,30 @@ void ci_lazy_methodst::gather_needed_globals( gather_needed_globals(*opit, symbol_table, needed); } -/// See param lazy_methods +/// See param needed_lazy_methods /// \param class_type: root of class tree to search /// \param ns: global namespace -/// \param [out] lazy_methods: Popualted with all Java reference types reachable -/// starting at `class_type`. For example if `class_type` is +/// \param [out] needed_lazy_methods: Popualted with all Java reference types +/// reachable starting at `class_type`. For example if `class_type` is /// `symbol_typet("java::A")` and A has a B field, then `B` (but not `A`) will /// noted as a needed class. void ci_lazy_methodst::gather_field_types( const typet &class_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods) + ci_lazy_methods_neededt &needed_lazy_methods) { const auto &underlying_type=to_struct_type(ns.follow(class_type)); for(const auto &field : underlying_type.components()) { if(field.type().id()==ID_struct || field.type().id()==ID_symbol) - gather_field_types(field.type(), ns, lazy_methods); + gather_field_types(field.type(), ns, needed_lazy_methods); else if(field.type().id()==ID_pointer) { // Skip array primitive pointers, for example: if(field.type().subtype().id()!=ID_symbol) continue; initialize_all_needed_classes_from_pointer( - to_pointer_type(field.type()), ns, lazy_methods); + to_pointer_type(field.type()), ns, needed_lazy_methods); } } } diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 5c5d2aca51c..3dcd8f6cf02 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -26,13 +26,11 @@ class java_string_library_preprocesst; -typedef std::pair< - const symbolt *, - const java_bytecode_parse_treet::methodt *> - lazy_method_valuet; - -typedef std::map - lazy_methodst; +// Pair of class id and methodt +typedef std::pair + class_and_bytecodet; +// Map from method id to class_and_bytecodet +typedef std::map method_bytecodet; typedef std::function &entry_points, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void initialize_all_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void initialize_needed_classes_from_pointer( const pointer_typet &pointer_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); void gather_virtual_callsites( const exprt &e, @@ -93,9 +91,10 @@ class ci_lazy_methodst:public messaget const symbol_tablet &symbol_table, symbol_tablet &needed); - void gather_field_types(const typet &class_type, + void gather_field_types( + const typet &class_type, const namespacet &ns, - ci_lazy_methods_neededt &lazy_methods); + ci_lazy_methods_neededt &needed_lazy_methods); irep_idt get_virtual_method_target( const std::set &needed_classes, diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 4c2e640ff9e..7a3e3e41f26 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -35,15 +35,15 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, - lazy_methodst& _lazy_methods, + method_bytecodet &method_bytecode, lazy_methods_modet _lazy_methods_mode, - java_string_library_preprocesst &_string_preprocess): - messaget(_message_handler), - symbol_table(_symbol_table), - max_array_length(_max_array_length), - lazy_methods(_lazy_methods), - lazy_methods_mode(_lazy_methods_mode), - string_preprocess(_string_preprocess) + java_string_library_preprocesst &_string_preprocess) + : messaget(_message_handler), + symbol_table(_symbol_table), + max_array_length(_max_array_length), + method_bytecode(method_bytecode), + lazy_methods_mode(_lazy_methods_mode), + string_preprocess(_string_preprocess) { } @@ -73,7 +73,7 @@ class java_bytecode_convert_classt:public messaget protected: symbol_tablet &symbol_table; const size_t max_array_length; - lazy_methodst &lazy_methods; + method_bytecodet &method_bytecode; lazy_methods_modet lazy_methods_mode; java_string_library_preprocesst &string_preprocess; @@ -205,7 +205,7 @@ void java_bytecode_convert_classt::convert(const classt &c) method, symbol_table, get_message_handler()); - lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); + method_bytecode[method_identifier] = std::make_pair(class_symbol, &method); } // is this a root class? @@ -483,7 +483,7 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - lazy_methodst &lazy_methods, + method_bytecodet &method_bytecode, lazy_methods_modet lazy_methods_mode, java_string_library_preprocesst &string_preprocess) { @@ -491,7 +491,7 @@ bool java_bytecode_convert_class( symbol_table, message_handler, max_array_length, - lazy_methods, + method_bytecode, lazy_methods_mode, string_preprocess); diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 6f87db44baa..a4efa307641 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -24,7 +24,7 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - lazy_methodst &, + method_bytecodet &, lazy_methods_modet, java_string_library_preprocesst &string_preprocess); diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 07627b0d062..be9bf11a7b3 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1429,8 +1429,8 @@ codet java_bytecode_convert_methodt::convert_instructions( if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) { - if(lazy_methods) - lazy_methods->add_needed_class(classname); + if(needed_lazy_methods) + needed_lazy_methods->add_needed_class(classname); code_type.set(ID_constructor, true); } else @@ -1548,11 +1548,11 @@ codet java_bytecode_convert_methodt::convert_instructions( { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); - if(lazy_methods) + if(needed_lazy_methods) { - lazy_methods->add_needed_method(arg0.get(ID_identifier)); + needed_lazy_methods->add_needed_method(arg0.get(ID_identifier)); // Calling a static method causes static initialization: - lazy_methods->add_needed_class(arg0.get(ID_C_class)); + needed_lazy_methods->add_needed_class(arg0.get(ID_C_class)); } } @@ -2180,11 +2180,11 @@ codet java_bytecode_convert_methodt::convert_instructions( // If external, create a symbol table entry for this static field: check_static_field_stub(symbol_expr, field_name); - if(lazy_methods) + if(needed_lazy_methods) { if(arg0.type().id()==ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(arg0.type()).get_identifier()); } else if(arg0.type().id()==ID_pointer) @@ -2192,13 +2192,13 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &pointer_type=to_pointer_type(arg0.type()); if(pointer_type.subtype().id()==ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(pointer_type.subtype()).get_identifier()); } } else if(is_assertions_disabled_field) { - lazy_methods->add_needed_class(arg0.get_string(ID_class)); + needed_lazy_methods->add_needed_class(arg0.get_string(ID_class)); } } results[0]=java_bytecode_promotion(symbol_expr); @@ -2235,9 +2235,9 @@ codet java_bytecode_convert_methodt::convert_instructions( // If external, create a symbol table entry for this static field: check_static_field_stub(symbol_expr, field_name); - if(lazy_methods && arg0.type().id()==ID_symbol) + if(needed_lazy_methods && arg0.type().id() == ID_symbol) { - lazy_methods->add_needed_class( + needed_lazy_methods->add_needed_class( to_symbol_type(arg0.type()).get_identifier()); } @@ -2850,7 +2850,7 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer lazy_methods, + safe_pointer needed_lazy_methods, java_string_library_preprocesst &string_preprocess) { static const std::unordered_set methods_to_ignore @@ -2881,7 +2881,7 @@ void java_bytecode_convert_method( symbol_table, message_handler, max_array_length, - lazy_methods, + needed_lazy_methods, string_preprocess); java_bytecode_convert_method(class_symbol, method); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 6fd97a4b2d3..6ffa87d944e 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -28,7 +28,7 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer lazy_methods, + safe_pointer needed_lazy_methods, java_string_library_preprocesst &string_preprocess); inline void java_bytecode_convert_method( diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index dfd71f322d6..08f9367c62c 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -35,15 +35,15 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, - safe_pointer _lazy_methods, - java_string_library_preprocesst &_string_preprocess): - messaget(_message_handler), - symbol_table(_symbol_table), - max_array_length(_max_array_length), - lazy_methods(_lazy_methods), - string_preprocess(_string_preprocess), - slots_for_parameters(0), - method_has_this(false) + safe_pointer needed_lazy_methods, + java_string_library_preprocesst &_string_preprocess) + : messaget(_message_handler), + symbol_table(_symbol_table), + max_array_length(_max_array_length), + needed_lazy_methods(needed_lazy_methods), + string_preprocess(_string_preprocess), + slots_for_parameters(0), + method_has_this(false) { } @@ -61,7 +61,7 @@ class java_bytecode_convert_methodt:public messaget protected: symbol_tablet &symbol_table; const size_t max_array_length; - safe_pointer lazy_methods; + safe_pointer needed_lazy_methods; /// Fully qualified name of the method under translation. /// Initialized by `convert`. diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index c949b54f211..d2185d76158 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -205,14 +205,15 @@ bool java_bytecode_languaget::typecheck( if(c_it->second.parsed_class.name.empty()) continue; - if(java_bytecode_convert_class( - c_it->second, - symbol_table, - get_message_handler(), - max_user_array_length, - lazy_methods, - lazy_methods_mode, - string_preprocess)) + if( + java_bytecode_convert_class( + c_it->second, + symbol_table, + get_message_handler(), + max_user_array_length, + method_bytecode, + lazy_methods_mode, + string_preprocess)) return true; } @@ -241,13 +242,13 @@ bool java_bytecode_languaget::typecheck( if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { // ci: context-insensitive. - if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) + if(do_ci_lazy_method_conversion(symbol_table, method_bytecode)) return true; } else if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) { // Simply elaborate all methods symbols now. - for(const auto &method_sig : lazy_methods) + for(const auto &method_sig : method_bytecode) { java_bytecode_convert_method( *method_sig.second.first, @@ -300,14 +301,14 @@ bool java_bytecode_languaget::generate_support_functions( /// instantiated (or evidence that an object of that type exists before the main /// function is entered, such as being passed as a parameter). /// \par parameters: `symbol_table`: global symbol table -/// `lazy_methods`: map from method names to relevant symbol and parsed-method -/// objects. +/// `method_bytecode`: map from method names to relevant symbol and +/// parsed-method objects. /// \return Elaborates lazily-converted methods that may be reachable starting /// from the main entry point (usually provided with the --function command- /// line option) (side-effect on the symbol_table). Returns false on success. bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, - lazy_methodst &lazy_methods) + method_bytecodet &method_bytecode) { const auto method_converter=[&]( const symbolt &symbol, @@ -334,7 +335,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_pointer_type_selector(), get_message_handler()); - return method_gather(symbol_table, lazy_methods, method_converter); + return method_gather(symbol_table, method_bytecode, method_converter); } const select_pointer_typet & @@ -349,31 +350,30 @@ const select_pointer_typet & /// \return Populates `methods` with the complete list of lazy methods that are /// available to convert (those which are valid parameters for /// `convert_lazy_method`) -void java_bytecode_languaget::lazy_methods_provided( +void java_bytecode_languaget::methods_provided( std::set &methods) const { - for(const auto &kv : lazy_methods) + for(const auto &kv : method_bytecode) methods.insert(kv.first); } -/// Promote a lazy-converted method (one whose type is known but whose body -/// hasn't been converted) into a fully- elaborated one. -/// \par parameters: `id`: method ID to convert -/// `symtab`: global symbol table -/// \return Amends the symbol table entry for function `id`, which should be a -/// lazy method provided by this instance of `java_bytecode_languaget`. It -/// should initially have a nil value. After this method completes, it will -/// have a value representing the method body, identical to that produced -/// using eager method conversion. +/// \brief Promote a lazy-converted method (one whose type is known but whose +/// body hasn't been converted) into a fully-elaborated one. +/// \remarks Amends the symbol table entry for function `function_id`, which +/// should be a method provided by this instance of `java_bytecode_languaget` +/// to have a value representing the method body identical to that produced +/// using eager method conversion. +/// \param function_id: method ID to convert +/// \param symbol_table: global symbol table void java_bytecode_languaget::convert_lazy_method( - const irep_idt &id, - symbol_tablet &symtab) + const irep_idt &function_id, + symbol_tablet &symbol_table) { - const auto &lazy_method_entry=lazy_methods.at(id); + const auto &lazy_method_entry = method_bytecode.at(function_id); java_bytecode_convert_method( *lazy_method_entry.first, *lazy_method_entry.second, - symtab, + symbol_table, get_message_handler(), max_user_array_length, string_preprocess); diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index d8999572942..076f5d6af0e 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -81,12 +81,11 @@ struct object_factory_parameterst final bool string_printable = false; }; -typedef std::pair< - const symbolt *, - const java_bytecode_parse_treet::methodt *> - lazy_method_valuet; -typedef std::map - lazy_methodst; +// Pair of class id and methodt +typedef std::pair + class_and_bytecodet; +// Map from method id to class_and_bytecodet +typedef std::map method_bytecodet; class java_bytecode_languaget:public languaget { @@ -156,12 +155,13 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; - virtual void lazy_methods_provided(std::set &) const override; + virtual void methods_provided(std::set &methods) const override; virtual void convert_lazy_method( - const irep_idt &id, symbol_tablet &) override; + const irep_idt &function_id, + symbol_tablet &symbol_table) override; protected: - bool do_ci_lazy_method_conversion(symbol_tablet &, lazy_methodst &); + bool do_ci_lazy_method_conversion(symbol_tablet &, method_bytecodet &); const select_pointer_typet &get_pointer_type_selector() const; irep_idt main_class; @@ -170,7 +170,7 @@ class java_bytecode_languaget:public languaget bool assume_inputs_non_null; // assume inputs variables to be non-null object_factory_parameterst object_factory_parameters; size_t max_user_array_length; // max size for user code created arrays - lazy_methodst lazy_methods; + method_bytecodet method_bytecode; lazy_methods_modet lazy_methods_mode; std::vector lazy_methods_extra_entry_points; bool string_refinement_enabled; diff --git a/src/util/language.h b/src/util/language.h index 817a522b9c7..16606694c24 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -76,11 +76,12 @@ class languaget:public messaget // add lazy functions provided to set - virtual void lazy_methods_provided(std::set &methods) const + virtual void methods_provided(std::set &methods) const { } // populate a lazy method - virtual void convert_lazy_method(const irep_idt &id, symbol_tablet &) + virtual void + convert_lazy_method(const irep_idt &function_id, symbol_tablet &symbol_table) { } /// Final adjustments, e.g. initializing stub functions and globals that diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index 5eb6b2c654a..e2d0ae622c8 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -142,7 +142,7 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // TODO: learn about modules and generalise this // to module-providing languages if required. std::set lazy_method_ids; - file.second.language->lazy_methods_provided(lazy_method_ids); + file.second.language->methods_provided(lazy_method_ids); for(const auto &id : lazy_method_ids) lazy_method_map[id]=&file.second; } From 4b245f8525be8fa18135403ca15e01bf970fb19a Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Wed, 29 Nov 2017 12:02:41 +0000 Subject: [PATCH 2/7] Use optionalt instead of safe_pointer --- src/java_bytecode/ci_lazy_methods.cpp | 14 ++-- src/java_bytecode/ci_lazy_methods.h | 2 +- .../java_bytecode_convert_method.cpp | 2 +- .../java_bytecode_convert_method.h | 5 +- .../java_bytecode_convert_method_class.h | 7 +- src/java_bytecode/java_bytecode_language.cpp | 7 +- src/util/safe_pointer.h | 67 ------------------- 7 files changed, 16 insertions(+), 88 deletions(-) delete mode 100644 src/util/safe_pointer.h diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 3c727f0f913..884032e484a 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -68,7 +67,7 @@ ci_lazy_methodst::ci_lazy_methodst( bool ci_lazy_methodst::operator()( symbol_tablet &symbol_table, method_bytecodet &method_bytecode, - method_convertert method_converter) + const method_convertert &method_converter) { std::vector method_worklist1; std::vector method_worklist2; @@ -149,13 +148,12 @@ bool ci_lazy_methodst::operator()( } debug() << "CI lazy methods: elaborate " << mname << eom; const auto &parsed_method=findit->second; - // Note this wraps *references* to method_worklist2, needed_classes: - ci_lazy_methods_neededt new_lazy_methods( - method_worklist2, - needed_classes, - symbol_table); method_converter( - *parsed_method.first, *parsed_method.second, new_lazy_methods); + *parsed_method.first, + *parsed_method.second, + // Note this wraps *references* to method_worklist2 & needed_classes + ci_lazy_methods_neededt( + method_worklist2, needed_classes, symbol_table)); gather_virtual_callsites( symbol_table.lookup_ref(mname).value, virtual_callsites); diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 3dcd8f6cf02..1f9b49b54cc 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -54,7 +54,7 @@ class ci_lazy_methodst:public messaget bool operator()( symbol_tablet &symbol_table, method_bytecodet &method_bytecode, - method_convertert method_converter); + const method_convertert &method_converter); private: void resolve_method_names( diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index be9bf11a7b3..64236c6bf92 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -2850,7 +2850,7 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer needed_lazy_methods, + optionalt needed_lazy_methods, java_string_library_preprocesst &string_preprocess) { static const std::unordered_set methods_to_ignore diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 6ffa87d944e..f3e1a0c4dfc 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -14,7 +14,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include #include "java_string_library_preprocess.h" #include "java_bytecode_parse_tree.h" @@ -28,7 +27,7 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, - safe_pointer needed_lazy_methods, + optionalt needed_lazy_methods, java_string_library_preprocesst &string_preprocess); inline void java_bytecode_convert_method( @@ -45,7 +44,7 @@ inline void java_bytecode_convert_method( symbol_table, message_handler, max_array_length, - safe_pointer::create_null(), + optionalt(), string_preprocess); } diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 08f9367c62c..a1f3cad2f46 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -16,7 +16,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include -#include #include #include "java_bytecode_parse_tree.h" #include "java_bytecode_convert_class.h" @@ -35,12 +34,12 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, - safe_pointer needed_lazy_methods, + optionalt needed_lazy_methods, java_string_library_preprocesst &_string_preprocess) : messaget(_message_handler), symbol_table(_symbol_table), max_array_length(_max_array_length), - needed_lazy_methods(needed_lazy_methods), + needed_lazy_methods(std::move(needed_lazy_methods)), string_preprocess(_string_preprocess), slots_for_parameters(0), method_has_this(false) @@ -61,7 +60,7 @@ class java_bytecode_convert_methodt:public messaget protected: symbol_tablet &symbol_table; const size_t max_array_length; - safe_pointer needed_lazy_methods; + optionalt needed_lazy_methods; /// Fully qualified name of the method under translation. /// Initialized by `convert`. diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index d2185d76158..559c76d6585 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -310,18 +310,17 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, method_bytecodet &method_bytecode) { - const auto method_converter=[&]( + const auto method_converter = [&]( const symbolt &symbol, const java_bytecode_parse_treet::methodt &method, - ci_lazy_methods_neededt new_lazy_methods) - { + ci_lazy_methods_neededt new_lazy_methods) { java_bytecode_convert_method( symbol, method, symbol_table, get_message_handler(), max_user_array_length, - safe_pointer::create_non_null(&new_lazy_methods), + std::move(new_lazy_methods), string_preprocess); }; diff --git a/src/util/safe_pointer.h b/src/util/safe_pointer.h deleted file mode 100644 index 152eb3242d9..00000000000 --- a/src/util/safe_pointer.h +++ /dev/null @@ -1,67 +0,0 @@ -/*******************************************************************\ - -Module: Simple checked pointers - -Author: Chris Smowton, chris@smowton.net - -\*******************************************************************/ - -/// \file -/// Simple checked pointers - -#ifndef CPROVER_UTIL_SAFE_POINTER_H -#define CPROVER_UTIL_SAFE_POINTER_H - -template class safe_pointer -{ - public: - operator bool() const - { - return !!ptr; - } - - T *get() const - { - assert(ptr && "dereferenced a null safe pointer"); - return ptr; - } - - T &operator*() const - { - return *get(); - } - - T *operator->() const - { - return get(); - } - - static safe_pointer create_null() - { - return safe_pointer(); - } - - static safe_pointer create_non_null( - T *target) - { - assert(target && "initialized safe pointer with null"); - return safe_pointer(target); - } - - static safe_pointer create_maybe_null( - T *target) - { - return safe_pointer(target); - } - - protected: - T *ptr; - - explicit safe_pointer(T *target) : ptr(target) - {} - - safe_pointer() : ptr(nullptr) - {} -}; - -#endif From 0ac4d28980bc29f62e5a8d79cf269e5280c1b294 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Wed, 29 Nov 2017 14:31:39 +0000 Subject: [PATCH 3/7] Don't store pointers to symbols in map --- src/java_bytecode/ci_lazy_methods.cpp | 2 +- src/java_bytecode/ci_lazy_methods.h | 2 +- src/java_bytecode/java_bytecode_convert_class.cpp | 3 ++- src/java_bytecode/java_bytecode_language.cpp | 4 ++-- src/java_bytecode/java_bytecode_language.h | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 884032e484a..1e4f53dc78d 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -149,7 +149,7 @@ bool ci_lazy_methodst::operator()( debug() << "CI lazy methods: elaborate " << mname << eom; const auto &parsed_method=findit->second; method_converter( - *parsed_method.first, + symbol_table.lookup_ref(parsed_method.first), *parsed_method.second, // Note this wraps *references* to method_worklist2 & needed_classes ci_lazy_methods_neededt( diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 1f9b49b54cc..18bb8de333f 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -27,7 +27,7 @@ class java_string_library_preprocesst; // Pair of class id and methodt -typedef std::pair +typedef std::pair class_and_bytecodet; // Map from method id to class_and_bytecodet typedef std::map method_bytecodet; diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 7a3e3e41f26..c342e46c790 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -205,7 +205,8 @@ void java_bytecode_convert_classt::convert(const classt &c) method, symbol_table, get_message_handler()); - method_bytecode[method_identifier] = std::make_pair(class_symbol, &method); + method_bytecode[method_identifier] = + std::make_pair(qualified_classname, &method); } // is this a root class? diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 559c76d6585..23d48c1c247 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -251,7 +251,7 @@ bool java_bytecode_languaget::typecheck( for(const auto &method_sig : method_bytecode) { java_bytecode_convert_method( - *method_sig.second.first, + symbol_table.lookup_ref(method_sig.second.first), *method_sig.second.second, symbol_table, get_message_handler(), @@ -370,7 +370,7 @@ void java_bytecode_languaget::convert_lazy_method( { const auto &lazy_method_entry = method_bytecode.at(function_id); java_bytecode_convert_method( - *lazy_method_entry.first, + symbol_table.lookup_ref(lazy_method_entry.first), *lazy_method_entry.second, symbol_table, get_message_handler(), diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 076f5d6af0e..4eaac9d3dab 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -82,7 +82,7 @@ struct object_factory_parameterst final }; // Pair of class id and methodt -typedef std::pair +typedef std::pair class_and_bytecodet; // Map from method id to class_and_bytecodet typedef std::map method_bytecodet; From ada4475b2777b27e1ba81cb607f90d8de621f080 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Tue, 5 Dec 2017 14:18:13 +0000 Subject: [PATCH 4/7] Replace map to pair with a more specialized type Requested in code review --- src/java_bytecode/ci_lazy_methods.cpp | 11 ++-- src/java_bytecode/ci_lazy_methods.h | 62 +++++++++++++++++-- .../java_bytecode_convert_class.cpp | 3 +- src/java_bytecode/java_bytecode_language.cpp | 11 ++-- src/java_bytecode/java_bytecode_language.h | 7 +-- 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 1e4f53dc78d..1f0a2b62029 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -140,17 +140,16 @@ bool ci_lazy_methodst::operator()( { if(!methods_already_populated.insert(mname).second) continue; - auto findit = method_bytecode.find(mname); - if(findit == method_bytecode.end()) + method_bytecodet::opt_reft cmb = method_bytecode.get(mname); + if(!cmb) { debug() << "Skip " << mname << eom; continue; } debug() << "CI lazy methods: elaborate " << mname << eom; - const auto &parsed_method=findit->second; method_converter( - symbol_table.lookup_ref(parsed_method.first), - *parsed_method.second, + symbol_table.lookup_ref(cmb->get().class_id), + cmb->get().method, // Note this wraps *references* to method_worklist2 & needed_classes ci_lazy_methods_neededt( method_worklist2, needed_classes, symbol_table)); @@ -190,7 +189,7 @@ bool ci_lazy_methodst::operator()( if(sym.second.is_static_lifetime) continue; if( - method_bytecode.count(sym.first) && + method_bytecode.contains_method(sym.first) && !methods_already_populated.count(sym.first)) { continue; diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 18bb8de333f..2513c3878ba 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -26,11 +26,63 @@ class java_string_library_preprocesst; -// Pair of class id and methodt -typedef std::pair - class_and_bytecodet; -// Map from method id to class_and_bytecodet -typedef std::map method_bytecodet; +// Map from method id to class_method_and_bytecodet +class method_bytecodet +{ +public: + /// Pair of class id and methodt + struct class_method_and_bytecodet + { + irep_idt class_id; + irep_idt method_id; + const java_bytecode_parse_treet::methodt &method; + }; + + typedef optionalt> + opt_reft; + +private: + typedef std::map mapt; + mapt map; + +public: + bool contains_method(const irep_idt &method_id) const + { + return map.count(method_id) != 0; + } + + void add(const class_method_and_bytecodet &method_class_and_bytecode) + { + map.emplace( + std::make_pair( + method_class_and_bytecode.method_id, method_class_and_bytecode)); + } + + void add( + const irep_idt &class_id, + const irep_idt &method_id, + const java_bytecode_parse_treet::methodt &method) + { + add(class_method_and_bytecodet{class_id, method_id, method}); + } + + mapt::const_iterator begin() const + { + return map.begin(); + } + mapt::const_iterator end() const + { + return map.end(); + } + + opt_reft get(const irep_idt &method_id) + { + const auto it = map.find(method_id); + if(it == map.end()) + return opt_reft(); + return std::cref(it->second); + } +}; typedef std::function #include +#include "ci_lazy_methods.h" #include "java_class_loader.h" #include "java_string_library_preprocess.h" @@ -81,12 +82,6 @@ struct object_factory_parameterst final bool string_printable = false; }; -// Pair of class id and methodt -typedef std::pair - class_and_bytecodet; -// Map from method id to class_and_bytecodet -typedef std::map method_bytecodet; - class java_bytecode_languaget:public languaget { public: From d556380ecdbee9f1e74909f545640de8b23ebcd4 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Wed, 29 Nov 2017 17:19:47 +0000 Subject: [PATCH 5/7] Make string solver functions get converted correctly first time Previously they were replaced after everything else was done --- src/java_bytecode/ci_lazy_methods.cpp | 37 ++--- src/java_bytecode/ci_lazy_methods.h | 7 +- .../java_bytecode_convert_method.cpp | 6 +- .../java_bytecode_convert_method.h | 20 +-- .../java_bytecode_convert_method_class.h | 6 +- src/java_bytecode/java_bytecode_language.cpp | 126 +++++++++--------- src/java_bytecode/java_bytecode_language.h | 17 ++- .../java_string_library_preprocess.cpp | 37 ++++- .../java_string_library_preprocess.h | 11 +- src/util/language.h | 6 +- src/util/language_file.cpp | 2 +- 11 files changed, 154 insertions(+), 121 deletions(-) diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 1f0a2b62029..4c108ba51b0 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -140,19 +140,17 @@ bool ci_lazy_methodst::operator()( { if(!methods_already_populated.insert(mname).second) continue; - method_bytecodet::opt_reft cmb = method_bytecode.get(mname); - if(!cmb) + debug() << "CI lazy methods: elaborate " << mname << eom; + if( + method_converter( + mname, + // Note this wraps *references* to method_worklist2 & needed_classes + ci_lazy_methods_neededt( + method_worklist2, needed_classes, symbol_table))) { - debug() << "Skip " << mname << eom; + // Couldn't convert this function continue; } - debug() << "CI lazy methods: elaborate " << mname << eom; - method_converter( - symbol_table.lookup_ref(cmb->get().class_id), - cmb->get().method, - // Note this wraps *references* to method_worklist2 & needed_classes - ci_lazy_methods_neededt( - method_worklist2, needed_classes, symbol_table)); gather_virtual_callsites( symbol_table.lookup_ref(mname).value, virtual_callsites); @@ -186,16 +184,23 @@ bool ci_lazy_methodst::operator()( for(const auto &sym : symbol_table.symbols) { + // Don't keep global variables (unless they're gathered below from a + // function that references them) if(sym.second.is_static_lifetime) continue; - if( - method_bytecode.contains_method(sym.first) && - !methods_already_populated.count(sym.first)) - { - continue; - } if(sym.second.type.id()==ID_code) + { + // Don't keep functions that belong to this language that we haven't + // converted above + if( + method_bytecode.contains_method(sym.first) && + !methods_already_populated.count(sym.first)) + { + continue; + } + // If this is a function then add all the things used in it gather_needed_globals(sym.second.value, symbol_table, keep_symbols); + } keep_symbols.add(sym.second); } diff --git a/src/java_bytecode/ci_lazy_methods.h b/src/java_bytecode/ci_lazy_methods.h index 2513c3878ba..9b9b7dda3c4 100644 --- a/src/java_bytecode/ci_lazy_methods.h +++ b/src/java_bytecode/ci_lazy_methods.h @@ -84,10 +84,9 @@ class method_bytecodet } }; -typedef std::function method_convertert; +typedef std::function< + bool(const irep_idt &function_id, ci_lazy_methods_neededt)> + method_convertert; class ci_lazy_methodst:public messaget { diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 64236c6bf92..c2bf40bfbe9 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -79,10 +79,10 @@ class patternt /// \return Assigns parameter names (side-effects on `ftype`) to function stub /// parameters, which are initially nameless as method conversion hasn't /// happened. Also creates symbols in `symbol_table`. -void assign_parameter_names( +static void assign_parameter_names( code_typet &ftype, const irep_idt &name_prefix, - symbol_tablet &symbol_table) + symbol_table_baset &symbol_table) { code_typet::parameterst ¶meters=ftype.parameters(); @@ -2847,7 +2847,7 @@ codet java_bytecode_convert_methodt::convert_instructions( void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, - symbol_tablet &symbol_table, + symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, optionalt needed_lazy_methods, diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index f3e1a0c4dfc..6693978c026 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -24,30 +24,12 @@ class class_hierarchyt; void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, - symbol_tablet &symbol_table, + symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, optionalt needed_lazy_methods, java_string_library_preprocesst &string_preprocess); -inline void java_bytecode_convert_method( - const symbolt &class_symbol, - const java_bytecode_parse_treet::methodt &method, - symbol_tablet &symbol_table, - message_handlert &message_handler, - size_t max_array_length, - java_string_library_preprocesst &string_preprocess) -{ - java_bytecode_convert_method( - class_symbol, - method, - symbol_table, - message_handler, - max_array_length, - optionalt(), - string_preprocess); -} - void java_bytecode_convert_method_lazy( const symbolt &class_symbol, const irep_idt &method_identifier, diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index a1f3cad2f46..0d1c34d82db 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -31,13 +31,13 @@ class java_bytecode_convert_methodt:public messaget { public: java_bytecode_convert_methodt( - symbol_tablet &_symbol_table, + symbol_table_baset &symbol_table, message_handlert &_message_handler, size_t _max_array_length, optionalt needed_lazy_methods, java_string_library_preprocesst &_string_preprocess) : messaget(_message_handler), - symbol_table(_symbol_table), + symbol_table(symbol_table), max_array_length(_max_array_length), needed_lazy_methods(std::move(needed_lazy_methods)), string_preprocess(_string_preprocess), @@ -58,7 +58,7 @@ class java_bytecode_convert_methodt:public messaget } protected: - symbol_tablet &symbol_table; + symbol_table_baset &symbol_table; const size_t max_array_length; optionalt needed_lazy_methods; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index c16518e20e0..fcaf134415d 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include @@ -247,16 +248,20 @@ bool java_bytecode_languaget::typecheck( } else if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) { - // Simply elaborate all methods symbols now. + journalling_symbol_tablet journalling_symbol_table = + journalling_symbol_tablet::wrap(symbol_table); + // Convert all methods for which we have bytecode now for(const auto &method_sig : method_bytecode) { - java_bytecode_convert_method( - symbol_table.lookup_ref(method_sig.second.class_id), - method_sig.second.method, - symbol_table, - get_message_handler(), - max_user_array_length, - string_preprocess); + convert_single_method(method_sig.first, journalling_symbol_table); + } + // Now convert all newly added string methods + id_sett string_methods; + string_preprocess.get_all_function_names(string_methods); + for(const auto &fn_name : journalling_symbol_table.get_inserted()) + { + if(string_methods.count(fn_name) != 0) + convert_single_method(fn_name, symbol_table); } } // Otherwise our caller is in charge of elaborating methods on demand. @@ -310,19 +315,13 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, method_bytecodet &method_bytecode) { - const auto method_converter = [&]( - const symbolt &symbol, - const java_bytecode_parse_treet::methodt &method, - ci_lazy_methods_neededt new_lazy_methods) { - java_bytecode_convert_method( - symbol, - method, - symbol_table, - get_message_handler(), - max_user_array_length, - std::move(new_lazy_methods), - string_preprocess); - }; + const method_convertert method_converter = + [this, &symbol_table] + (const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed) + { + return convert_single_method( + function_id, symbol_table, std::move(lazy_methods_needed)); + }; ci_lazy_methodst method_gather( symbol_table, @@ -349,9 +348,11 @@ const select_pointer_typet & /// \return Populates `methods` with the complete list of lazy methods that are /// available to convert (those which are valid parameters for /// `convert_lazy_method`) -void java_bytecode_languaget::methods_provided( - std::set &methods) const +void java_bytecode_languaget::methods_provided(id_sett &methods) const { + // Add all string solver methods to map + string_preprocess.get_all_function_names(methods); + // Add all concrete methods to map for(const auto &kv : method_bytecode) methods.insert(kv.first); } @@ -368,59 +369,60 @@ void java_bytecode_languaget::convert_lazy_method( const irep_idt &function_id, symbol_tablet &symbol_table) { - const method_bytecodet::class_method_and_bytecodet &cmb = - *method_bytecode.get(function_id); - java_bytecode_convert_method( - symbol_table.lookup_ref(cmb.class_id), - cmb.method, - symbol_table, - get_message_handler(), - max_user_array_length, - string_preprocess); + convert_single_method(function_id, symbol_table); } -/// Replace methods of the String library that are in the symbol table by code -/// generated by string_preprocess. -/// \param context: a symbol table -void java_bytecode_languaget::replace_string_methods( - symbol_table_baset &context) +/// \brief Convert a method (one whose type is known but whose body hasn't +/// been converted) but don't run typecheck, etc +/// \remarks Amends the symbol table entry for function `function_id`, which +/// should be a method provided by this instance of `java_bytecode_languaget` +/// to have a value representing the method body. +/// \param function_id: method ID to convert +/// \param symbol_table: global symbol table +/// \param needed_lazy_methods: optionally a collection of needed methods to +/// update with any methods touched during the conversion +bool java_bytecode_languaget::convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + optionalt needed_lazy_methods) { - // Symbols that have code type are potentialy to be replaced - std::list code_symbols; - forall_symbols(symbol, context.symbols) + const symbolt &symbol = symbol_table.lookup_ref(function_id); + + // Nothing to do if body is already loaded + if(symbol.value.is_not_nil()) + return false; + + exprt generated_code = + string_preprocess.code_for_function(symbol, symbol_table); + if(generated_code.is_not_nil()) { - if(symbol->second.type.id()==ID_code) - code_symbols.push_back(symbol->second); + // Populate body of the function with code generated by string preprocess + symbol_table.get_writeable_ref(function_id).value = generated_code; + return false; } - for(const auto &symbol : code_symbols) + // No string solver implementation, check if have bytecode for it + method_bytecodet::opt_reft cmb = method_bytecode.get(function_id); + if(cmb) { - const irep_idt &id=symbol.name; - exprt generated_code=string_preprocess.code_for_function( - id, to_code_type(symbol.type), symbol.location, context); - if(generated_code.is_not_nil()) - { - // Replace body of the function by code generated by string preprocess - symbolt &symbol=*context.get_writeable(id); - symbol.value=generated_code; - // Specifically instrument the new code, since this comes after the - // blanket instrumentation pass called before typechecking. - java_bytecode_instrument_symbol( - context, - symbol, - throw_runtime_exceptions, - get_message_handler()); - } + java_bytecode_convert_method( + symbol_table.lookup_ref(cmb->get().class_id), + cmb->get().method, + symbol_table, + get_message_handler(), + max_user_array_length, + std::move(needed_lazy_methods), + string_preprocess); + return false; } + + return true; } bool java_bytecode_languaget::final(symbol_table_baset &symbol_table) { PRECONDITION(language_options_initialized); - // replace code of String methods calls by code we generate - replace_string_methods(symbol_table); - return false; } diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 31456c64866..905c56afb6e 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -17,6 +17,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "ci_lazy_methods.h" +#include "ci_lazy_methods_needed.h" #include "java_class_loader.h" #include "java_string_library_preprocess.h" @@ -103,8 +104,6 @@ class java_bytecode_languaget:public languaget symbol_tablet &context, const std::string &module) override; - void replace_string_methods(symbol_table_baset &context); - virtual bool final(symbol_table_baset &context) override; void show_parse(std::ostream &out) override; @@ -150,12 +149,24 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; - virtual void methods_provided(std::set &methods) const override; + virtual void methods_provided(id_sett &methods) const override; virtual void convert_lazy_method( const irep_idt &function_id, symbol_tablet &symbol_table) override; protected: + void convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table) + { + convert_single_method( + function_id, symbol_table, optionalt()); + } + bool convert_single_method( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + optionalt needed_lazy_methods); + bool do_ci_lazy_method_conversion(symbol_tablet &, method_bytecodet &); const select_pointer_typet &get_pointer_type_selector() const; diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index 1da4340e163..ae0496bcf6b 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -1758,6 +1758,36 @@ codet java_string_library_preprocesst::make_string_length_code( return code_returnt(get_length(deref, symbol_table)); } +template +void add_keys_to_container(const TMap &map, TContainer &container) +{ + static_assert( + std::is_same::value, + "TContainer value_type doesn't match TMap key_type"); + std::transform( + map.begin(), + map.end(), + std::inserter(container, container.begin()), + [](const typename TMap::value_type &pair) { return pair.first; }); +} + +void java_string_library_preprocesst::get_all_function_names( + id_sett &methods) const +{ + const id_mapt *maps[] = { + &cprover_equivalent_to_java_function, + &cprover_equivalent_to_java_string_returning_function, + &cprover_equivalent_to_java_constructor, + &cprover_equivalent_to_java_assign_and_return_function, + &cprover_equivalent_to_java_assign_function, + }; + for(const id_mapt *map : maps) + add_keys_to_container(*map, methods); + + add_keys_to_container(conversion_table, methods); +} + /// Should be called to provide code for string functions that are used in the /// code but for which no implementation is provided. /// \param function_id: name of the function @@ -1767,11 +1797,12 @@ codet java_string_library_preprocesst::make_string_length_code( /// \return Code for the body of the String functions if they are part of the /// supported String functions, nil_exprt otherwise. exprt java_string_library_preprocesst::code_for_function( - const irep_idt &function_id, - const code_typet &type, - const source_locationt &loc, + const symbolt &symbol, symbol_table_baset &symbol_table) { + const irep_idt &function_id = symbol.name; + const code_typet &type = to_code_type(symbol.type); + const source_locationt &loc = symbol.location; auto it_id=cprover_equivalent_to_java_function.find(function_id); if(it_id!=cprover_equivalent_to_java_function.end()) return make_function_from_call(it_id->second, type, loc, symbol_table); diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index ef9c7a5e3cf..73f88352b50 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -30,6 +30,8 @@ Date: March 2017 // Arbitrary limit of 10 arguments for the number of arguments to String.format #define MAX_FORMAT_ARGS 10 +typedef std::unordered_set id_sett; + class java_string_library_preprocesst:public messaget { public: @@ -43,11 +45,10 @@ class java_string_library_preprocesst:public messaget void initialize_conversion_table(); void initialize_refined_string_type(); - exprt code_for_function( - const irep_idt &function_id, - const code_typet &type, - const source_locationt &loc, - symbol_table_baset &symbol_table); + void get_all_function_names(id_sett &methods) const; + + exprt + code_for_function(const symbolt &symbol, symbol_table_baset &symbol_table); codet replace_character_call(code_function_callt call) { diff --git a/src/util/language.h b/src/util/language.h index 16606694c24..f1ee81f84a9 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -12,7 +12,7 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_UTIL_LANGUAGE_H #define CPROVER_UTIL_LANGUAGE_H -#include +#include #include #include #include // unique_ptr @@ -22,6 +22,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "message.h" +typedef std::unordered_set id_sett; + class symbol_tablet; class symbol_table_baset; class exprt; @@ -76,7 +78,7 @@ class languaget:public messaget // add lazy functions provided to set - virtual void methods_provided(std::set &methods) const + virtual void methods_provided(id_sett &methods) const { } // populate a lazy method diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index e2d0ae622c8..f42e7f88159 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -141,7 +141,7 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // register lazy methods. // TODO: learn about modules and generalise this // to module-providing languages if required. - std::set lazy_method_ids; + id_sett lazy_method_ids; file.second.language->methods_provided(lazy_method_ids); for(const auto &id : lazy_method_ids) lazy_method_map[id]=&file.second; From 0e716586ea7a5642422a59f15c4a74f2686dd18c Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Fri, 1 Dec 2017 13:56:41 +0000 Subject: [PATCH 6/7] Initialise string solver function parameter names Get the names of the parameters for string solver functions from bytecode if we have the bytecode for them --- .../java_bytecode_convert_method.cpp | 90 +++++++++++++++++++ .../java_bytecode_convert_method.h | 6 ++ src/java_bytecode/java_bytecode_language.cpp | 26 ++++-- .../java_string_library_preprocess.cpp | 19 ++-- .../java_string_library_preprocess.h | 13 +++ 5 files changed, 138 insertions(+), 16 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index c2bf40bfbe9..ac6bceab861 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -2844,6 +2844,96 @@ codet java_bytecode_convert_methodt::convert_instructions( return code; } +/// This uses a cut-down version of the logic in +/// java_bytecode_convert_methodt::convert to initialize symbols for the +/// parameters and update the parameters in the type of method_symbol with +/// names read from the local_variable_table read from the bytecode +/// \remarks This is useful for pre-initialization for methods generated by +/// the string solver +/// \param method_symbol: The symbol for the method for which to initialize the +/// parameters +/// \param local_variable_table: The local variable table containing the +/// bytecode for the parameters +/// \param symbol_table: The symbol table into which to insert symbols for the +/// parameters +void java_bytecode_initialize_parameter_names( + symbolt &method_symbol, + const java_bytecode_parse_treet::methodt::local_variable_tablet + &local_variable_table, + symbol_table_baset &symbol_table) +{ + // Obtain a std::vector of code_typet::parametert objects from the + // (function) type of the symbol + code_typet &code_type = to_code_type(method_symbol.type); + code_typet::parameterst ¶meters = code_type.parameters(); + + // Find number of parameters + unsigned slots_for_parameters = java_method_parameter_slots(code_type); + + // Find parameter names in the local variable table: + typedef std::pair base_name_and_identifiert; + std::map param_names; + for(const auto &v : local_variable_table) + { + if(v.index < slots_for_parameters) + param_names.emplace( + v.index, + make_pair( + v.name, id2string(method_symbol.name) + "::" + id2string(v.name))); + } + + // Assign names to parameters + std::size_t param_index = 0; + for(auto ¶m : parameters) + { + irep_idt base_name, identifier; + + // Construct a sensible base name (base_name) and a fully qualified name + // (identifier) for every parameter of the method under translation, + // regardless of whether we have an LVT or not; and assign it to the + // parameter object (which is stored in the type of the symbol, not in the + // symbol table) + if(param_index == 0 && param.get_this()) + { + // my.package.ClassName.myMethodName:(II)I::this + base_name = "this"; + identifier = id2string(method_symbol.name) + "::" + id2string(base_name); + } + else + { + auto param_name = param_names.find(param_index); + if(param_name != param_names.end()) + { + base_name = param_name->second.first; + identifier = param_name->second.second; + } + else + { + // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local + // variable slot where the parameter is stored and T is a character + // indicating the type + const typet &type = param.type(); + char suffix = java_char_from_type(type); + base_name = "arg" + std::to_string(param_index) + suffix; + identifier = + id2string(method_symbol.name) + "::" + id2string(base_name); + } + } + param.set_base_name(base_name); + param.set_identifier(identifier); + + // Build a new symbol for the parameter and add it to the symbol table + parameter_symbolt parameter_symbol; + parameter_symbol.base_name = base_name; + parameter_symbol.mode = ID_java; + parameter_symbol.name = identifier; + parameter_symbol.type = param.type(); + symbol_table.insert(parameter_symbol); + + param_index += java_local_variable_slots(param.type()); + } +} + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 6693978c026..b27fd1cf898 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -21,6 +21,12 @@ Author: Daniel Kroening, kroening@kroening.com class class_hierarchyt; +void java_bytecode_initialize_parameter_names( + symbolt &method_symbol, + const java_bytecode_parse_treet::methodt::local_variable_tablet + &local_variable_table, + symbol_table_baset &symbol_table); + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index fcaf134415d..39fb3a677be 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -256,11 +256,9 @@ bool java_bytecode_languaget::typecheck( convert_single_method(method_sig.first, journalling_symbol_table); } // Now convert all newly added string methods - id_sett string_methods; - string_preprocess.get_all_function_names(string_methods); for(const auto &fn_name : journalling_symbol_table.get_inserted()) { - if(string_methods.count(fn_name) != 0) + if(string_preprocess.implements_function(fn_name)) convert_single_method(fn_name, symbol_table); } } @@ -392,17 +390,29 @@ bool java_bytecode_languaget::convert_single_method( if(symbol.value.is_not_nil()) return false; - exprt generated_code = - string_preprocess.code_for_function(symbol, symbol_table); - if(generated_code.is_not_nil()) + // Get bytecode for specified function if we have it + method_bytecodet::opt_reft cmb = method_bytecode.get(function_id); + + // Check if have a string solver implementation + if(string_preprocess.implements_function(function_id)) { + symbolt &symbol = symbol_table.get_writeable_ref(function_id); + // Load parameter names from any extant bytecode before filling in body + if(cmb) + { + java_bytecode_initialize_parameter_names( + symbol, cmb->get().method.local_variable_table, symbol_table); + } // Populate body of the function with code generated by string preprocess - symbol_table.get_writeable_ref(function_id).value = generated_code; + exprt generated_code = + string_preprocess.code_for_function(symbol, symbol_table); + INVARIANT( + generated_code.is_not_nil(), "Couldn't retrieve code for string method"); + symbol.value = generated_code; return false; } // No string solver implementation, check if have bytecode for it - method_bytecodet::opt_reft cmb = method_bytecode.get(function_id); if(cmb) { java_bytecode_convert_method( diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index ae0496bcf6b..6cee9ea59b8 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -1758,6 +1758,16 @@ codet java_string_library_preprocesst::make_string_length_code( return code_returnt(get_length(deref, symbol_table)); } +bool java_string_library_preprocesst::implements_function( + const irep_idt &function_id) const +{ + for(const id_mapt *map : id_maps) + if(map->count(function_id) != 0) + return true; + + return conversion_table.count(function_id) != 0; +} + template void add_keys_to_container(const TMap &map, TContainer &container) { @@ -1775,14 +1785,7 @@ void add_keys_to_container(const TMap &map, TContainer &container) void java_string_library_preprocesst::get_all_function_names( id_sett &methods) const { - const id_mapt *maps[] = { - &cprover_equivalent_to_java_function, - &cprover_equivalent_to_java_string_returning_function, - &cprover_equivalent_to_java_constructor, - &cprover_equivalent_to_java_assign_and_return_function, - &cprover_equivalent_to_java_assign_function, - }; - for(const id_mapt *map : maps) + for(const id_mapt *map : id_maps) add_keys_to_container(*map, methods); add_keys_to_container(conversion_table, methods); diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index 73f88352b50..59b05304f28 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -22,6 +22,7 @@ Date: March 2017 #include // should get rid of this +#include #include #include #include "character_refine_preprocess.h" @@ -45,6 +46,7 @@ class java_string_library_preprocesst:public messaget void initialize_conversion_table(); void initialize_refined_string_type(); + bool implements_function(const irep_idt &function_id) const; void get_all_function_names(id_sett &methods) const; exprt @@ -126,6 +128,17 @@ class java_string_library_preprocesst:public messaget // of returning it id_mapt cprover_equivalent_to_java_assign_function; + const std::array id_maps = std::array + { + { + &cprover_equivalent_to_java_function, + &cprover_equivalent_to_java_string_returning_function, + &cprover_equivalent_to_java_constructor, + &cprover_equivalent_to_java_assign_and_return_function, + &cprover_equivalent_to_java_assign_function, + } + }; + // A set tells us what java types should be considered as string objects std::unordered_set string_types; From ea7646bfb252657e462a34fcebef17e60e79ed5c Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Fri, 1 Dec 2017 16:08:15 +0000 Subject: [PATCH 7/7] Collect string solver function calls Gather up function calls made by string solver into the list of needed functions --- src/java_bytecode/java_bytecode_language.cpp | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 39fb3a677be..21d9d4ce123 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include @@ -408,6 +409,29 @@ bool java_bytecode_languaget::convert_single_method( string_preprocess.code_for_function(symbol, symbol_table); INVARIANT( generated_code.is_not_nil(), "Couldn't retrieve code for string method"); + // String solver can make calls to functions that haven't yet been seen. + // Add these to the needed_lazy_methods collection + if(needed_lazy_methods) + { + for(const_depth_iteratort it = generated_code.depth_cbegin(); + it != generated_code.depth_cend(); + ++it) + { + if(it->id() == ID_code) + { + const auto fn_call = expr_try_dynamic_cast(*it); + if(!fn_call) + continue; + // Only support non-virtual function calls for now, if string solver + // starts to introduce virtual function calls then we will need to + // duplicate the behavior of java_bytecode_convert_method where it + // handles the invokevirtual instruction + const symbol_exprt &fn_sym = + expr_dynamic_cast(fn_call->function()); + needed_lazy_methods->add_needed_method(fn_sym.get_identifier()); + } + } + } symbol.value = generated_code; return false; }