Skip to content

Add validation of smt parse trees to construct smt_responset instances. #6458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/solvers/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ SRC = $(BOOLEFORCE_SRC) \
smt2_incremental/smt_core_theory.cpp \
smt2_incremental/smt_logics.cpp \
smt2_incremental/smt_options.cpp \
smt2_incremental/smt_response_validation.cpp \
smt2_incremental/smt_responses.cpp \
smt2_incremental/smt_solver_process.cpp \
smt2_incremental/smt_sorts.cpp \
Expand Down
328 changes: 328 additions & 0 deletions src/solvers/smt2_incremental/smt_response_validation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
// Author: Diffblue Ltd.

/// \file
///
/// Validation of smt response parse trees to produce either a strongly typed
/// `smt_responset` representation, or a set of error messages.
///
/// \note
///
/// Functions named with the prefix `validate_` require the given parse tree to
/// be a particular kind of sub tree. Functions named with the prefix `valid_`
/// are called in places where the exact kind of sub-tree expected is unknown
/// and so the function must determine if the sub-tree is of that type at all,
/// before performing validation of it. These functions will return a
/// `response_or_errort` in the case where the parse tree is of that type or
/// an empty optional otherwise.

#include <solvers/smt2_incremental/smt_response_validation.h>

#include <util/mp_arith.h>
#include <util/range.h>

#include <regex>

template <class smtt>
response_or_errort<smtt>::response_or_errort(smtt smt) : smt{std::move(smt)}
{
}

template <class smtt>
response_or_errort<smtt>::response_or_errort(std::string message)
: messages{std::move(message)}
{
}

template <class smtt>
response_or_errort<smtt>::response_or_errort(std::vector<std::string> messages)
: messages{std::move(messages)}
{
}

template <class smtt>
const smtt *response_or_errort<smtt>::get_if_valid() const
{
INVARIANT(
smt.has_value() == messages.empty(),
"The response_or_errort class must be in the valid state or error state, "
"exclusively.");
return smt.has_value() ? &smt.value() : nullptr;
}

template <class smtt>
const std::vector<std::string> *response_or_errort<smtt>::get_if_error() const
{
INVARIANT(
smt.has_value() == messages.empty(),
"The response_or_errort class must be in the valid state or error state, "
"exclusively.");
return smt.has_value() ? nullptr : &messages;
}

template class response_or_errort<smt_responset>;

// Implementation detail of `collect_messages` below.
template <typename argumentt, typename... argumentst>
void collect_messages_impl(
std::vector<std::string> &collected_messages,
argumentt &&argument)
{
if(const auto messages = argument.get_if_error())
{
collected_messages.insert(
collected_messages.end(), messages->cbegin(), messages->end());
}
}

// Implementation detail of `collect_messages` below.
template <typename argumentt, typename... argumentst>
void collect_messages_impl(
std::vector<std::string> &collected_messages,
argumentt &&argument,
argumentst &&... arguments)
{
collect_messages_impl(collected_messages, argument);
collect_messages_impl(collected_messages, arguments...);
}

/// Builds a collection of messages composed all messages in the
/// `response_or_errort` typed arguments in \p arguments. This is a templated
/// function in order to handle `response_or_errort` instances which are
/// specialised differently.
template <typename... argumentst>
std::vector<std::string> collect_messages(argumentst &&... arguments)
{
std::vector<std::string> collected_messages;
collect_messages_impl(collected_messages, arguments...);
return collected_messages;
}

/// \brief Given a class to construct and a set of arguments to its constructor
/// which may include errors, either return the collected errors if there are
/// any or construct the class otherwise.
/// \tparam smt_to_constructt
/// The class to construct.
/// \tparam smt_baset
/// If the class to construct should be upcast to a base class before being
/// stored in the `response_or_errort`, then the base class should be supplied
/// in this parameter. If no upcast is required, then this should be left
/// empty.
/// \tparam argumentst
/// The pack of argument types matching the constructor of
/// `smt_to_constructt`. These must each be packed into an instance of
/// `response_or_errort`.
template <
typename smt_to_constructt,
typename smt_baset = smt_to_constructt,
typename... argumentst>
response_or_errort<smt_baset> validation_propagating(argumentst &&... arguments)
{
const auto collected_messages = collect_messages(arguments...);
if(!collected_messages.empty())
return response_or_errort<smt_baset>(collected_messages);
else
{
return response_or_errort<smt_baset>{
smt_to_constructt{(*arguments.get_if_valid())...}};
}
}

/// Produces a human-readable representation of the given \p parse_tree, for use
/// in error messaging.
/// \note This is currently implemented using `pretty`, but this function is
/// used instead of calling `pretty` directly so that will be more straight
/// forward to replace with an implementation specific to our use case which
/// is more easily readable by users of CBMC.
static std::string print_parse_tree(const irept &parse_tree)
{
return parse_tree.pretty(0, 0);
}

static response_or_errort<irep_idt>
validate_string_literal(const irept &parse_tree)
{
if(!parse_tree.get_sub().empty())
{
return response_or_errort<irep_idt>(
"Expected string literal, found \"" + print_parse_tree(parse_tree) +
"\".");
}
return response_or_errort<irep_idt>{parse_tree.id()};
}

/// \returns: A response or error in the case where the parse tree appears to be
/// a get-value command. Returns empty otherwise.
/// \note: Because this kind of response does not start with an identifying
/// keyword, it will be considered that the response is intended to be a
/// get-value response if it is composed of a collection of one or more pairs.
static optionalt<response_or_errort<smt_responset>>
valid_smt_error_response(const irept &parse_tree)
{
// Check if the parse tree looks to be an error response.
if(!parse_tree.id().empty())
return {};
if(parse_tree.get_sub().empty())
return {};
if(parse_tree.get_sub().at(0).id() != "error")
return {};
// Parse tree is now considered to be an error response and anything
// unexpected in the parse tree is now considered to be an invalid response.
if(parse_tree.get_sub().size() == 1)
{
return {response_or_errort<smt_responset>{
"Error response is missing the error message."}};
}
if(parse_tree.get_sub().size() > 2)
{
return {response_or_errort<smt_responset>{
"Error response has multiple error messages - \"" +
print_parse_tree(parse_tree) + "\"."}};
}
return validation_propagating<smt_error_responset, smt_responset>(
validate_string_literal(parse_tree.get_sub()[1]));
}

static bool all_subs_are_pairs(const irept &parse_tree)
{
return std::all_of(
parse_tree.get_sub().cbegin(),
parse_tree.get_sub().cend(),
[](const irept &sub) { return sub.get_sub().size() == 2; });
}

static response_or_errort<irep_idt>
validate_smt_identifier(const irept &parse_tree)
{
if(!parse_tree.get_sub().empty() || parse_tree.id().empty())
{
return response_or_errort<irep_idt>(
"Expected identifier, found - \"" + print_parse_tree(parse_tree) + "\".");
}
return response_or_errort<irep_idt>(parse_tree.id());
}

static optionalt<smt_termt> valid_smt_bool(const irept &parse_tree)
{
if(!parse_tree.get_sub().empty())
return {};
if(parse_tree.id() == ID_true)
return {smt_bool_literal_termt{true}};
if(parse_tree.id() == ID_false)
return {smt_bool_literal_termt{false}};
return {};
}

static optionalt<smt_termt> valid_smt_binary(const std::string &text)
{
static const std::regex binary_format{"#b[01]+"};
if(!std::regex_match(text, binary_format))
return {};
const mp_integer value = string2integer({text.begin() + 2, text.end()}, 2);
// Width is number of bit values minus the "#b" prefix.
const std::size_t width = text.size() - 2;
return {smt_bit_vector_constant_termt{value, width}};
}

static optionalt<smt_termt> valid_smt_hex(const std::string &text)
{
static const std::regex hex_format{"#x[0-9A-Za-z]+"};
if(!std::regex_match(text, hex_format))
return {};
const std::string hex{text.begin() + 2, text.end()};
// SMT-LIB 2 allows hex characters to be upper of lower case, but they should
// be upper case for mp_integer.
const mp_integer value =
string2integer(make_range(hex).map<std::function<int(int)>>(toupper), 16);
const std::size_t width = (text.size() - 2) * 4;
return {smt_bit_vector_constant_termt{value, width}};
}

static optionalt<smt_termt>
valid_smt_bit_vector_constant(const irept &parse_tree)
{
if(!parse_tree.get_sub().empty() || parse_tree.id().empty())
return {};
const auto value_string = id2string(parse_tree.id());
if(const auto smt_binary = valid_smt_binary(value_string))
return *smt_binary;
if(const auto smt_hex = valid_smt_hex(value_string))
return *smt_hex;
return {};
}

static response_or_errort<smt_termt> validate_term(const irept &parse_tree)
{
if(const auto smt_bool = valid_smt_bool(parse_tree))
return response_or_errort<smt_termt>{*smt_bool};
if(const auto bit_vector_constant = valid_smt_bit_vector_constant(parse_tree))
return response_or_errort<smt_termt>{*bit_vector_constant};
return response_or_errort<smt_termt>{"Unrecognised SMT term - \"" +
print_parse_tree(parse_tree) + "\"."};
}

static response_or_errort<smt_get_value_responset::valuation_pairt>
validate_valuation_pair(const irept &pair_parse_tree)
{
PRECONDITION(pair_parse_tree.get_sub().size() == 2);
const auto &descriptor = pair_parse_tree.get_sub()[0];
const auto &value = pair_parse_tree.get_sub()[1];
return validation_propagating<smt_get_value_responset::valuation_pairt>(
validate_smt_identifier(descriptor), validate_term(value));
}

/// \returns: A response or error in the case where the parse tree appears to be
/// a get-value command. Returns empty otherwise.
/// \note: Because this kind of response does not start with an identifying
/// keyword, it will be considered that the response is intended to be a
/// get-value response if it is composed of a collection of one or more pairs.
static optionalt<response_or_errort<smt_responset>>
valid_smt_get_value_response(const irept &parse_tree)
{
// Shape matching for does this look like a get value response?
if(!parse_tree.id().empty())
return {};
if(parse_tree.get_sub().empty())
return {};
if(!all_subs_are_pairs(parse_tree))
return {};
std::vector<std::string> error_messages;
std::vector<smt_get_value_responset::valuation_pairt> valuation_pairs;
for(const auto &pair : parse_tree.get_sub())
{
const auto pair_validation_result = validate_valuation_pair(pair);
if(const auto error = pair_validation_result.get_if_error())
error_messages.insert(error_messages.end(), error->begin(), error->end());
if(const auto valid_pair = pair_validation_result.get_if_valid())
valuation_pairs.push_back(*valid_pair);
}
if(!error_messages.empty())
return {response_or_errort<smt_responset>{std::move(error_messages)}};
else
{
return {response_or_errort<smt_responset>{
smt_get_value_responset{valuation_pairs}}};
}
}

response_or_errort<smt_responset> validate_smt_response(const irept &parse_tree)
{
if(parse_tree.id() == "sat")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⛏️ Why not switch (parse_tree.id()) { case "sat": }. Might make it a bit more readable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because you can't switch on a string in C++.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😱

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you can switch on a/the first character... so if you want a big nested tree of case statements... ;)

return response_or_errort<smt_responset>{
smt_check_sat_responset{smt_sat_responset{}}};
if(parse_tree.id() == "unsat")
return response_or_errort<smt_responset>{
smt_check_sat_responset{smt_unsat_responset{}}};
if(parse_tree.id() == "unknown")
return response_or_errort<smt_responset>{
smt_check_sat_responset{smt_unknown_responset{}}};
if(const auto error_response = valid_smt_error_response(parse_tree))
return *error_response;
if(parse_tree.id() == "success")
return response_or_errort<smt_responset>{smt_success_responset{}};
if(parse_tree.id() == "unsupported")
return response_or_errort<smt_responset>{smt_unsupported_responset{}};
if(const auto get_value_response = valid_smt_get_value_response(parse_tree))
return *get_value_response;
return response_or_errort<smt_responset>{"Invalid SMT response \"" +
id2string(parse_tree.id()) + "\""};
}
43 changes: 43 additions & 0 deletions src/solvers/smt2_incremental/smt_response_validation.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Author: Diffblue Ltd.

#ifndef CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H
#define CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H

#include <solvers/smt2_incremental/smt_responses.h>
#include <util/invariant.h>
#include <util/optional.h>

#include <string>
#include <vector>

/// Holds either a valid parsed response or response sub-tree of type \tparam
/// smtt or a collection of message strings explaining why the given input was
/// not valid.
template <class smtt>
class response_or_errort final
{
public:
explicit response_or_errort(smtt smt);
explicit response_or_errort(std::string message);
explicit response_or_errort(std::vector<std::string> messages);

/// \brief Gets the smt response if the response is valid, or returns nullptr
/// otherwise.
const smtt *get_if_valid() const;
/// \brief Gets the error messages if the response is invalid, or returns
/// nullptr otherwise.
const std::vector<std::string> *get_if_error() const;

private:
// The below two fields could be a single `std::variant` field, if there was
// an implementation of it available in the cbmc repository. However at the
// time of writing we are targeting C++11, `std::variant` was introduced in
// C++17 and we have no backported version.
optionalt<smtt> smt;
std::vector<std::string> messages;
};

response_or_errort<smt_responset>
validate_smt_response(const irept &parse_tree);

#endif // CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H
Loading