Skip to content

Commit 516f109

Browse files
Merge pull request #6458 from thomasspriggs/tas/type_check_smt_response
Add validation of smt parse trees to construct smt_responset instances.
2 parents cb8ad8e + 6c3d1da commit 516f109

File tree

12 files changed

+673
-79
lines changed

12 files changed

+673
-79
lines changed

src/solvers/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ SRC = $(BOOLEFORCE_SRC) \
199199
smt2_incremental/smt_core_theory.cpp \
200200
smt2_incremental/smt_logics.cpp \
201201
smt2_incremental/smt_options.cpp \
202+
smt2_incremental/smt_response_validation.cpp \
202203
smt2_incremental/smt_responses.cpp \
203204
smt2_incremental/smt_solver_process.cpp \
204205
smt2_incremental/smt_sorts.cpp \
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
// Author: Diffblue Ltd.
2+
3+
/// \file
4+
///
5+
/// Validation of smt response parse trees to produce either a strongly typed
6+
/// `smt_responset` representation, or a set of error messages.
7+
///
8+
/// \note
9+
///
10+
/// Functions named with the prefix `validate_` require the given parse tree to
11+
/// be a particular kind of sub tree. Functions named with the prefix `valid_`
12+
/// are called in places where the exact kind of sub-tree expected is unknown
13+
/// and so the function must determine if the sub-tree is of that type at all,
14+
/// before performing validation of it. These functions will return a
15+
/// `response_or_errort` in the case where the parse tree is of that type or
16+
/// an empty optional otherwise.
17+
18+
#include <solvers/smt2_incremental/smt_response_validation.h>
19+
20+
#include <util/mp_arith.h>
21+
#include <util/range.h>
22+
23+
#include <regex>
24+
25+
template <class smtt>
26+
response_or_errort<smtt>::response_or_errort(smtt smt) : smt{std::move(smt)}
27+
{
28+
}
29+
30+
template <class smtt>
31+
response_or_errort<smtt>::response_or_errort(std::string message)
32+
: messages{std::move(message)}
33+
{
34+
}
35+
36+
template <class smtt>
37+
response_or_errort<smtt>::response_or_errort(std::vector<std::string> messages)
38+
: messages{std::move(messages)}
39+
{
40+
}
41+
42+
template <class smtt>
43+
const smtt *response_or_errort<smtt>::get_if_valid() const
44+
{
45+
INVARIANT(
46+
smt.has_value() == messages.empty(),
47+
"The response_or_errort class must be in the valid state or error state, "
48+
"exclusively.");
49+
return smt.has_value() ? &smt.value() : nullptr;
50+
}
51+
52+
template <class smtt>
53+
const std::vector<std::string> *response_or_errort<smtt>::get_if_error() const
54+
{
55+
INVARIANT(
56+
smt.has_value() == messages.empty(),
57+
"The response_or_errort class must be in the valid state or error state, "
58+
"exclusively.");
59+
return smt.has_value() ? nullptr : &messages;
60+
}
61+
62+
template class response_or_errort<smt_responset>;
63+
64+
// Implementation detail of `collect_messages` below.
65+
template <typename argumentt, typename... argumentst>
66+
void collect_messages_impl(
67+
std::vector<std::string> &collected_messages,
68+
argumentt &&argument)
69+
{
70+
if(const auto messages = argument.get_if_error())
71+
{
72+
collected_messages.insert(
73+
collected_messages.end(), messages->cbegin(), messages->end());
74+
}
75+
}
76+
77+
// Implementation detail of `collect_messages` below.
78+
template <typename argumentt, typename... argumentst>
79+
void collect_messages_impl(
80+
std::vector<std::string> &collected_messages,
81+
argumentt &&argument,
82+
argumentst &&... arguments)
83+
{
84+
collect_messages_impl(collected_messages, argument);
85+
collect_messages_impl(collected_messages, arguments...);
86+
}
87+
88+
/// Builds a collection of messages composed all messages in the
89+
/// `response_or_errort` typed arguments in \p arguments. This is a templated
90+
/// function in order to handle `response_or_errort` instances which are
91+
/// specialised differently.
92+
template <typename... argumentst>
93+
std::vector<std::string> collect_messages(argumentst &&... arguments)
94+
{
95+
std::vector<std::string> collected_messages;
96+
collect_messages_impl(collected_messages, arguments...);
97+
return collected_messages;
98+
}
99+
100+
/// \brief Given a class to construct and a set of arguments to its constructor
101+
/// which may include errors, either return the collected errors if there are
102+
/// any or construct the class otherwise.
103+
/// \tparam smt_to_constructt
104+
/// The class to construct.
105+
/// \tparam smt_baset
106+
/// If the class to construct should be upcast to a base class before being
107+
/// stored in the `response_or_errort`, then the base class should be supplied
108+
/// in this parameter. If no upcast is required, then this should be left
109+
/// empty.
110+
/// \tparam argumentst
111+
/// The pack of argument types matching the constructor of
112+
/// `smt_to_constructt`. These must each be packed into an instance of
113+
/// `response_or_errort`.
114+
template <
115+
typename smt_to_constructt,
116+
typename smt_baset = smt_to_constructt,
117+
typename... argumentst>
118+
response_or_errort<smt_baset> validation_propagating(argumentst &&... arguments)
119+
{
120+
const auto collected_messages = collect_messages(arguments...);
121+
if(!collected_messages.empty())
122+
return response_or_errort<smt_baset>(collected_messages);
123+
else
124+
{
125+
return response_or_errort<smt_baset>{
126+
smt_to_constructt{(*arguments.get_if_valid())...}};
127+
}
128+
}
129+
130+
/// Produces a human-readable representation of the given \p parse_tree, for use
131+
/// in error messaging.
132+
/// \note This is currently implemented using `pretty`, but this function is
133+
/// used instead of calling `pretty` directly so that will be more straight
134+
/// forward to replace with an implementation specific to our use case which
135+
/// is more easily readable by users of CBMC.
136+
static std::string print_parse_tree(const irept &parse_tree)
137+
{
138+
return parse_tree.pretty(0, 0);
139+
}
140+
141+
static response_or_errort<irep_idt>
142+
validate_string_literal(const irept &parse_tree)
143+
{
144+
if(!parse_tree.get_sub().empty())
145+
{
146+
return response_or_errort<irep_idt>(
147+
"Expected string literal, found \"" + print_parse_tree(parse_tree) +
148+
"\".");
149+
}
150+
return response_or_errort<irep_idt>{parse_tree.id()};
151+
}
152+
153+
/// \returns: A response or error in the case where the parse tree appears to be
154+
/// a get-value command. Returns empty otherwise.
155+
/// \note: Because this kind of response does not start with an identifying
156+
/// keyword, it will be considered that the response is intended to be a
157+
/// get-value response if it is composed of a collection of one or more pairs.
158+
static optionalt<response_or_errort<smt_responset>>
159+
valid_smt_error_response(const irept &parse_tree)
160+
{
161+
// Check if the parse tree looks to be an error response.
162+
if(!parse_tree.id().empty())
163+
return {};
164+
if(parse_tree.get_sub().empty())
165+
return {};
166+
if(parse_tree.get_sub().at(0).id() != "error")
167+
return {};
168+
// Parse tree is now considered to be an error response and anything
169+
// unexpected in the parse tree is now considered to be an invalid response.
170+
if(parse_tree.get_sub().size() == 1)
171+
{
172+
return {response_or_errort<smt_responset>{
173+
"Error response is missing the error message."}};
174+
}
175+
if(parse_tree.get_sub().size() > 2)
176+
{
177+
return {response_or_errort<smt_responset>{
178+
"Error response has multiple error messages - \"" +
179+
print_parse_tree(parse_tree) + "\"."}};
180+
}
181+
return validation_propagating<smt_error_responset, smt_responset>(
182+
validate_string_literal(parse_tree.get_sub()[1]));
183+
}
184+
185+
static bool all_subs_are_pairs(const irept &parse_tree)
186+
{
187+
return std::all_of(
188+
parse_tree.get_sub().cbegin(),
189+
parse_tree.get_sub().cend(),
190+
[](const irept &sub) { return sub.get_sub().size() == 2; });
191+
}
192+
193+
static response_or_errort<irep_idt>
194+
validate_smt_identifier(const irept &parse_tree)
195+
{
196+
if(!parse_tree.get_sub().empty() || parse_tree.id().empty())
197+
{
198+
return response_or_errort<irep_idt>(
199+
"Expected identifier, found - \"" + print_parse_tree(parse_tree) + "\".");
200+
}
201+
return response_or_errort<irep_idt>(parse_tree.id());
202+
}
203+
204+
static optionalt<smt_termt> valid_smt_bool(const irept &parse_tree)
205+
{
206+
if(!parse_tree.get_sub().empty())
207+
return {};
208+
if(parse_tree.id() == ID_true)
209+
return {smt_bool_literal_termt{true}};
210+
if(parse_tree.id() == ID_false)
211+
return {smt_bool_literal_termt{false}};
212+
return {};
213+
}
214+
215+
static optionalt<smt_termt> valid_smt_binary(const std::string &text)
216+
{
217+
static const std::regex binary_format{"#b[01]+"};
218+
if(!std::regex_match(text, binary_format))
219+
return {};
220+
const mp_integer value = string2integer({text.begin() + 2, text.end()}, 2);
221+
// Width is number of bit values minus the "#b" prefix.
222+
const std::size_t width = text.size() - 2;
223+
return {smt_bit_vector_constant_termt{value, width}};
224+
}
225+
226+
static optionalt<smt_termt> valid_smt_hex(const std::string &text)
227+
{
228+
static const std::regex hex_format{"#x[0-9A-Za-z]+"};
229+
if(!std::regex_match(text, hex_format))
230+
return {};
231+
const std::string hex{text.begin() + 2, text.end()};
232+
// SMT-LIB 2 allows hex characters to be upper of lower case, but they should
233+
// be upper case for mp_integer.
234+
const mp_integer value =
235+
string2integer(make_range(hex).map<std::function<int(int)>>(toupper), 16);
236+
const std::size_t width = (text.size() - 2) * 4;
237+
return {smt_bit_vector_constant_termt{value, width}};
238+
}
239+
240+
static optionalt<smt_termt>
241+
valid_smt_bit_vector_constant(const irept &parse_tree)
242+
{
243+
if(!parse_tree.get_sub().empty() || parse_tree.id().empty())
244+
return {};
245+
const auto value_string = id2string(parse_tree.id());
246+
if(const auto smt_binary = valid_smt_binary(value_string))
247+
return *smt_binary;
248+
if(const auto smt_hex = valid_smt_hex(value_string))
249+
return *smt_hex;
250+
return {};
251+
}
252+
253+
static response_or_errort<smt_termt> validate_term(const irept &parse_tree)
254+
{
255+
if(const auto smt_bool = valid_smt_bool(parse_tree))
256+
return response_or_errort<smt_termt>{*smt_bool};
257+
if(const auto bit_vector_constant = valid_smt_bit_vector_constant(parse_tree))
258+
return response_or_errort<smt_termt>{*bit_vector_constant};
259+
return response_or_errort<smt_termt>{"Unrecognised SMT term - \"" +
260+
print_parse_tree(parse_tree) + "\"."};
261+
}
262+
263+
static response_or_errort<smt_get_value_responset::valuation_pairt>
264+
validate_valuation_pair(const irept &pair_parse_tree)
265+
{
266+
PRECONDITION(pair_parse_tree.get_sub().size() == 2);
267+
const auto &descriptor = pair_parse_tree.get_sub()[0];
268+
const auto &value = pair_parse_tree.get_sub()[1];
269+
return validation_propagating<smt_get_value_responset::valuation_pairt>(
270+
validate_smt_identifier(descriptor), validate_term(value));
271+
}
272+
273+
/// \returns: A response or error in the case where the parse tree appears to be
274+
/// a get-value command. Returns empty otherwise.
275+
/// \note: Because this kind of response does not start with an identifying
276+
/// keyword, it will be considered that the response is intended to be a
277+
/// get-value response if it is composed of a collection of one or more pairs.
278+
static optionalt<response_or_errort<smt_responset>>
279+
valid_smt_get_value_response(const irept &parse_tree)
280+
{
281+
// Shape matching for does this look like a get value response?
282+
if(!parse_tree.id().empty())
283+
return {};
284+
if(parse_tree.get_sub().empty())
285+
return {};
286+
if(!all_subs_are_pairs(parse_tree))
287+
return {};
288+
std::vector<std::string> error_messages;
289+
std::vector<smt_get_value_responset::valuation_pairt> valuation_pairs;
290+
for(const auto &pair : parse_tree.get_sub())
291+
{
292+
const auto pair_validation_result = validate_valuation_pair(pair);
293+
if(const auto error = pair_validation_result.get_if_error())
294+
error_messages.insert(error_messages.end(), error->begin(), error->end());
295+
if(const auto valid_pair = pair_validation_result.get_if_valid())
296+
valuation_pairs.push_back(*valid_pair);
297+
}
298+
if(!error_messages.empty())
299+
return {response_or_errort<smt_responset>{std::move(error_messages)}};
300+
else
301+
{
302+
return {response_or_errort<smt_responset>{
303+
smt_get_value_responset{valuation_pairs}}};
304+
}
305+
}
306+
307+
response_or_errort<smt_responset> validate_smt_response(const irept &parse_tree)
308+
{
309+
if(parse_tree.id() == "sat")
310+
return response_or_errort<smt_responset>{
311+
smt_check_sat_responset{smt_sat_responset{}}};
312+
if(parse_tree.id() == "unsat")
313+
return response_or_errort<smt_responset>{
314+
smt_check_sat_responset{smt_unsat_responset{}}};
315+
if(parse_tree.id() == "unknown")
316+
return response_or_errort<smt_responset>{
317+
smt_check_sat_responset{smt_unknown_responset{}}};
318+
if(const auto error_response = valid_smt_error_response(parse_tree))
319+
return *error_response;
320+
if(parse_tree.id() == "success")
321+
return response_or_errort<smt_responset>{smt_success_responset{}};
322+
if(parse_tree.id() == "unsupported")
323+
return response_or_errort<smt_responset>{smt_unsupported_responset{}};
324+
if(const auto get_value_response = valid_smt_get_value_response(parse_tree))
325+
return *get_value_response;
326+
return response_or_errort<smt_responset>{"Invalid SMT response \"" +
327+
id2string(parse_tree.id()) + "\""};
328+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Author: Diffblue Ltd.
2+
3+
#ifndef CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H
4+
#define CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H
5+
6+
#include <solvers/smt2_incremental/smt_responses.h>
7+
#include <util/invariant.h>
8+
#include <util/optional.h>
9+
10+
#include <string>
11+
#include <vector>
12+
13+
/// Holds either a valid parsed response or response sub-tree of type \tparam
14+
/// smtt or a collection of message strings explaining why the given input was
15+
/// not valid.
16+
template <class smtt>
17+
class response_or_errort final
18+
{
19+
public:
20+
explicit response_or_errort(smtt smt);
21+
explicit response_or_errort(std::string message);
22+
explicit response_or_errort(std::vector<std::string> messages);
23+
24+
/// \brief Gets the smt response if the response is valid, or returns nullptr
25+
/// otherwise.
26+
const smtt *get_if_valid() const;
27+
/// \brief Gets the error messages if the response is invalid, or returns
28+
/// nullptr otherwise.
29+
const std::vector<std::string> *get_if_error() const;
30+
31+
private:
32+
// The below two fields could be a single `std::variant` field, if there was
33+
// an implementation of it available in the cbmc repository. However at the
34+
// time of writing we are targeting C++11, `std::variant` was introduced in
35+
// C++17 and we have no backported version.
36+
optionalt<smtt> smt;
37+
std::vector<std::string> messages;
38+
};
39+
40+
response_or_errort<smt_responset>
41+
validate_smt_response(const irept &parse_tree);
42+
43+
#endif // CPROVER_SOLVERS_SMT2_INCREMENTAL_SMT_RESPONSE_VALIDATION_H

0 commit comments

Comments
 (0)