-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Serialize (and deserialize) string values #864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,8 @@ | |
#include <pb_decode.h> | ||
#include <pb_encode.h> | ||
|
||
#include <string> | ||
|
||
namespace firebase { | ||
namespace firestore { | ||
namespace remote { | ||
|
@@ -98,6 +100,47 @@ int64_t DecodeInteger(pb_istream_t* stream) { | |
return DecodeVarint(stream); | ||
} | ||
|
||
/**
 * Hands the contents of `string_value` to nanopb's pb_encode_string so that
 * they are written to `stream`. Only the string payload is handled here; the
 * field tag is written separately by the caller.
 *
 * Aborts the process if nanopb reports a failure.
 */
void EncodeString(pb_ostream_t* stream, const std::string& string_value) {
  // nanopb works in terms of pb_byte_t buffers, so view the characters as such.
  const auto* raw_bytes =
      reinterpret_cast<const pb_byte_t*>(string_value.c_str());

  if (pb_encode_string(stream, raw_bytes, string_value.length())) {
    return;
  }

  // TODO(rsgowman): figure out error handling
  abort();
}
|
||
/**
 * Reads one string value from `stream` and returns it as a std::string.
 *
 * A nanopb string substream is opened on top of `stream`, its entire contents
 * are copied into the result, and the substream is then closed again. Any
 * failure along the way aborts the process.
 */
std::string DecodeString(pb_istream_t* stream) {
  pb_istream_t substream;
  if (!pb_make_string_substream(stream, &substream)) {
    // TODO(rsgowman): figure out error handling
    abort();
  }

  // The substream's remaining byte count is only known once it has been
  // opened, so the destination buffer cannot be sized any earlier than this.
  const auto payload_size = substream.bytes_left;
  std::string result(payload_size, '\0');
  auto* destination = reinterpret_cast<pb_byte_t*>(&result[0]);
  if (!pb_read(&substream, destination, payload_size)) {
    // TODO(rsgowman): figure out error handling
    abort();
  }

  // NB: future versions of nanopb drain whatever is left in the substream
  // inside pb_close_string_substream (returning false if that fails) as an
  // extra safety check. The current version (0.38) does not, so assert the
  // stronger condition here: nothing at all may remain unread.
  if (substream.bytes_left != 0) {
    // TODO(rsgowman): figure out error handling
    abort();
  }

  pb_close_string_substream(stream, &substream);

  return result;
}
|
||
} // namespace | ||
|
||
using firebase::firestore::model::FieldValue; | ||
|
@@ -149,6 +192,16 @@ void Serializer::EncodeFieldValue(const FieldValue& field_value, | |
EncodeInteger(&stream, field_value.integer_value()); | ||
break; | ||
|
||
case FieldValue::Type::String: | ||
status = pb_encode_tag(&stream, PB_WT_STRING, | ||
google_firestore_v1beta1_Value_string_value_tag); | ||
if (!status) { | ||
// TODO(rsgowman): figure out error handling | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Optional nit: maybe do a helper function like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than do all that refactoring, I'd probably be better off to just implement error handling; it really isn't going to be all that complicated. I'll issue a followup PR. |
||
abort(); | ||
} | ||
EncodeString(&stream, field_value.string_value()); | ||
break; | ||
|
||
default: | ||
// TODO(rsgowman): implement the other types | ||
abort(); | ||
|
@@ -163,11 +216,32 @@ FieldValue Serializer::DecodeFieldValue(const uint8_t* bytes, size_t length) { | |
uint32_t tag; | ||
bool eof; | ||
bool status = pb_decode_tag(&stream, &wire_type, &tag, &eof); | ||
if (!status || wire_type != PB_WT_VARINT) { | ||
if (!status) { | ||
// TODO(rsgowman): figure out error handling | ||
abort(); | ||
} | ||
|
||
// Ensure the tag matches the wire type | ||
// TODO(rsgowman): figure out error handling | ||
switch (tag) { | ||
case google_firestore_v1beta1_Value_null_value_tag: | ||
case google_firestore_v1beta1_Value_boolean_value_tag: | ||
case google_firestore_v1beta1_Value_integer_value_tag: | ||
if (wire_type != PB_WT_VARINT) { | ||
abort(); | ||
} | ||
break; | ||
|
||
case google_firestore_v1beta1_Value_string_value_tag: | ||
if (wire_type != PB_WT_STRING) { | ||
abort(); | ||
} | ||
break; | ||
|
||
default: | ||
abort(); | ||
} | ||
|
||
switch (tag) { | ||
case google_firestore_v1beta1_Value_null_value_tag: | ||
DecodeNull(&stream); | ||
|
@@ -176,6 +250,8 @@ FieldValue Serializer::DecodeFieldValue(const uint8_t* bytes, size_t length) { | |
return FieldValue::BooleanValue(DecodeBool(&stream)); | ||
case google_firestore_v1beta1_Value_integer_value_tag: | ||
return FieldValue::IntegerValue(DecodeInteger(&stream)); | ||
case google_firestore_v1beta1_Value_string_value_tag: | ||
return FieldValue::StringValue(DecodeString(&stream)); | ||
|
||
default: | ||
// TODO(rsgowman): figure out error handling | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,6 +151,46 @@ TEST_F(SerializerTest, EncodesIntegersModelToBytes) { | |
} | ||
} | ||
|
||
// Checks string FieldValues against their expected serialized bytes.
// Each case pairs a std::string with a byte vector; every expected vector in
// this table starts with 0x8a 0x01, followed by a one-byte length and then the
// UTF-8 bytes of the string.
// NOTE(review): ExpectRoundTrip is defined outside this view; presumably it
// encodes the model, compares the output with `bytes`, and decodes it back to
// the given type -- confirm against its definition.
TEST_F(SerializerTest, EncodesStringModelToBytes) { | ||
struct TestCase { | ||
std::string value; | ||
std::vector<uint8_t> bytes; | ||
}; | ||
|
||
std::vector<TestCase> cases{ | ||
// TEXT_FORMAT_PROTO: 'string_value: ""' | ||
{"", {0x8a, 0x01, 0x00}}, | ||
// TEXT_FORMAT_PROTO: 'string_value: "a"' | ||
{"a", {0x8a, 0x01, 0x01, 0x61}}, | ||
// TEXT_FORMAT_PROTO: 'string_value: "abc def"' | ||
{"abc def", {0x8a, 0x01, 0x07, 0x61, 0x62, 0x63, 0x20, 0x64, 0x65, 0x66}}, | ||
// TEXT_FORMAT_PROTO: 'string_value: "æ"' | ||
{"æ", {0x8a, 0x01, 0x02, 0xc3, 0xa6}}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. absolutely optional: Is it possible to use escape-sequence for the unicode character here instead of use that character directly? Assuming any modern editor/source-control-system supports unicode, there is no problem; so it is optional. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could; but OTOH, we've already got some test cases that use escape characters below. (More context: These test cases are taken from the other test suites, so my preference is to leave them as close as possible to the original.) We could make an argument that this is redundant... but it's probably more trouble than it's worth to remove it across all platforms. |
||
// TEXT_FORMAT_PROTO: 'string_value: "\0\ud7ff\ue000\uffff"' | ||
// Note: Each one of the three embedded universal character names | ||
// (\u-escaped) maps to three chars, so the total length of the string | ||
// literal is 10 (ignoring the terminating null), and the resulting string | ||
// literal is the same as '\0\xed\x9f\xbf\xee\x80\x80\xef\xbf\xbf'". The | ||
// size of 10 must be added, or else std::string will see the \0 at the | ||
// start and assume that's the end of the string. | ||
{{"\0\ud7ff\ue000\uffff", 10}, | ||
{0x8a, 0x01, 0x0a, 0x00, 0xed, 0x9f, 0xbf, 0xee, 0x80, 0x80, 0xef, 0xbf, | ||
0xbf}}, | ||
// Same string as the previous case, spelled with explicit \x byte escapes
// instead of \u universal character names.
{{"\0\xed\x9f\xbf\xee\x80\x80\xef\xbf\xbf", 10}, | ||
{0x8a, 0x01, 0x0a, 0x00, 0xed, 0x9f, 0xbf, 0xee, 0x80, 0x80, 0xef, 0xbf, | ||
0xbf}}, | ||
// TEXT_FORMAT_PROTO: 'string_value: "(╯°□°)╯︵ ┻━┻"' | ||
{"(╯°□°)╯︵ ┻━┻", | ||
{0x8a, 0x01, 0x1e, 0x28, 0xe2, 0x95, 0xaf, 0xc2, 0xb0, 0xe2, 0x96, | ||
0xa1, 0xc2, 0xb0, 0xef, 0xbc, 0x89, 0xe2, 0x95, 0xaf, 0xef, 0xb8, | ||
0xb5, 0x20, 0xe2, 0x94, 0xbb, 0xe2, 0x94, 0x81, 0xe2, 0x94, 0xbb}}}; | ||
|
||
// Run every table entry through the shared round-trip helper.
for (const TestCase& test : cases) { | ||
FieldValue model = FieldValue::StringValue(test.value); | ||
ExpectRoundTrip(model, test.bytes, FieldValue::Type::String); | ||
} | ||
} | ||
|
||
// TODO(rsgowman): Test [en|de]coding multiple protos into the same output | ||
// vector. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really hate to ask this question but does this need to be exception safe?
As it stands the string allocation could fail and we'd fail to undo this business here.
A simple way forward would be to move the allocation of result outside the group of C function calls. Secondarily though, there's the question of when we adopt error handling, is it a leak to fail to call this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@wilhuff I thought we just crash on exceptions and basically treat them as if they didn't exist?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In Objective-C, yes. Exceptions aren't expected to be handled and the runtime isn't capable of handling them without leaking anyway.
In general we don't throw exceptions there except for assertions and we expect those to crash the app.
In plain C++ that's not necessarily the case. Does anyone reasonably catch `std::bad_alloc`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean in C++. The style guide doesn't seem to distinguish between `try` and `catch`, so I presumed we don't use either. I'd be happy if that's not the case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see, you mean user code should be able to catch the exception and Firestore shouldn't leak. If that's the case, sorry for misunderstanding.
Can't think of how client code might reasonably expect to catch it from here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You've just kicked the hornet's nest. :(
I can't easily[1] do that. I need to wait until after pb_make_string_substream to know how large the string should be. (Well, I could allocate 0 bytes, and then extend it afterwards, but that doesn't avoid the problem.)
[1] - not technically correct. I could peek into the implementation details of pb_make_string_substream and simply do what it does, namely read the size required from the input stream. At which point, I've essentially just inlined the function.
In this particular case, I believe the code is safe.
pb_make_string_substream/pb_close_string_substream don't actually allocate any memory. In particular, the close call just sets state on the stream (and newer versions of nanopb also ensure the substream is "consumed", i.e. the pointer is pointing to the end rather than somewhere in the middle.) So generally, there's no leak.
However, the stream may not be consistent, so recovering from bad_alloc and assuming the stream is fine would be incorrect. So as long as we don't catch the exception, we're ok (since by the time the exception goes up to the user, the stream has fallen off the stack. Also note that streams are typically stack allocated, so no free() calls are required.)
That said, these calls look like they could malloc or open() or otherwise consume resources that need to be freed and, while nanopb clearly tries really hard to not malloc(), there's no guarantee it wouldn't change the implementation in the future to do this. (Side note: if you define PB_ENABLE_MALLOC=0 while compiling nanopb, it will ifdef out all the malloc calls. This reduces functionality, but a quick glance didn't turn up anything we actually use, so this might be worth investigating. Worst case: it would fail and therefore confirm that we are actually mallocing within nanopb.)
Not generally on Linux. These don't occur by default on Linux due to its over-committing memory model. (Instead, it relies on the infamous OOM killer to "take care" of things.) But I don't know about android/ios. (Probably developers still don't since there's usually not much that can usefully be done except crash.) OTOH, when people actually do catch bad_alloc, it's safe to assume that they really, really care about memory management, and would likely be disappointed (though not surprised) when a library they used didn't clean up properly due to this state.
So, options:
#ifdef __cpp_exceptions
.Votes? (Keep in mind this will likely appear in many other places too, so vote twice if you feel we should do something different here than the general guidance.)
I like (1) for this specific case, and (2), (6) or (7) for the general case. (But (1) for the general case may be more pragmatic.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Notes:
`__cpp_exceptions` is insufficient for all compilers. Thankfully `ABSL_HAVE_EXCEPTIONS` does the right thing. I'd consider this unusable if we had to do this in every function that could potentially throw though.
`new (std::nothrow) char[bufsize]` from `#include <new>`.
.I'd articulate general principles:
std::unique_ptr
.malloc
/free
unless specifically dealing with a C API that requires it (e.g.asprintf
if we were crazy enough to use it).unique_ptr
so we can't forget, e.g.
If we agree I'll chuck these in our migration guide (plus your feedback of course).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we're voting, I'd vote for 1 and 6, depending on the situation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Re (2): Darn. For some reason, I thought that worked. (Must've been confused.)
Yeah, everything else sgtm. I'll merge this PR shortly as is.