diff --git a/Cargo.toml b/Cargo.toml index bec0c3db..a4b58c56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ serde_with-3 = { package = "serde_with", version = "3.1.0", optional = true } time = { version = "0.3.9", features = ["formatting", "parsing", "macros", "large-dates"] } bitvec = "1.0.1" serde_path_to_error = { version = "0.1.16", optional = true } +bytes = "1.10.0" [target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] js-sys = "0.3" diff --git a/proptest-regressions/raw/test/mod.txt b/proptest-regressions/raw/test/mod.txt new file mode 100644 index 00000000..f8ee2aa2 --- /dev/null +++ b/proptest-regressions/raw/test/mod.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 7d265dcc185e765bd763a321052fec8f67887f8f16dca9781d0161bbb0f8fdb0 # shrinks to bson = Document({"": String("")}) +cc b931d167d43e92047967875bd28287e133c1464fcdae96025b5345e959f097fb # shrinks to bson = JavaScriptCodeWithScope { code: "", scope: Document({"": Document({"": Binary { subtype: BinaryOld, bytes: [0, 0, 0, 0] }})}) } diff --git a/src/raw/test/append.rs b/src/raw/test/append.rs index 147fa152..7894f164 100644 --- a/src/raw/test/append.rs +++ b/src/raw/test/append.rs @@ -1,22 +1,9 @@ use std::iter::FromIterator; use crate::{ - oid::ObjectId, - raw::RawJavaScriptCodeWithScope, - spec::BinarySubtype, - tests::LOCK, - Binary, - Bson, - DateTime, - DbPointer, - Decimal128, - Document, - JavaScriptCodeWithScope, - RawArrayBuf, - RawBson, - RawDocumentBuf, - Regex, - Timestamp, + oid::ObjectId, raw::RawJavaScriptCodeWithScope, spec::BinarySubtype, tests::LOCK, Binary, Bson, + DateTime, DbPointer, Decimal128, Document, JavaScriptCodeWithScope, RawArrayBuf, RawBson, + RawDocumentBuf, Regex, Timestamp, }; use pretty_assertions::assert_eq; diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 10fb8037..a0639c51 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -34,18 +34,19 @@ use std::io::Write; use crate::{ bson::{Bson, Document}, - de::MAX_BSON_SIZE, - spec::BinarySubtype, RawDocumentBuf, }; use ::serde::{ser::Error as SerdeError, Serialize}; +use bytes::BufMut; +// XXX remove pub(crate) fn write_string(buf: &mut Vec, s: &str) { buf.extend(&(s.len() as i32 + 1).to_le_bytes()); buf.extend(s.as_bytes()); buf.push(0); } +// XXX remove pub(crate) fn write_cstring(buf: &mut Vec, s: &str) -> Result<()> { if s.contains('\0') { return Err(Error::InvalidCString(s.into())); @@ -55,6 +56,7 @@ pub(crate) fn write_cstring(buf: &mut Vec, s: &str) -> Result<()> { Ok(()) } +// XXX remove #[inline] pub(crate) fn write_i32(writer: &mut W, val: i32) -> Result<()> { writer @@ -63,47 +65,6 @@ pub(crate) fn write_i32(writer: &mut W, val: i32) -> Result<( .map_err(From::from) } -#[inline] -fn write_i64(writer: &mut W, val: i64) -> Result<()> { - writer - .write_all(&val.to_le_bytes()) - .map(|_| ()) - .map_err(From::from) -} - -#[inline] -fn write_f64(writer: &mut W, val: f64) -> Result<()> { - writer - .write_all(&val.to_le_bytes()) - .map(|_| ()) - .map_err(From::from) -} - -#[inline] -fn write_binary(mut writer: W, bytes: &[u8], subtype: BinarySubtype) -> Result<()> { - let len = if let BinarySubtype::BinaryOld = subtype { - bytes.len() + 4 - } else { - bytes.len() - }; - - if len > MAX_BSON_SIZE as usize { - return Err(Error::custom(format!( - "binary length {} exceeded maximum size", - bytes.len() - ))); - } - - write_i32(&mut writer, len as i32)?; - writer.write_all(&[subtype.into()])?; - - if let BinarySubtype::BinaryOld = subtype { - write_i32(&mut writer, len as i32 - 4)?; - }; - - writer.write_all(bytes).map_err(From::from) -} - /// Encode a `T` Serializable into a [`Bson`] value. /// /// The [`Serializer`] used by this function presents itself as human readable, whereas the @@ -201,7 +162,28 @@ pub fn to_vec(value: &T) -> Result> where T: Serialize, { - let mut serializer = raw::Serializer::new(); + to_buf_mut(value, |len| Vec::with_capacity(len)) +} + +#[inline] +pub fn to_buf_mut(value: &T, create: F) -> Result +where + T: Serialize, + F: Fn(usize) -> B, + B: BufMut, +{ + let mut len_serializer = raw::Serializer::new(raw::LenRecordingDocumentBufMut::new()); + #[cfg(feature = "serde_path_to_error")] + { + serde_path_to_error::serialize(value, &mut len_serializer).map_err(Error::with_path)?; + } + #[cfg(not(feature = "serde_path_to_error"))] + { + value.serialize(&mut len_serializer)?; + } + let lens = len_serializer.into_buf().into_lens(); + let buf = create(*lens.first().unwrap_or(&5) as usize); + let mut serializer = raw::Serializer::new(raw::LenReplayingDocumentBufMut::new(buf, lens)); #[cfg(feature = "serde_path_to_error")] { serde_path_to_error::serialize(value, &mut serializer).map_err(Error::with_path)?; @@ -210,7 +192,7 @@ where { value.serialize(&mut serializer)?; } - Ok(serializer.into_vec()) + Ok(serializer.into_buf().into_inner()) } /// Serialize the given `T` as a [`RawDocumentBuf`]. diff --git a/src/ser/raw/document_serializer.rs b/src/ser/raw/document_serializer.rs index 114a11e9..325a44c5 100644 --- a/src/ser/raw/document_serializer.rs +++ b/src/ser/raw/document_serializer.rs @@ -1,72 +1,50 @@ use serde::{ser::Impossible, Serialize}; use crate::{ - ser::{write_cstring, write_i32, Error, Result}, - to_bson, - Bson, + ser::{Error, Result}, + spec::ElementType, + to_bson, Bson, }; -use super::Serializer; - -pub(crate) struct DocumentSerializationResult<'a> { - pub(crate) root_serializer: &'a mut Serializer, -} +use super::{DocumentBufMut, Key, Serializer}; /// Serializer used to serialize document or array bodies. -pub(crate) struct DocumentSerializer<'a> { - root_serializer: &'a mut Serializer, +pub(crate) struct DocumentSerializer<'a, B> { + root_serializer: &'a mut Serializer, num_keys_serialized: usize, - start: usize, } -impl<'a> DocumentSerializer<'a> { - pub(crate) fn start(rs: &'a mut Serializer) -> crate::ser::Result { - let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; +impl<'a, B: DocumentBufMut> DocumentSerializer<'a, B> { + pub(crate) fn start( + rs: &'a mut Serializer, + doc_type: ElementType, + ) -> crate::ser::Result { + rs.buf.begin_doc(doc_type)?; Ok(Self { root_serializer: rs, num_keys_serialized: 0, - start, }) } - /// Serialize a document key using the provided closure. - fn serialize_doc_key_custom Result<()>>( - &mut self, - f: F, - ) -> Result<()> { - // push a dummy element type for now, will update this once we serialize the value - self.root_serializer.reserve_element_type(); - f(self.root_serializer)?; - self.num_keys_serialized += 1; - Ok(()) - } - /// Serialize a document key to string using [`KeySerializer`]. fn serialize_doc_key(&mut self, key: &T) -> Result<()> where T: serde::Serialize + ?Sized, { - self.serialize_doc_key_custom(|rs| { - key.serialize(KeySerializer { - root_serializer: rs, - })?; - Ok(()) + key.serialize(KeySerializer { + root_serializer: &mut self.root_serializer, })?; + self.num_keys_serialized += 1; Ok(()) } - pub(crate) fn end_doc(self) -> crate::ser::Result> { - self.root_serializer.bytes.push(0); - let length = (self.root_serializer.bytes.len() - self.start) as i32; - self.root_serializer.replace_i32(self.start, length); - Ok(DocumentSerializationResult { - root_serializer: self.root_serializer, - }) + pub(crate) fn end_doc(self) -> crate::ser::Result<&'a mut Serializer> { + self.root_serializer.buf.end_doc()?; + Ok(self.root_serializer) } } -impl serde::ser::SerializeSeq for DocumentSerializer<'_> { +impl serde::ser::SerializeSeq for DocumentSerializer<'_, B> { type Ok = (); type Error = Error; @@ -75,13 +53,9 @@ impl serde::ser::SerializeSeq for DocumentSerializer<'_> { where T: serde::Serialize + ?Sized, { - let index = self.num_keys_serialized; - self.serialize_doc_key_custom(|rs| { - use std::io::Write; - write!(&mut rs.bytes, "{}", index)?; - rs.bytes.push(0); - Ok(()) - })?; + self.root_serializer + .set_next_key(Key::Index(self.num_keys_serialized)); + self.num_keys_serialized += 1; value.serialize(&mut *self.root_serializer) } @@ -91,7 +65,7 @@ impl serde::ser::SerializeSeq for DocumentSerializer<'_> { } } -impl serde::ser::SerializeMap for DocumentSerializer<'_> { +impl serde::ser::SerializeMap for DocumentSerializer<'_, B> { type Ok = (); type Error = Error; @@ -101,7 +75,7 @@ impl serde::ser::SerializeMap for DocumentSerializer<'_> { where T: serde::Serialize + ?Sized, { - self.serialize_doc_key(key) + self.serialize_doc_key(key) // XXX this may result in a new copy. } #[inline] @@ -117,7 +91,7 @@ impl serde::ser::SerializeMap for DocumentSerializer<'_> { } } -impl serde::ser::SerializeStruct for DocumentSerializer<'_> { +impl serde::ser::SerializeStruct for DocumentSerializer<'_, B> { type Ok = (); type Error = Error; @@ -127,7 +101,7 @@ impl serde::ser::SerializeStruct for DocumentSerializer<'_> { where T: serde::Serialize + ?Sized, { - self.serialize_doc_key(key)?; + self.root_serializer.set_next_key(Key::Static(key)); value.serialize(&mut *self.root_serializer) } @@ -137,7 +111,7 @@ impl serde::ser::SerializeStruct for DocumentSerializer<'_> { } } -impl serde::ser::SerializeTuple for DocumentSerializer<'_> { +impl serde::ser::SerializeTuple for DocumentSerializer<'_, B> { type Ok = (); type Error = Error; @@ -147,7 +121,9 @@ impl serde::ser::SerializeTuple for DocumentSerializer<'_> { where T: serde::Serialize + ?Sized, { - self.serialize_doc_key(&self.num_keys_serialized.to_string())?; + self.root_serializer + .set_next_key(Key::Index(self.num_keys_serialized)); + self.num_keys_serialized += 1; value.serialize(&mut *self.root_serializer) } @@ -157,7 +133,7 @@ impl serde::ser::SerializeTuple for DocumentSerializer<'_> { } } -impl serde::ser::SerializeTupleStruct for DocumentSerializer<'_> { +impl serde::ser::SerializeTupleStruct for DocumentSerializer<'_, B> { type Ok = (); type Error = Error; @@ -179,17 +155,17 @@ impl serde::ser::SerializeTupleStruct for DocumentSerializer<'_> { /// Serializer used specifically for serializing document keys. /// Only keys that serialize to strings will be accepted. -struct KeySerializer<'a> { - root_serializer: &'a mut Serializer, +struct KeySerializer<'a, B> { + root_serializer: &'a mut Serializer, } -impl KeySerializer<'_> { +impl KeySerializer<'_, B> { fn invalid_key(v: T) -> Error { Error::InvalidDocumentKey(to_bson(&v).unwrap_or(Bson::Null)) } } -impl serde::Serializer for KeySerializer<'_> { +impl serde::Serializer for KeySerializer<'_, B> { type Ok = (); type Error = Error; @@ -264,7 +240,8 @@ impl serde::Serializer for KeySerializer<'_> { #[inline] fn serialize_str(self, v: &str) -> Result { - write_cstring(&mut self.root_serializer.bytes, v) + self.root_serializer.set_next_key(Key::Owned(v.to_owned())); + Ok(()) } #[inline] diff --git a/src/ser/raw/mod.rs b/src/ser/raw/mod.rs index 69b7320e..1710c190 100644 --- a/src/ser/raw/mod.rs +++ b/src/ser/raw/mod.rs @@ -1,8 +1,7 @@ mod document_serializer; mod value_serializer; -use std::io::Write; - +use bytes::BufMut; use serde::{ ser::{Error as SerdeError, SerializeMap, SerializeStruct}, Serialize, @@ -10,8 +9,8 @@ use serde::{ use self::value_serializer::{ValueSerializer, ValueType}; -use super::{write_binary, write_cstring, write_f64, write_i32, write_i64, write_string}; use crate::{ + de::MAX_BSON_SIZE, raw::{RAW_ARRAY_NEWTYPE, RAW_DOCUMENT_NEWTYPE}, ser::{Error, Result}, serde_helpers::HUMAN_READABLE_NEWTYPE, @@ -20,14 +19,163 @@ use crate::{ }; use document_serializer::DocumentSerializer; +// XXX begin_doc and end_doc appear to be infalliable. +pub(crate) trait DocumentBufMut: BufMut { + /// Track/record information related to the document started at this point. + fn begin_doc(&mut self, doc_type: ElementType) -> Result<()>; + /// Track/record any information related to the end of the current document. + fn end_doc(&mut self) -> Result<()>; + /// Return true if begin_doc() has been called at least once. + fn in_document(&self) -> bool; +} + +#[derive(Debug)] +struct StackItem { + len_index: usize, + begin_offset: usize, + doc_type: ElementType, +} + +pub(crate) struct LenRecordingDocumentBufMut { + stream_len: usize, + lens: Vec, + stack: Vec, +} + +impl LenRecordingDocumentBufMut { + pub(crate) fn new() -> Self { + Self { + stream_len: 0, + lens: vec![], + stack: vec![], + } + } + + pub(crate) fn into_lens(self) -> Vec { + assert!(self.stack.is_empty(), "{:?}", self.stack); + self.lens + } +} + +impl DocumentBufMut for LenRecordingDocumentBufMut { + fn begin_doc(&mut self, doc_type: ElementType) -> Result<()> { + if self.stack.is_empty() && self.stream_len > 0 { + panic!("must begin stream with a document.") + } + let index = self.lens.len(); + self.lens.push(0); + self.stack.push(StackItem { + len_index: index, + begin_offset: self.stream_len, + doc_type, + }); + self.stream_len += 4; // length value that will be written to the stream. + Ok(()) + } + + fn end_doc(&mut self) -> Result<()> { + let item = self.stack.pop().expect("paired with begin_doc()"); + if item.doc_type != ElementType::JavaScriptCodeWithScope { + self.stream_len += 1; // null terminator + } + self.lens[item.len_index] = self.stream_len as i32 - item.begin_offset as i32; + Ok(()) + } + + fn in_document(&self) -> bool { + !self.stack.is_empty() + } +} + +unsafe impl BufMut for LenRecordingDocumentBufMut { + fn remaining_mut(&self) -> usize { + 0 + } + + unsafe fn advance_mut(&mut self, cnt: usize) { + self.stream_len += cnt; + } + + fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice { + bytes::buf::UninitSlice::new(&mut []) + } + + fn put(&mut self, src: T) + where + Self: Sized, + { + self.stream_len += src.remaining() + } + + fn put_slice(&mut self, src: &[u8]) { + self.stream_len += src.len(); + } + + fn put_bytes(&mut self, _val: u8, cnt: usize) { + self.stream_len += cnt; + } +} + +pub(crate) struct LenReplayingDocumentBufMut { + buf: B, + lens: std::vec::IntoIter, + doc_type_stack: Vec, +} + +impl LenReplayingDocumentBufMut { + pub(crate) fn new(buf: B, lens: Vec) -> Self { + Self { + buf, + lens: lens.into_iter(), + doc_type_stack: vec![], + } + } + + pub(crate) fn into_inner(self) -> B { + self.buf + } +} + +impl DocumentBufMut for LenReplayingDocumentBufMut { + fn begin_doc(&mut self, doc_type: ElementType) -> Result<()> { + let len = self.lens.next().unwrap(); + self.buf.put_i32_le(len); + self.doc_type_stack.push(doc_type); + Ok(()) + } + + fn end_doc(&mut self) -> Result<()> { + let doc_type = self.doc_type_stack.pop().expect("paired with begin_doc()"); + if doc_type != ElementType::JavaScriptCodeWithScope { + self.buf.put_u8(0); + } + Ok(()) + } + + fn in_document(&self) -> bool { + !self.doc_type_stack.is_empty() + } +} + +unsafe impl BufMut for LenReplayingDocumentBufMut { + fn remaining_mut(&self) -> usize { + self.buf.remaining_mut() + } + + unsafe fn advance_mut(&mut self, cnt: usize) { + self.buf.advance_mut(cnt); + } + + fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice { + self.buf.chunk_mut() + } +} + /// Serializer used to convert a type `T` into raw BSON bytes. -pub(crate) struct Serializer { - bytes: Vec, +pub(crate) struct Serializer { + buf: B, - /// The index into `bytes` where the current element type will need to be stored. - /// This needs to be set retroactively because in BSON, the element type comes before the key, - /// but in serde, the serializer learns of the type after serializing the key. - type_index: usize, + next_key: Option, /// Hint provided by the type being serialized. hint: SerializerHint, @@ -51,72 +199,116 @@ enum SerializerHint { RawArray, } +#[derive(Debug, Clone)] +enum Key { + Static(&'static str), + Owned(String), + Index(usize), +} + impl SerializerHint { fn take(&mut self) -> SerializerHint { std::mem::replace(self, SerializerHint::None) } } -impl Serializer { - pub(crate) fn new() -> Self { +impl Serializer { + pub(crate) fn new(buf: B) -> Self { Self { - bytes: Vec::new(), - type_index: 0, + buf, + next_key: None, hint: SerializerHint::None, human_readable: false, } } /// Convert this serializer into the vec of the serialized bytes. - pub(crate) fn into_vec(self) -> Vec { - self.bytes + pub(crate) fn into_buf(self) -> B { + self.buf } - /// Reserve a spot for the element type to be set retroactively via `update_element_type`. #[inline] - fn reserve_element_type(&mut self) { - self.type_index = self.bytes.len(); // record index - self.bytes.push(0); // push temporary placeholder + fn set_next_key(&mut self, key: Key) { + self.next_key = Some(key); } - /// Retroactively set the element type of the most recently serialized element. #[inline] - fn update_element_type(&mut self, t: ElementType) -> Result<()> { - if self.type_index == 0 { - if matches!(t, ElementType::EmbeddedDocument) { - // don't need to set the element type for the top level document - return Ok(()); + fn write_key(&mut self, t: ElementType) -> Result<()> { + if let Some(key) = self.next_key.take() { + self.buf.put_u8(t as u8); + match key { + Key::Static(k) => self.write_cstring(k), + Key::Owned(k) => self.write_cstring(&k), + Key::Index(i) => self.write_cstring(&i.to_string()), + } + } else { + if !self.buf.in_document() && t == ElementType::EmbeddedDocument { + // don't need to write element type and key for top-level document. + Ok(()) } else { - return Err(Error::custom(format!( + Err(Error::custom(format!( "attempted to encode a non-document type at the top level: {:?}", t - ))); + ))) } } + } - self.bytes[self.type_index] = t as u8; + #[inline] + fn write_cstring(&mut self, s: &str) -> Result<()> { + if s.contains('\0') { + return Err(Error::InvalidCString(s.into())); + } + self.buf.put_slice(s.as_bytes()); + self.buf.put_u8(0); Ok(()) } - /// Replace an i32 value at the given index with the given value. #[inline] - fn replace_i32(&mut self, at: usize, with: i32) { - let portion = &mut self.bytes[at..at + 4]; - portion.copy_from_slice(&with.to_le_bytes()); + fn write_string(&mut self, s: &str) { + self.buf.put_i32_le(s.len() as i32 + 1); + self.buf.put_slice(s.as_bytes()); + self.buf.put_u8(0); + } + + #[inline] + fn write_binary(&mut self, bytes: &[u8], subtype: BinarySubtype) -> Result<()> { + let len = if let BinarySubtype::BinaryOld = subtype { + bytes.len() + 4 + } else { + bytes.len() + }; + + if len > MAX_BSON_SIZE as usize { + return Err(Error::custom(format!( + "binary length {} exceeded maximum size", + bytes.len() + ))); + } + + self.buf.put_i32_le(len as i32); + self.buf.put_u8(subtype.into()); + + if let BinarySubtype::BinaryOld = subtype { + self.buf.put_i32_le(len as i32 - 4); + }; + + self.buf.put_slice(bytes); + Ok(()) } } -impl<'a> serde::Serializer for &'a mut Serializer { +impl<'a, B: DocumentBufMut> serde::Serializer for &'a mut Serializer { type Ok = (); type Error = Error; - type SerializeSeq = DocumentSerializer<'a>; - type SerializeTuple = DocumentSerializer<'a>; - type SerializeTupleStruct = DocumentSerializer<'a>; - type SerializeTupleVariant = VariantSerializer<'a>; - type SerializeMap = DocumentSerializer<'a>; - type SerializeStruct = StructSerializer<'a>; - type SerializeStructVariant = VariantSerializer<'a>; + type SerializeSeq = DocumentSerializer<'a, B>; + type SerializeTuple = DocumentSerializer<'a, B>; + type SerializeTupleStruct = DocumentSerializer<'a, B>; + type SerializeTupleVariant = VariantSerializer<'a, B>; + type SerializeMap = DocumentSerializer<'a, B>; + type SerializeStruct = StructSerializer<'a, B>; + type SerializeStructVariant = VariantSerializer<'a, B>; fn is_human_readable(&self) -> bool { self.human_readable @@ -124,8 +316,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_bool(self, v: bool) -> Result { - self.update_element_type(ElementType::Boolean)?; - self.bytes.push(v as u8); + self.write_key(ElementType::Boolean)?; + self.buf.put_u8(v as u8); Ok(()) } @@ -141,15 +333,15 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_i32(self, v: i32) -> Result { - self.update_element_type(ElementType::Int32)?; - write_i32(&mut self.bytes, v)?; + self.write_key(ElementType::Int32)?; + self.buf.put_i32_le(v); Ok(()) } #[inline] fn serialize_i64(self, v: i64) -> Result { - self.update_element_type(ElementType::Int64)?; - write_i64(&mut self.bytes, v)?; + self.write_key(ElementType::Int64)?; + self.buf.put_i64_le(v); Ok(()) } @@ -185,8 +377,9 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_f64(self, v: f64) -> Result { - self.update_element_type(ElementType::Double)?; - write_f64(&mut self.bytes, v) + self.write_key(ElementType::Double)?; + self.buf.put_f64_le(v); + Ok(()) } #[inline] @@ -198,8 +391,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_str(self, v: &str) -> Result { - self.update_element_type(ElementType::String)?; - write_string(&mut self.bytes, v); + self.write_key(ElementType::String)?; + self.write_string(v); Ok(()) } @@ -207,15 +400,15 @@ impl<'a> serde::Serializer for &'a mut Serializer { fn serialize_bytes(self, v: &[u8]) -> Result { match self.hint.take() { SerializerHint::RawDocument => { - self.update_element_type(ElementType::EmbeddedDocument)?; - self.bytes.write_all(v)?; + self.write_key(ElementType::EmbeddedDocument)?; + self.buf.put_slice(v); } SerializerHint::RawArray => { - self.update_element_type(ElementType::Array)?; - self.bytes.write_all(v)?; + self.write_key(ElementType::Array)?; + self.buf.put_slice(v); } hint => { - self.update_element_type(ElementType::Binary)?; + self.write_key(ElementType::Binary)?; let subtype = if matches!(hint, SerializerHint::Uuid) { BinarySubtype::Uuid @@ -223,7 +416,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { BinarySubtype::Generic }; - write_binary(&mut self.bytes, v, subtype)?; + self.write_binary(v, subtype)?; } }; Ok(()) @@ -231,7 +424,7 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_none(self) -> Result { - self.update_element_type(ElementType::Null)?; + self.write_key(ElementType::Null)?; Ok(()) } @@ -295,8 +488,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { where T: serde::Serialize + ?Sized, { - self.update_element_type(ElementType::EmbeddedDocument)?; - let mut d = DocumentSerializer::start(&mut *self)?; + self.write_key(ElementType::EmbeddedDocument)?; + let mut d = DocumentSerializer::start(&mut *self, ElementType::EmbeddedDocument)?; d.serialize_entry(variant, value)?; d.end_doc()?; Ok(()) @@ -304,8 +497,8 @@ impl<'a> serde::Serializer for &'a mut Serializer { #[inline] fn serialize_seq(self, _len: Option) -> Result { - self.update_element_type(ElementType::Array)?; - DocumentSerializer::start(&mut *self) + self.write_key(ElementType::Array)?; + DocumentSerializer::start(&mut *self, ElementType::Array) } #[inline] @@ -330,14 +523,14 @@ impl<'a> serde::Serializer for &'a mut Serializer { variant: &'static str, _len: usize, ) -> Result { - self.update_element_type(ElementType::EmbeddedDocument)?; + self.write_key(ElementType::EmbeddedDocument)?; VariantSerializer::start(&mut *self, variant, VariantInnerType::Tuple) } #[inline] fn serialize_map(self, _len: Option) -> Result { - self.update_element_type(ElementType::EmbeddedDocument)?; - DocumentSerializer::start(&mut *self) + self.write_key(ElementType::EmbeddedDocument)?; + DocumentSerializer::start(&mut *self, ElementType::EmbeddedDocument) } #[inline] @@ -359,14 +552,17 @@ impl<'a> serde::Serializer for &'a mut Serializer { _ => None, }; - self.update_element_type( + self.write_key( value_type .map(Into::into) .unwrap_or(ElementType::EmbeddedDocument), )?; match value_type { Some(vt) => Ok(StructSerializer::Value(ValueSerializer::new(self, vt))), - None => Ok(StructSerializer::Document(DocumentSerializer::start(self)?)), + None => Ok(StructSerializer::Document(DocumentSerializer::start( + self, + ElementType::EmbeddedDocument, + )?)), } } @@ -378,20 +574,20 @@ impl<'a> serde::Serializer for &'a mut Serializer { variant: &'static str, _len: usize, ) -> Result { - self.update_element_type(ElementType::EmbeddedDocument)?; + self.write_key(ElementType::EmbeddedDocument)?; VariantSerializer::start(&mut *self, variant, VariantInnerType::Struct) } } -pub(crate) enum StructSerializer<'a> { +pub(crate) enum StructSerializer<'a, B> { /// Serialize a BSON value currently represented in serde as a struct (e.g. ObjectId) - Value(ValueSerializer<'a>), + Value(ValueSerializer<'a, B>), /// Serialize the struct as a document. - Document(DocumentSerializer<'a>), + Document(DocumentSerializer<'a, B>), } -impl SerializeStruct for StructSerializer<'_> { +impl SerializeStruct for StructSerializer<'_, B> { type Ok = (); type Error = Error; @@ -422,55 +618,41 @@ enum VariantInnerType { /// Serializer used for enum variants, including both tuple (e.g. Foo::Bar(1, 2, 3)) and /// struct (e.g. Foo::Bar { a: 1 }). -pub(crate) struct VariantSerializer<'a> { - root_serializer: &'a mut Serializer, - - /// Variants are serialized as documents of the form `{ : }`, - /// and `doc_start` indicates the index at which the outer document begins. - doc_start: usize, - - /// `inner_start` indicates the index at which the inner document or array begins. - inner_start: usize, +pub(crate) struct VariantSerializer<'a, B> { + root_serializer: &'a mut Serializer, /// How many elements have been serialized in the inner document / array so far. num_elements_serialized: usize, } -impl<'a> VariantSerializer<'a> { +impl<'a, B: DocumentBufMut> VariantSerializer<'a, B> { fn start( - rs: &'a mut Serializer, + rs: &'a mut Serializer, variant: &'static str, inner_type: VariantInnerType, ) -> Result { - let doc_start = rs.bytes.len(); - // write placeholder length for document, will be updated at end - write_i32(&mut rs.bytes, 0)?; + rs.buf.begin_doc(ElementType::EmbeddedDocument)?; let inner = match inner_type { VariantInnerType::Struct => ElementType::EmbeddedDocument, VariantInnerType::Tuple => ElementType::Array, }; - rs.bytes.push(inner as u8); - write_cstring(&mut rs.bytes, variant)?; - let inner_start = rs.bytes.len(); - // write placeholder length for inner, will be updated at end - write_i32(&mut rs.bytes, 0)?; + rs.buf.put_u8(inner as u8); + rs.write_cstring(&variant)?; + rs.buf.begin_doc(inner)?; Ok(Self { root_serializer: rs, num_elements_serialized: 0, - doc_start, - inner_start, }) } #[inline] - fn serialize_element(&mut self, k: &str, v: &T) -> Result<()> + fn serialize_element(&mut self, k: Key, v: &T) -> Result<()> where T: Serialize + ?Sized, { - self.root_serializer.reserve_element_type(); - write_cstring(&mut self.root_serializer.bytes, k)?; + self.root_serializer.set_next_key(k); v.serialize(&mut *self.root_serializer)?; self.num_elements_serialized += 1; @@ -479,21 +661,13 @@ impl<'a> VariantSerializer<'a> { #[inline] fn end_both(self) -> Result<()> { - // null byte for the inner - self.root_serializer.bytes.push(0); - let arr_length = (self.root_serializer.bytes.len() - self.inner_start) as i32; - self.root_serializer - .replace_i32(self.inner_start, arr_length); - - // null byte for document - self.root_serializer.bytes.push(0); - let doc_length = (self.root_serializer.bytes.len() - self.doc_start) as i32; - self.root_serializer.replace_i32(self.doc_start, doc_length); + self.root_serializer.buf.end_doc()?; + self.root_serializer.buf.end_doc()?; Ok(()) } } -impl serde::ser::SerializeTupleVariant for VariantSerializer<'_> { +impl serde::ser::SerializeTupleVariant for VariantSerializer<'_, B> { type Ok = (); type Error = Error; @@ -503,7 +677,7 @@ impl serde::ser::SerializeTupleVariant for VariantSerializer<'_> { where T: Serialize + ?Sized, { - self.serialize_element(format!("{}", self.num_elements_serialized).as_str(), value) + self.serialize_element(Key::Index(self.num_elements_serialized), value) } #[inline] @@ -512,7 +686,7 @@ impl serde::ser::SerializeTupleVariant for VariantSerializer<'_> { } } -impl serde::ser::SerializeStructVariant for VariantSerializer<'_> { +impl serde::ser::SerializeStructVariant for VariantSerializer<'_, B> { type Ok = (); type Error = Error; @@ -522,7 +696,7 @@ impl serde::ser::SerializeStructVariant for VariantSerializer<'_> { where T: Serialize + ?Sized, { - self.serialize_element(key, value) + self.serialize_element(Key::Static(key), value) } #[inline] diff --git a/src/ser/raw/value_serializer.rs b/src/ser/raw/value_serializer.rs index 8c0b2215..918721a7 100644 --- a/src/ser/raw/value_serializer.rs +++ b/src/ser/raw/value_serializer.rs @@ -1,4 +1,4 @@ -use std::{convert::TryFrom, io::Write}; +use std::convert::TryFrom; use serde::{ ser::{Error as SerdeError, Impossible, SerializeMap, SerializeStruct}, @@ -9,18 +9,17 @@ use crate::{ base64, oid::ObjectId, raw::RAW_DOCUMENT_NEWTYPE, - ser::{write_binary, write_cstring, write_i32, write_i64, write_string, Error, Result}, + ser::{Error, Result}, spec::{BinarySubtype, ElementType}, - RawDocument, - RawJavaScriptCodeWithScopeRef, + RawDocument, RawJavaScriptCodeWithScopeRef, }; -use super::{document_serializer::DocumentSerializer, Serializer}; +use super::{document_serializer::DocumentSerializer, DocumentBufMut, Serializer}; /// A serializer used specifically for serializing the serde-data-model form of a BSON type (e.g. /// [`Binary`]) to raw bytes. -pub(crate) struct ValueSerializer<'a> { - root_serializer: &'a mut Serializer, +pub(crate) struct ValueSerializer<'a, B> { + root_serializer: &'a mut Serializer, state: SerializationStep, } @@ -118,8 +117,8 @@ impl From for ElementType { } } -impl<'a> ValueSerializer<'a> { - pub(super) fn new(rs: &'a mut Serializer, value_type: ValueType) -> Self { +impl<'a, B> ValueSerializer<'a, B> { + pub(super) fn new(rs: &'a mut Serializer, value_type: ValueType) -> Self { let state = match value_type { ValueType::DateTime => SerializationStep::DateTime, ValueType::Binary => SerializationStep::Binary, @@ -149,7 +148,7 @@ impl<'a> ValueSerializer<'a> { } } -impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { +impl<'b, B: DocumentBufMut> serde::Serializer for &'b mut ValueSerializer<'_, B> { type Ok = (); type Error = Error; @@ -157,7 +156,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { type SerializeTuple = Impossible<(), Error>; type SerializeTupleStruct = Impossible<(), Error>; type SerializeTupleVariant = Impossible<(), Error>; - type SerializeMap = CodeWithScopeSerializer<'b>; + type SerializeMap = CodeWithScopeSerializer<'b, B>; type SerializeStruct = Self; type SerializeStructVariant = Impossible<(), Error>; @@ -192,8 +191,8 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let t = u32::try_from(time).map_err(Error::custom)?; let i = u32::try_from(v).map_err(Error::custom)?; - write_i32(&mut self.root_serializer.bytes, i as i32)?; - write_i32(&mut self.root_serializer.bytes, t as i32)?; + self.root_serializer.buf.put_i32_le(i as i32); + self.root_serializer.buf.put_i32_le(t as i32); Ok(()) } _ => Err(self.invalid_step("i64")), @@ -204,7 +203,8 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_u8(self, v: u8) -> Result { match self.state { SerializationStep::RawBinarySubType { ref bytes } => { - write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), v.into())?; + self.root_serializer + .write_binary(bytes.as_slice(), v.into())?; self.state = SerializationStep::Done; Ok(()) } @@ -246,11 +246,11 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { match &self.state { SerializationStep::DateTimeNumberLong => { let millis: i64 = v.parse().map_err(Error::custom)?; - write_i64(&mut self.root_serializer.bytes, millis)?; + self.root_serializer.buf.put_i64_le(millis); } SerializationStep::Oid => { let oid = ObjectId::parse_str(v).map_err(Error::custom)?; - self.root_serializer.bytes.write_all(&oid.bytes())?; + self.root_serializer.buf.put_slice(&oid.bytes()); } SerializationStep::BinaryBytes => { self.state = SerializationStep::BinarySubType { @@ -263,23 +263,24 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { let bytes = base64::decode(base64.as_str()).map_err(Error::custom)?; - write_binary(&mut self.root_serializer.bytes, bytes.as_slice(), subtype)?; + self.root_serializer + .write_binary(bytes.as_slice(), subtype)?; } SerializationStep::Symbol | SerializationStep::DbPointerRef => { - write_string(&mut self.root_serializer.bytes, v); + self.root_serializer.write_string(v); } SerializationStep::RegExPattern => { - write_cstring(&mut self.root_serializer.bytes, v)?; + self.root_serializer.write_cstring(v)?; } SerializationStep::RegExOptions => { let mut chars: Vec<_> = v.chars().collect(); chars.sort_unstable(); let sorted = chars.into_iter().collect::(); - write_cstring(&mut self.root_serializer.bytes, sorted.as_str())?; + self.root_serializer.write_cstring(sorted.as_str())?; } SerializationStep::Code => { - write_string(&mut self.root_serializer.bytes, v); + self.root_serializer.write_string(v); } SerializationStep::CodeWithScopeCode => { self.state = SerializationStep::CodeWithScopeScope { @@ -301,7 +302,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { fn serialize_bytes(self, v: &[u8]) -> Result { match self.state { SerializationStep::Decimal128Value => { - self.root_serializer.bytes.write_all(v)?; + self.root_serializer.buf.put_slice(v); Ok(()) } SerializationStep::BinaryBytes => { @@ -313,9 +314,9 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { code, scope: RawDocument::from_bytes(v).map_err(Error::custom)?, }; - write_i32(&mut self.root_serializer.bytes, raw.len())?; - write_string(&mut self.root_serializer.bytes, code); - self.root_serializer.bytes.write_all(v)?; + self.root_serializer.buf.put_i32_le(raw.len() as i32); + self.root_serializer.write_string(code); + self.root_serializer.buf.put_slice(v); self.state = SerializationStep::Done; Ok(()) } @@ -451,7 +452,7 @@ impl<'b> serde::Serializer for &'b mut ValueSerializer<'_> { } } -impl SerializeStruct for &mut ValueSerializer<'_> { +impl SerializeStruct for &mut ValueSerializer<'_, B> { type Ok = (); type Error = Error; @@ -581,24 +582,22 @@ impl SerializeStruct for &mut ValueSerializer<'_> { } } -pub(crate) struct CodeWithScopeSerializer<'a> { - start: usize, - doc: DocumentSerializer<'a>, +pub(crate) struct CodeWithScopeSerializer<'a, B> { + doc: DocumentSerializer<'a, B>, } -impl<'a> CodeWithScopeSerializer<'a> { +impl<'a, B: DocumentBufMut> CodeWithScopeSerializer<'a, B> { #[inline] - fn start(code: &str, rs: &'a mut Serializer) -> Result { - let start = rs.bytes.len(); - write_i32(&mut rs.bytes, 0)?; // placeholder length - write_string(&mut rs.bytes, code); + fn start(code: &str, rs: &'a mut Serializer) -> Result { + rs.buf.begin_doc(ElementType::JavaScriptCodeWithScope)?; + rs.write_string(code); - let doc = DocumentSerializer::start(rs)?; - Ok(Self { start, doc }) + let doc = DocumentSerializer::start(rs, ElementType::EmbeddedDocument)?; + Ok(Self { doc }) } } -impl SerializeMap for CodeWithScopeSerializer<'_> { +impl SerializeMap for CodeWithScopeSerializer<'_, B> { type Ok = (); type Error = Error; @@ -620,10 +619,7 @@ impl SerializeMap for CodeWithScopeSerializer<'_> { #[inline] fn end(self) -> Result { - let result = self.doc.end_doc()?; - - let total_len = (result.root_serializer.bytes.len() - self.start) as i32; - result.root_serializer.replace_i32(self.start, total_len); - Ok(()) + let rs = self.doc.end_doc()?; + rs.buf.end_doc() } }