Skip to content

Commit 797cb36

Browse files
committed
encoding: reuse buffers when possible
Applies when using `encoding_rs` via the `query_encoding_2` feature. Code originally by @hsivonen in servo#262
1 parent dd69a70 commit 797cb36

File tree

1 file changed

+46
-6
lines changed

1 file changed

+46
-6
lines changed

src/encoding.rs

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,57 @@ impl EncodingOverride {
7878

7979
/// Decodes `input` bytes into a string, using the override encoding when one is set.
///
/// With an override encoding, the bytes go through `decode_without_bom_handling`;
/// without one, the input is handed to `decode_utf8_lossy`. On the owned-input
/// path the original heap buffer is reused when the decoder did not have to
/// produce a new string.
pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
8080
match self.encoding {
81-
// encoding_rs returns a short-lived Cow, so create an owned Cow
82-
Some(encoding) => Cow::from(encoding.decode(&input).0.into_owned()),
83-
None => decode_utf8_lossy(input.into()),
81+
Some(encoding) => {
82+
match input {
83+
// Borrowed input: the decoder's result is returned as-is (the second
// tuple element is ignored).
Cow::Borrowed(b) => {
84+
let (cow, _) = encoding.decode_without_bom_handling(b);
85+
cow
86+
},
87+
Cow::Owned(v) => {
88+
// Inner scope: `cow` borrows from `v`, so it must be gone before `v`
// is moved into the returned string below.
{
89+
let (cow, _) = encoding.decode_without_bom_handling(&v[..]);
90+
match cow {
91+
Cow::Owned(s) => {
92+
// The decoder allocated a new string: return it; the old
// buffer `v` is freed when it goes out of scope.
93+
return Cow::Owned(s);
94+
}
95+
// Decoder borrowed from `v`: fall through and reuse `v` itself.
Cow::Borrowed(_) => {}
96+
}
97+
}
98+
// Reuse the old heap buffer.
// SAFETY: this point is only reached when the decoder returned
// `Cow::Borrowed`. NOTE(review): this relies on encoding_rs returning
// a borrowed result only when the entire input is already valid UTF-8
// (so `v` can be reinterpreted as a `String`) — confirm against the
// encoding_rs documentation.
99+
Cow::Owned(unsafe { String::from_utf8_unchecked(v) })
100+
},
101+
}
102+
},
103+
None => decode_utf8_lossy(input),
84104
}
85105
}
86106

87107
/// Encodes `input` into bytes, using the override encoding when one is set.
///
/// With an override encoding, the string goes through `encoding.encode`;
/// without one, the input is handed to `encode_utf8`. On the owned-input path
/// the string's own heap buffer is returned as bytes when the encoder did not
/// have to produce a new byte vector.
pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
88108
match self.encoding {
89-
// encoding_rs returns a short-lived Cow, so create an owned Cow
90-
Some(encoding) => Cow::from(encoding.encode(&input).0.into_owned()),
91-
None => encode_utf8(input)
109+
Some(encoding) => {
110+
match input {
111+
// Borrowed input: the encoder's result is returned as-is (the other
// two tuple elements are ignored).
Cow::Borrowed(s) => {
112+
let (cow, _, _) = encoding.encode(s);
113+
cow
114+
},
115+
Cow::Owned(s) => {
116+
// Inner scope: `cow` borrows from `s`, so it must be gone before `s`
// is consumed by `into_bytes` below.
{
117+
let (cow, _, _) = encoding.encode(&s[..]);
118+
match cow {
119+
Cow::Owned(v) => {
120+
// The encoder allocated a new byte vector: return it; the
// old buffer `s` is freed when it goes out of scope.
121+
return Cow::Owned(v);
122+
},
123+
// Encoder borrowed from `s`: fall through and reuse `s` itself.
Cow::Borrowed(_) => {},
124+
}
125+
}
126+
// Reuse the old heap buffer: only reached when the encoder returned
// `Cow::Borrowed`. NOTE(review): presumably a borrowed result means the
// encoded bytes are exactly the bytes of `s` — confirm against the
// encoding_rs documentation.
127+
Cow::Owned(s.into_bytes())
128+
},
129+
}
130+
},
131+
None => encode_utf8(input),
92132
}
93133
}
94134
}

0 commit comments

Comments
 (0)