Skip to content

Commit a3e676f

Browse files
committed
Add support for encoding_rs behind the feature flag query_encoding_rs.
1 parent a60029a commit a3e676f

File tree

4 files changed

+120
-10
lines changed

4 files changed

+120
-10
lines changed

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,12 @@ rustc-serialize = "0.3"
3030

3131
[features]
3232
query_encoding = ["encoding"]
33+
query_encoding_rs = ["encoding_rs"]
3334
heap_size = ["heapsize"]
3435

3536
[dependencies]
3637
encoding = {version = "0.2", optional = true}
38+
encoding_rs = {version = "0.3.1", optional = true}
3739
heapsize = {version = ">=0.1.1, <0.4", optional = true}
3840
idna = { version = "0.1.0", path = "./idna" }
3941
matches = "0.1"

src/encoding.rs

+98-3
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,22 @@
88

99

1010
//! Abstraction that conditionally compiles either to rust-encoding,
11-
//! or to only support UTF-8.
11+
//! to encoding_rs, or to only support UTF-8.
1212
1313
#[cfg(feature = "query_encoding")] extern crate encoding;
14+
#[cfg(feature = "query_encoding_rs")] extern crate encoding_rs;
1415

1516
use std::borrow::Cow;
1617

1718
#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
1819
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
1920
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;
2021

22+
#[cfg(feature = "query_encoding_rs")] use self::encoding_rs::UTF_8;
23+
#[cfg(feature = "query_encoding_rs")] pub use self::encoding_rs::Encoding;
24+
#[cfg(feature = "query_encoding_rs")] pub type EncodingRef = &'static Encoding;
25+
26+
2127
#[cfg(feature = "query_encoding")]
2228
#[derive(Copy, Clone)]
2329
pub struct EncodingOverride {
@@ -90,11 +96,99 @@ impl EncodingOverride {
9096
}
9197

9298

93-
#[cfg(not(feature = "query_encoding"))]
99+
/// Character-encoding override backed by the `encoding_rs` crate.
///
/// Wraps a `&'static Encoding` and exposes the constructors, lookup and
/// decode/encode helpers defined in the `impl` block below.
///
/// NOTE(review): another `EncodingOverride` is declared in this module under
/// `feature = "query_encoding"`. Cargo features are additive, so enabling both
/// features at once looks like it would produce duplicate definitions —
/// confirm and consider making the two `cfg` gates mutually exclusive.
#[cfg(feature = "query_encoding_rs")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
    encoding: &'static Encoding
}

#[cfg(feature = "query_encoding_rs")]
impl EncodingOverride {
    /// Builds an override from an optional encoding, falling back to UTF-8
    /// when `encoding` is `None`.
    pub fn from_opt_encoding(encoding: Option<&'static Encoding>) -> Self {
        match encoding {
            Some(encoding) => Self::from_encoding(encoding),
            None => Self::utf8(),
        }
    }

    /// Builds an override that uses exactly the given encoding.
    pub fn from_encoding(encoding: &'static Encoding) -> Self {
        EncodingOverride {
            encoding: encoding
        }
    }

    /// Builds an override that uses UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: UTF_8 }
    }

    /// Resolves an encoding label via `Encoding::for_label`.
    ///
    /// Returns `None` when `encoding_rs` does not recognize the label.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        Encoding::for_label(label).map(Self::from_encoding)
    }

    /// Returns the override for this encoding's output encoding.
    ///
    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    pub fn to_output_encoding(self) -> Self {
        Self::from_encoding(self.encoding.output_encoding())
    }

    /// Returns `true` when the wrapped encoding is UTF-8.
    pub fn is_utf8(&self) -> bool {
        self.encoding == UTF_8
    }

    /// Returns the name reported by the wrapped encoding.
    pub fn name(&self) -> &'static str {
        self.encoding.name()
    }

    /// Decodes `input` from the wrapped encoding into a string, without BOM
    /// handling.
    ///
    /// The extra status value returned by `encoding_rs` is discarded. A
    /// borrowed input yields whatever `Cow` the decoder produced; an owned
    /// input always yields `Cow::Owned`, reusing the input allocation when
    /// the decoder did not need to build a new string.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        match input {
            Cow::Borrowed(bytes) => self.encoding.decode_without_bom_handling(bytes).0,
            Cow::Owned(bytes) => {
                // Keep the borrow of `bytes` confined to this statement so the
                // buffer can be moved afterwards.
                let transcoded = match self.encoding.decode_without_bom_handling(&bytes[..]) {
                    (Cow::Owned(text), _) => Some(text),
                    (Cow::Borrowed(_), _) => None,
                };
                match transcoded {
                    // The decoder allocated a new string; the old buffer is
                    // dropped and the new one returned.
                    Some(text) => Cow::Owned(text),
                    // SAFETY: this relies on the decoder returning
                    // `Cow::Borrowed` only when the decoded text is the input
                    // bytes themselves, which means `bytes` is already valid
                    // UTF-8 and its heap buffer can be reused as a `String`.
                    // NOTE(review): confirm this guarantee against the
                    // encoding_rs documentation for the pinned version.
                    None => Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
                }
            }
        }
    }

    /// Encodes `input` into bytes using the wrapped encoding.
    ///
    /// The extra values returned by `encoding_rs` (second and third tuple
    /// fields) are discarded. A borrowed input yields whatever `Cow` the
    /// encoder produced; an owned input always yields `Cow::Owned`, reusing
    /// the input allocation when the encoder did not need a new buffer.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        match input {
            Cow::Borrowed(text) => self.encoding.encode(text).0,
            Cow::Owned(text) => {
                // Keep the borrow of `text` confined to this statement so the
                // string can be moved afterwards.
                let transcoded = match self.encoding.encode(&text[..]) {
                    (Cow::Owned(bytes), _, _) => Some(bytes),
                    (Cow::Borrowed(_), _, _) => None,
                };
                match transcoded {
                    // The encoder allocated a new buffer; the old string is
                    // dropped and the new bytes returned.
                    Some(bytes) => Cow::Owned(bytes),
                    // The encoder borrowed from the input, so the string's own
                    // bytes are the output; reuse its heap buffer.
                    None => Cow::Owned(text.into_bytes()),
                }
            }
        }
    }
}
185+
186+
187+
#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
94188
#[derive(Copy, Clone)]
95189
pub struct EncodingOverride;
96190

97-
#[cfg(not(feature = "query_encoding"))]
191+
#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
98192
impl EncodingOverride {
99193
#[inline]
100194
pub fn utf8() -> Self {
@@ -127,6 +221,7 @@ pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
127221
}
128222
}
129223

224+
#[cfg(not(feature = "query_encoding_rs"))]
130225
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
131226
match input {
132227
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),

src/form_urlencoded.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,15 @@ pub fn parse(input: &[u8]) -> Parse {
4040
///
4141
/// Use `parse(input.as_bytes())` to parse a `&str` string.
4242
///
43-
/// This function is only available if the `query_encoding` Cargo feature is enabled.
43+
/// This function is only available if either the `query_encoding` or the
44+
/// `query_encoding_rs` Cargo feature is enabled.
4445
///
4546
/// Arguments:
4647
///
4748
/// * `encoding_override`: The character encoding each name and value is decoded as
4849
/// after percent-decoding. Defaults to UTF-8.
4950
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
50-
#[cfg(feature = "query_encoding")]
51+
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
5152
pub fn parse_with_encoding<'a>(input: &'a [u8],
5253
encoding_override: Option<::encoding::EncodingRef>,
5354
use_charset: bool)
@@ -279,7 +280,7 @@ impl<T: Target> Serializer<T> {
279280
}
280281

281282
/// Set the character encoding to be used for names and values before percent-encoding.
282-
#[cfg(feature = "query_encoding")]
283+
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
283284
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
284285
self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
285286
self
@@ -317,7 +318,7 @@ impl<T: Target> Serializer<T> {
317318
/// (See the `encoding_override()` method.)
318319
///
319320
/// Panics if called after `.finish()`.
320-
#[cfg(feature = "query_encoding")]
321+
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
321322
pub fn append_charset(&mut self) -> &mut Self {
322323
{
323324
let string = string(&mut self.target);

src/lib.rs

+15-3
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@ git = "https://github.com/servo/rust-url"
2020
```
2121
2222
Supporting encodings other than UTF-8 in query strings is an optional feature
23-
that requires [rust-encoding](https://github.com/lifthrasiir/rust-encoding)
24-
and is off by default.
23+
that requires either
24+
[rust-encoding](https://github.com/lifthrasiir/rust-encoding) or
25+
[encoding_rs](https://github.com/hsivonen/encoding_rs) and is off by default.
2526
You can enable it with
2627
[Cargo’s *features* mechanism](http://doc.crates.io/manifest.html#the-[features]-section):
2728
29+
For `rust-encoding`:
30+
2831
```Cargo
2932
[dependencies.url]
3033
git = "https://github.com/servo/rust-url"
@@ -33,6 +36,15 @@ features = ["query_encoding"]
3336
3437
… or by passing `--cfg 'feature="query_encoding"'` to rustc.
3538
39+
Or for `encoding_rs`:
40+
41+
```Cargo
42+
[dependencies.url]
43+
git = "https://github.com/servo/rust-url"
44+
features = ["query_encoding_rs"]
45+
```
46+
47+
… or by passing `--cfg 'feature="query_encoding_rs"'` to rustc.
3648
3749
# URL parsing and data structures
3850
@@ -202,7 +214,7 @@ impl<'a> ParseOptions<'a> {
202214

203215
/// Override the character encoding of query strings.
204216
/// This is a legacy concept only relevant for HTML.
205-
#[cfg(feature = "query_encoding")]
217+
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
206218
pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self {
207219
self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding();
208220
self

0 commit comments

Comments
 (0)