diff --git a/Cargo.toml b/Cargo.toml
index 5ac4c7c66..1537e71ac 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,10 +30,12 @@ rustc-serialize = "0.3"
 
 [features]
 query_encoding = ["encoding"]
+query_encoding_rs = ["encoding_rs"]
 heap_size = ["heapsize"]
 
 [dependencies]
 encoding = {version = "0.2", optional = true}
+encoding_rs = {version = "0.3.1", optional = true}
 heapsize = {version = ">=0.1.1, <0.4", optional = true}
 idna = { version = "0.1.0", path = "./idna" }
 matches = "0.1"
diff --git a/src/encoding.rs b/src/encoding.rs
index 0703c788f..09793e1f8 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -8,9 +8,14 @@
 
 
 //! Abstraction that conditionally compiles either to rust-encoding,
-//! or to only support UTF-8.
+//! encoding_rs or to only support UTF-8.
+//!
+//! If both the `query_encoding` and `query_encoding_rs` features are enabled,
+//! rust-encoding takes precedence so that the crate still compiles.
 
 #[cfg(feature = "query_encoding")] extern crate encoding;
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+extern crate encoding_rs;
 
 use std::borrow::Cow;
 
@@ -18,6 +23,14 @@ use std::borrow::Cow;
 #[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
 #[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;
 
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+use self::encoding_rs::UTF_8;
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+pub use self::encoding_rs::Encoding;
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+pub type EncodingRef = &'static Encoding;
+
+
 #[cfg(feature = "query_encoding")]
 #[derive(Copy, Clone)]
 pub struct EncodingOverride {
@@ -90,11 +103,102 @@ impl EncodingOverride {
 }
 
 
-#[cfg(not(feature = "query_encoding"))]
+/// Encoding override backed by encoding_rs.
+///
+/// Only compiled when `query_encoding_rs` is enabled and `query_encoding` is not,
+/// so that enabling both features never yields two conflicting definitions.
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+#[derive(Copy, Clone)]
+pub struct EncodingOverride {
+    encoding: &'static Encoding
+}
+
+#[cfg(all(feature = "query_encoding_rs", not(feature = "query_encoding")))]
+impl EncodingOverride {
+    /// Wraps `encoding`, falling back to UTF-8 when it is `None`.
+    pub fn from_opt_encoding(encoding: Option<&'static Encoding>) -> Self {
+        encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8)
+    }
+
+    /// Wraps the given encoding.
+    pub fn from_encoding(encoding: &'static Encoding) -> Self {
+        EncodingOverride {
+            encoding: encoding
+        }
+    }
+
+    /// The override that encodes and decodes as UTF-8.
+    #[inline]
+    pub fn utf8() -> Self {
+        EncodingOverride { encoding: UTF_8 }
+    }
+
+    /// Resolves `label` with `Encoding::for_label`; `None` if that lookup fails.
+    pub fn lookup(label: &[u8]) -> Option<Self> {
+        Encoding::for_label(label).map(Self::from_encoding)
+    }
+
+    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
+    pub fn to_output_encoding(self) -> Self {
+        Self::from_encoding(self.encoding.output_encoding())
+    }
+
+    /// Whether the wrapped encoding is UTF-8.
+    pub fn is_utf8(&self) -> bool {
+        self.encoding == UTF_8
+    }
+
+    /// Name of the wrapped encoding, as reported by `Encoding::name`.
+    pub fn name(&self) -> &'static str {
+        self.encoding.name()
+    }
+
+    /// Decodes `input` without BOM handling.
+    ///
+    /// The second element of the tuple returned by encoding_rs is ignored.
+    /// An owned input buffer is reused when decoding did not allocate.
+    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
+        match input {
+            Cow::Borrowed(bytes) => self.encoding.decode_without_bom_handling(bytes).0,
+            Cow::Owned(bytes) => {
+                if let (Cow::Owned(decoded), _) =
+                        self.encoding.decode_without_bom_handling(&bytes) {
+                    // Decoding allocated: drop the input buffer, keep the new one.
+                    return Cow::Owned(decoded);
+                }
+                // SAFETY: encoding_rs returns `Cow::Borrowed` only when the decoded
+                // text is the input itself, so `bytes` is known to be valid UTF-8.
+                debug_assert!(::std::str::from_utf8(&bytes).is_ok());
+                Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
+            }
+        }
+    }
+
+    /// Encodes `input` with the wrapped encoding.
+    ///
+    /// The other elements of the tuple returned by encoding_rs are ignored.
+    /// An owned input buffer is reused when encoding did not allocate.
+    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
+        match input {
+            Cow::Borrowed(text) => self.encoding.encode(text).0,
+            Cow::Owned(text) => {
+                if let (Cow::Owned(encoded), _, _) = self.encoding.encode(&text) {
+                    // Encoding allocated: drop the input buffer, keep the new one.
+                    return Cow::Owned(encoded);
+                }
+                // A borrowed result means the output is `text`'s own bytes.
+                Cow::Owned(text.into_bytes())
+            }
+        }
+    }
+}
+
+
+#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
 #[derive(Copy, Clone)]
 pub struct EncodingOverride;
 
-#[cfg(not(feature = "query_encoding"))]
+#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
 impl EncodingOverride {
     #[inline]
     pub fn utf8() -> Self {
@@ -127,6 +231,7 @@ pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
     }
 }
 
+#[cfg(any(feature = "query_encoding", not(feature = "query_encoding_rs")))]
 pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
     match input {
         Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs
index f4a655507..6392e43ce 100644
--- a/src/form_urlencoded.rs
+++ b/src/form_urlencoded.rs
@@ -40,14 +40,15 @@ pub fn parse(input: &[u8]) -> Parse {
 ///
 /// Use `parse(input.as_bytes())` to parse a `&str` string.
 ///
-/// This function is only available if the `query_encoding` Cargo feature is enabled.
+/// This function is only available if either the `query_encoding` or the
+/// `query_encoding_rs` Cargo feature is enabled.
 ///
 /// Arguments:
 ///
 /// * `encoding_override`: The character encoding each name and values is decoded as
 ///    after percent-decoding. Defaults to UTF-8.
 /// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
-#[cfg(feature = "query_encoding")]
+#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
 pub fn parse_with_encoding<'a>(input: &'a [u8],
                                encoding_override: Option<::encoding::EncodingRef>,
                                use_charset: bool)
@@ -279,7 +280,7 @@ impl Serializer {
     }
 
     /// Set the character encoding to be used for names and values before percent-encoding.
-    #[cfg(feature = "query_encoding")]
+    #[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
     pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
         self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
         self
@@ -317,7 +318,7 @@ impl Serializer {
     /// (See the `encoding_override()` method.)
     ///
     /// Panics if called after `.finish()`.
-    #[cfg(feature = "query_encoding")]
+    #[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
     pub fn append_charset(&mut self) -> &mut Self {
         {
             let string = string(&mut self.target);
diff --git a/src/lib.rs b/src/lib.rs
index 9f273cbbe..8aa23621e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,11 +20,14 @@ git = "https://github.com/servo/rust-url"
 ```
 
 Supporting encodings other than UTF-8 in query strings is an optional feature
-that requires [rust-encoding](https://github.com/lifthrasiir/rust-encoding)
-and is off by default.
+that requires either
+[rust-encoding](https://github.com/lifthrasiir/rust-encoding) or
+[encoding_rs](https://github.com/hsivonen/encoding_rs) and is off by default.
 You can enable it with
 [Cargo’s *features* mechanism](http://doc.crates.io/manifest.html#the-[features]-section):
 
+For `rust-encoding`:
+
 ```Cargo
 [dependencies.url]
 git = "https://github.com/servo/rust-url"
@@ -33,6 +36,17 @@ features = ["query_encoding"]
 
 … or by passing `--cfg 'feature="query_encoding"'` to rustc.
 
+Or for `encoding_rs`:
+
+```Cargo
+[dependencies.url]
+git = "https://github.com/servo/rust-url"
+features = ["query_encoding_rs"]
+```
+
+… or by passing `--cfg 'feature="query_encoding_rs"'` to rustc.
+
+If both features are enabled, `query_encoding` (rust-encoding) takes precedence.
 
 # URL parsing and data structures
 
@@ -202,7 +216,7 @@ impl<'a> ParseOptions<'a> {
 
     /// Override the character encoding of query strings.
     /// This is a legacy concept only relevant for HTML.
-    #[cfg(feature = "query_encoding")]
+    #[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
     pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self {
         self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding();
         self