Skip to content

Add support for encoding_rs behind a new feature flag #262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -30,10 +30,12 @@ rustc-serialize = "0.3"

[features]
query_encoding = ["encoding"]
query_encoding_rs = ["encoding_rs"]
heap_size = ["heapsize"]

[dependencies]
encoding = {version = "0.2", optional = true}
encoding_rs = {version = "0.3.1", optional = true}
heapsize = {version = ">=0.1.1, <0.4", optional = true}
idna = { version = "0.1.0", path = "./idna" }
matches = "0.1"
101 changes: 98 additions & 3 deletions src/encoding.rs
Original file line number Diff line number Diff line change
@@ -8,16 +8,22 @@


//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.
//! encoding_rs or to only support UTF-8.

#[cfg(feature = "query_encoding")] extern crate encoding;
#[cfg(feature = "query_encoding_rs")] extern crate encoding_rs;

use std::borrow::Cow;

#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;

#[cfg(feature = "query_encoding_rs")] use self::encoding_rs::UTF_8;
#[cfg(feature = "query_encoding_rs")] pub use self::encoding_rs::Encoding;
#[cfg(feature = "query_encoding_rs")] pub type EncodingRef = &'static Encoding;


#[cfg(feature = "query_encoding")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
@@ -90,11 +96,99 @@ impl EncodingOverride {
}


#[cfg(not(feature = "query_encoding"))]
/// Character-encoding override for query strings, backed by `encoding_rs`.
///
/// Only compiled when the `query_encoding_rs` Cargo feature is enabled.
#[cfg(feature = "query_encoding_rs")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
// The `encoding_rs` encoding that `decode` and `encode` delegate to.
encoding: &'static Encoding
}

#[cfg(feature = "query_encoding_rs")]
impl EncodingOverride {
    /// Wraps `encoding` when one is supplied; otherwise yields the UTF-8 override.
    pub fn from_opt_encoding(encoding: Option<&'static Encoding>) -> Self {
        match encoding {
            Some(chosen) => Self::from_encoding(chosen),
            None => Self::utf8(),
        }
    }

    /// Builds an override around the given `encoding_rs` encoding.
    pub fn from_encoding(encoding: &'static Encoding) -> Self {
        EncodingOverride { encoding: encoding }
    }

    /// The override that selects UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: UTF_8 }
    }

    /// Resolves `label` via `Encoding::for_label`, returning `None` when that
    /// lookup yields no encoding.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        let resolved = Encoding::for_label(label);
        resolved.map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    pub fn to_output_encoding(self) -> Self {
        let output = self.encoding.output_encoding();
        Self::from_encoding(output)
    }

    /// Whether the wrapped encoding is `UTF_8`.
    pub fn is_utf8(&self) -> bool {
        UTF_8 == self.encoding
    }

    /// The name reported by the wrapped encoding.
    pub fn name(&self) -> &'static str {
        let wrapped = self.encoding;
        wrapped.name()
    }

    /// Decodes `input` with the wrapped encoding, without BOM handling.
    ///
    /// The "had errors" flag reported by `encoding_rs` is discarded. For owned
    /// input whose decoded form borrows from it, the original allocation is
    /// turned into the returned `String` instead of being copied.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        let bytes = match input {
            // Borrowed input: whatever `encoding_rs` hands back already has
            // the right lifetime.
            Cow::Borrowed(slice) => return self.encoding.decode_without_bom_handling(slice).0,
            Cow::Owned(vec) => vec,
        };

        // The decoder had to allocate: drop the old buffer, return the new one.
        if let (Cow::Owned(decoded), _) = self.encoding.decode_without_bom_handling(&bytes) {
            return Cow::Owned(decoded);
        }

        // The decoder borrowed from `bytes`, so keep that allocation.
        // SAFETY (NOTE(review): confirm against encoding_rs docs): this relies
        // on `decode_without_bom_handling` returning `Cow::Borrowed` only when
        // the output is the entire input reinterpreted as `str`, i.e. `bytes`
        // is valid UTF-8.
        Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
    }

    /// Encodes `input` with the wrapped encoding.
    ///
    /// The extra values returned by `Encoding::encode` are discarded. For owned
    /// input whose encoded form borrows from it, the original allocation is
    /// turned into the returned `Vec<u8>` instead of being copied.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        let text = match input {
            Cow::Borrowed(slice) => return self.encoding.encode(slice).0,
            Cow::Owned(string) => string,
        };

        // The encoder had to allocate: drop the old buffer, return the new one.
        if let (Cow::Owned(encoded), _, _) = self.encoding.encode(&text) {
            return Cow::Owned(encoded);
        }

        // The encoder borrowed from `text`, so hand back its bytes as-is.
        Cow::Owned(text.into_bytes())
    }
}


/// Stand-in for the encoding override used when neither the `query_encoding`
/// nor the `query_encoding_rs` Cargo feature is enabled.
///
/// It carries no data.
#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
#[derive(Copy, Clone)]
pub struct EncodingOverride;

#[cfg(not(feature = "query_encoding"))]
#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_rs")))]
impl EncodingOverride {
#[inline]
pub fn utf8() -> Self {
@@ -127,6 +221,7 @@ pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
}
}

#[cfg(not(feature = "query_encoding_rs"))]
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
match input {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
9 changes: 5 additions & 4 deletions src/form_urlencoded.rs
Original file line number Diff line number Diff line change
@@ -40,14 +40,15 @@ pub fn parse(input: &[u8]) -> Parse {
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// This function is only available if the `query_encoding` Cargo feature is enabled.
/// This function is only available if either the `query_encoding` or the
/// `query_encoding_rs` Cargo feature is enabled.
///
/// Arguments:
///
/// * `encoding_override`: The character encoding each name and value is decoded as
/// after percent-decoding. Defaults to UTF-8.
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
pub fn parse_with_encoding<'a>(input: &'a [u8],
encoding_override: Option<::encoding::EncodingRef>,
use_charset: bool)
@@ -279,7 +280,7 @@ impl<T: Target> Serializer<T> {
}

/// Set the character encoding to be used for names and values before percent-encoding.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
self
@@ -317,7 +318,7 @@ impl<T: Target> Serializer<T> {
/// (See the `encoding_override()` method.)
///
/// Panics if called after `.finish()`.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
pub fn append_charset(&mut self) -> &mut Self {
{
let string = string(&mut self.target);
18 changes: 15 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -20,11 +20,14 @@ git = "https://github.com/servo/rust-url"
```

Supporting encodings other than UTF-8 in query strings is an optional feature
that requires [rust-encoding](https://github.com/lifthrasiir/rust-encoding)
and is off by default.
that requires either
[rust-encoding](https://github.com/lifthrasiir/rust-encoding) or
[encoding_rs](https://github.com/hsivonen/encoding_rs) and is off by default.
You can enable it with
[Cargo’s *features* mechanism](http://doc.crates.io/manifest.html#the-[features]-section):

For `rust-encoding`:

```Cargo
[dependencies.url]
git = "https://github.com/servo/rust-url"
@@ -33,6 +36,15 @@ features = ["query_encoding"]

… or by passing `--cfg 'feature="query_encoding"'` to rustc.

Or for `encoding_rs`:

```Cargo
[dependencies.url]
git = "https://github.com/servo/rust-url"
features = ["query_encoding_rs"]
```

… or by passing `--cfg 'feature="query_encoding_rs"'` to rustc.

# URL parsing and data structures

@@ -202,7 +214,7 @@ impl<'a> ParseOptions<'a> {

/// Override the character encoding of query strings.
/// This is a legacy concept only relevant for HTML.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_rs"))]
pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self {
self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding();
self