Skip to content

Add Cargo feature query_encoding_2 to use encoding_rs crate #446

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -8,17 +8,37 @@ jobs:
- cargo update
# getopts is only used in tests. Its versions 0.2.16+ don’t build on 1.17.0
- cargo update -p getopts --precise 0.2.15
# data-url uses pub(crate) which is unstable in 1.17
script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde
script:
# test building potentially conflicting features
- cargo build
- cargo build --features query_encoding
- cargo build --features query_encoding_2
# data-url uses pub(crate) which is unstable in 1.17
- cargo test --all-features -p url -p idna -p percent-encoding -p url_serde

- rust: stable
script: cargo test --all-features --all
script:
# test building potentially conflicting features
- cargo build
- cargo build --features query_encoding
- cargo build --features query_encoding_2
- cargo test --all-features --all

- rust: beta
script: cargo test --all-features --all
script:
# test building potentially conflicting features
- cargo build
- cargo build --features query_encoding
- cargo build --features query_encoding_2
- cargo test --all-features --all

- rust: nightly
script: cargo test --all-features --all
script:
# test building potentially conflicting features
- cargo build
- cargo build --features query_encoding
- cargo build --features query_encoding_2
- cargo test --all-features --all

- rust: nightly
env: TARGET=WASM32 # For job list UI
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -36,11 +36,13 @@ rustc-serialize = "0.3"
serde_json = ">=0.6.1, <0.9"

[features]
query_encoding_2 = ["encoding_rs"]
query_encoding = ["encoding"]
heap_size = ["heapsize"]

[dependencies]
encoding = {version = "0.2", optional = true}
encoding_rs = {version = "0.7", optional = true}
heapsize = {version = ">=0.4.1, <0.5", optional = true}
idna = { version = "0.1.0", path = "./idna" }
matches = "0.1"
@@ -49,4 +51,4 @@ rustc-serialize = {version = "0.3", optional = true}
serde = {version = ">=0.6.1, <0.9", optional = true}

[package.metadata.docs.rs]
features = ["query_encoding"]
features = ["query_encoding_2", "query_encoding"]
146 changes: 0 additions & 146 deletions src/encoding.rs

This file was deleted.

125 changes: 125 additions & 0 deletions src/encoding/encoding_rs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2013-2018 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Implementation using [encoding_rs](https://github.com/hsivonen/encoding_rs).
//! Only built with feature flag `query_encoding_2`.
extern crate encoding_rs;

use encoding::EncodingOverride;
use encoding::utf8_helpers::{decode_utf8_lossy, encode_utf8};

use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};

use self::encoding_rs::Encoding;

/// Query-encoding override backed by an `encoding_rs` encoding.
pub struct EncodingOverrideRs {
    /// The selected encoding; `None` means UTF-8.
    encoding: Option<&'static Encoding>,
}

impl EncodingOverrideRs {
    /// Wraps `encoding`, normalising it to `None` when its name is exactly `"UTF-8"`
    /// so that UTF-8 always has a single representation.
    fn from_encoding(encoding: &'static Encoding) -> Self {
        let stored = match encoding.name() {
            "UTF-8" => None,
            _ => Some(encoding),
        };
        EncodingOverrideRs { encoding: stored }
    }
}

impl EncodingOverride for EncodingOverrideRs {
#[inline]
fn utf8() -> Self {
Self { encoding: None }
}

fn lookup(label: &[u8]) -> Option<Self> {
// Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
// https://encoding.spec.whatwg.org/#names-and-labels
Encoding::for_label(label)
.map(Self::from_encoding)
}

fn is_utf8(&self) -> bool {
self.encoding.is_none()
}

fn name(&self) -> &'static str {
match self.encoding {
Some(encoding) => encoding.name(),
None => encoding_rs::UTF_8.name(),
}
}

fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
match self.encoding {
Some(encoding) => {
match input {
Cow::Borrowed(b) => {
let (cow, _) = encoding.decode_without_bom_handling(b);
cow
},
Cow::Owned(v) => {
{
let (cow, _) = encoding.decode_without_bom_handling(&v[..]);
match cow {
Cow::Owned(s) => {
// Free old heap buffer and return a new one.
return Cow::Owned(s);
},
Cow::Borrowed(_) => {},
}
}
// Reuse the old heap buffer.
Cow::Owned(unsafe { String::from_utf8_unchecked(v) })
},
}
},
None => decode_utf8_lossy(input),
}
}

fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
match self.encoding {
Some(encoding) => {
match input {
Cow::Borrowed(s) => {
let (cow, _, _) = encoding.encode(s);
cow
},
Cow::Owned(s) => {
{
let (cow, _, _) = encoding.encode(&s[..]);
match cow {
Cow::Owned(v) => {
// Free old heap buffer and return a new one.
return Cow::Owned(v);
},
Cow::Borrowed(_) => {},
}
}
// Reuse the old heap buffer.
Cow::Owned(s.into_bytes())
},
}
},
None => encode_utf8(input),
}
}
}

impl Debug for EncodingOverrideRs {
    /// Prints `EncodingOverride { encoding: <name> }`, using the literal text
    /// `None` when no explicit encoding is stored.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let shown = match self.encoding {
            Some(encoding) => encoding.name(),
            None => "None",
        };
        f.write_str("EncodingOverride { encoding: ")?;
        f.write_str(shown)?;
        f.write_str(" }")
    }
}
53 changes: 53 additions & 0 deletions src/encoding/fallback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2013-2018 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Implementation using UTF-8 only.
//! Used when building without any query encoding feature flags.
use std::borrow::Cow;

use encoding::EncodingOverride;
use encoding::utf8_helpers::{decode_utf8_lossy, encode_utf8};

/// Zero-sized encoding override used when no query-encoding feature is
/// enabled; it always stands for UTF-8.
#[derive(Copy, Clone, Debug)]
pub struct EncodingOverrideFallback;

impl EncodingOverrideFallback {
    /// Creates the (only possible) fallback override.
    #[inline]
    pub fn utf8() -> Self {
        Self {}
    }
}

impl EncodingOverride for EncodingOverrideFallback {
fn utf8() -> Self {
Self {}
}

fn lookup(_label: &[u8]) -> Option<Self> {
// always return `None` which means UTF-8
None
}

fn is_utf8(&self) -> bool {
true
}

fn name(&self) -> &'static str {
"utf-8"
}

fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
decode_utf8_lossy(input)
}

fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
encode_utf8(input)
}
}
96 changes: 96 additions & 0 deletions src/encoding/legacy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2013-2018 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Legacy implementation using
//! [rust-encoding](https://github.com/lifthrasiir/rust-encoding).
//! Only built when setting feature flag `query_encoding`.
//! Use feature flag `query_encoding_2` for the new `encoding_rs` implementation.
extern crate encoding;

use encoding::EncodingOverride;
use encoding::utf8_helpers::{decode_utf8_lossy, encode_utf8};

use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};

use self::encoding::types::{DecoderTrap, EncoderTrap};
use self::encoding::label::encoding_from_whatwg_label;
pub use self::encoding::types::EncodingRef;

/// Query-encoding override backed by a rust-encoding `EncodingRef`.
#[derive(Copy, Clone)]
pub struct EncodingOverrideLegacy {
    /// The selected encoding; `None` means UTF-8.
    encoding: Option<EncodingRef>,
}

impl EncodingOverrideLegacy {
    /// Builds an override from an optional encoding; `None` selects UTF-8.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        match encoding {
            Some(chosen) => Self::from_encoding(chosen),
            None => Self::utf8(),
        }
    }

    /// Wraps `encoding`, normalising it to `None` when its name is exactly `"utf-8"`.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        let stored = match encoding.name() {
            "utf-8" => None,
            _ => Some(encoding),
        };
        EncodingOverrideLegacy { encoding: stored }
    }
}

impl EncodingOverride for EncodingOverrideLegacy {
#[inline]
fn utf8() -> Self {
Self { encoding: None }
}

fn lookup(label: &[u8]) -> Option<Self> {
// Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
// https://encoding.spec.whatwg.org/#names-and-labels
::std::str::from_utf8(label)
.ok()
.and_then(encoding_from_whatwg_label)
.map(Self::from_encoding)
}

fn is_utf8(&self) -> bool {
self.encoding.is_none()
}

fn name(&self) -> &'static str {
match self.encoding {
Some(encoding) => encoding.name(),
None => "utf-8",
}
}

fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
match self.encoding {
// `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(),
None => decode_utf8_lossy(input),
}
}

fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
match self.encoding {
// `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()),
None => encode_utf8(input)
}
}
}

impl Debug for EncodingOverrideLegacy {
    /// Prints `EncodingOverride { encoding: <name> }`, using the literal text
    /// `None` when no explicit encoding is stored.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let shown = match self.encoding {
            Some(encoding) => encoding.name(),
            None => "None",
        };
        f.write_str("EncodingOverride { encoding: ")?;
        f.write_str(shown)?;
        f.write_str(" }")
    }
}
96 changes: 96 additions & 0 deletions src/encoding/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2013-2018 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Abstraction that conditionally compiles either to encoding_rs,
//! or rust-encoding (legacy), or to only support UTF-8.
mod utf8_helpers;

use std::borrow::Cow;
use std::fmt::Debug;

#[cfg(feature = "query_encoding_2")] mod encoding_rs;
#[cfg(feature = "query_encoding_2")] use self::encoding_rs::EncodingOverrideRs;

#[cfg(feature = "query_encoding")] mod legacy;
#[cfg(feature = "query_encoding")] pub use self::legacy::{EncodingOverrideLegacy, EncodingRef};

#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_2")))]
mod fallback;
#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_2")))]
use self::fallback::EncodingOverrideFallback;


/// Common interface of the query-encoding back ends (encoding_rs,
/// rust-encoding, or the UTF-8-only fallback).
pub trait EncodingOverride: Debug {
    /// Get an Encoding representing UTF-8.
    fn utf8() -> Self where Self: Sized;

    /// Look up an Encoding using the WHATWG label,
    /// listed at https://encoding.spec.whatwg.org/#names-and-labels
    ///
    /// Returns `None` when the label is not recognised.
    fn lookup(label: &[u8]) -> Option<Self> where Self: Sized;

    /// Whether this Encoding represents UTF-8.
    fn is_utf8(&self) -> bool;

    /// Get the name of this Encoding, which when ASCII lowercased, may be used as a
    /// lookup label. https://encoding.spec.whatwg.org/#names-and-labels
    fn name(&self) -> &'static str;

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// Returns UTF-8 when this encoding is `replacement`, `UTF-16BE` or
    /// `UTF-16LE` (compared ASCII case-insensitively by name); otherwise
    /// returns `self` unchanged.
    fn to_output_encoding(self) -> Self where Self: Sized {
        // `eq_ignore_ascii_case` is only an inherent `str` method on newer
        // toolchains; older ones need the (now deprecated) `AsciiExt` trait in scope.
        #[allow(unused_imports, deprecated)]
        use std::ascii::AsciiExt;

        if self.is_utf8() {
            return self;
        }
        let name = self.name();
        // Names are ASCII, so compare without allocating a lowercased copy.
        let maps_to_utf8 = name.eq_ignore_ascii_case("replacement")
            || name.eq_ignore_ascii_case("utf-16be")
            || name.eq_ignore_ascii_case("utf-16le");
        if maps_to_utf8 {
            Self::utf8()
        } else {
            self
        }
    }

    /// Decode the specified bytes in the current encoding, to UTF-8.
    fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str>;

    /// Encode the UTF-8 string to the current encoding.
    fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]>;
}

/// Default override (UTF-8) when the `query_encoding_2` feature is enabled.
#[cfg(feature = "query_encoding_2")]
pub fn default_encoding_override() -> EncodingOverrideRs {
    <EncodingOverrideRs as EncodingOverride>::utf8()
}

/// Resolves an optional encoding label with the `encoding_rs` back end.
///
/// Falls back to UTF-8 when `label` is `None` or the lookup does not
/// recognise it.
#[cfg(feature = "query_encoding_2")]
pub fn encoding_override_for_label(label: Option<&[u8]>) -> EncodingOverrideRs {
    label
        .and_then(EncodingOverrideRs::lookup)
        .unwrap_or_else(EncodingOverrideRs::utf8)
}

/// Default override (UTF-8) when only the legacy `query_encoding` feature is enabled.
#[cfg(all(feature = "query_encoding", not(feature = "query_encoding_2")))]
pub fn default_encoding_override() -> EncodingOverrideLegacy {
    <EncodingOverrideLegacy as EncodingOverride>::utf8()
}

/// Resolves an optional encoding label with the legacy rust-encoding back end.
///
/// Falls back to UTF-8 when `label` is `None` or the lookup does not
/// recognise it.
#[cfg(all(feature = "query_encoding", not(feature = "query_encoding_2")))]
pub fn encoding_override_for_label(label: Option<&[u8]>) -> EncodingOverrideLegacy {
    label
        .and_then(EncodingOverrideLegacy::lookup)
        .unwrap_or_else(EncodingOverrideLegacy::utf8)
}

#[cfg(not(any(feature = "query_encoding", feature = "query_encoding_2")))]
pub fn default_encoding_override() -> EncodingOverrideFallback {
EncodingOverrideFallback::utf8()
}
36 changes: 36 additions & 0 deletions src/encoding/utf8_helpers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2013-2018 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! UTF-8 encode and decode methods.
use std::borrow::Cow;

/// Decodes `input` as UTF-8, replacing invalid sequences with U+FFFD.
///
/// * Borrowed input is handed to `String::from_utf8_lossy`, so valid input
///   stays borrowed.
/// * Owned input that is already valid UTF-8 is converted in place, reusing
///   the `Vec`'s allocation.
/// * Owned input containing invalid sequences is decoded lossily into a
///   freshly allocated `String`.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        // `String::from_utf8` validates and, on success, takes over the
        // existing buffer, so no `unsafe` is needed to reuse the allocation.
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            Ok(text) => Cow::Owned(text),
            // Validation failed: recover the original bytes and decode lossily.
            Err(invalid) => {
                Cow::Owned(String::from_utf8_lossy(&invalid.into_bytes()).into_owned())
            }
        },
    }
}

/// Exposes the UTF-8 bytes of `input` without copying: a borrowed string
/// yields a borrowed slice, an owned string yields its byte buffer.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    match input {
        Cow::Owned(owned) => owned.into_bytes().into(),
        Cow::Borrowed(borrowed) => borrowed.as_bytes().into(),
    }
}
77 changes: 61 additions & 16 deletions src/form_urlencoded.rs
Original file line number Diff line number Diff line change
@@ -14,9 +14,13 @@
//! and a sequence of (name, value) pairs.
use encoding::EncodingOverride;
#[cfg(feature = "query_encoding")] use encoding::EncodingOverrideLegacy;
use encoding::default_encoding_override;
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))] use encoding::encoding_override_for_label;
use percent_encoding::{percent_encode_byte, percent_decode};
use std::borrow::{Borrow, Cow};
use std::fmt;
use std::rc::Rc;
use std::str;


@@ -31,11 +35,34 @@ use std::str;
pub fn parse(input: &[u8]) -> Parse {
Parse {
input: input,
encoding: EncodingOverride::utf8(),
encoding: Rc::new(default_encoding_override()),
}
}


/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs, decoding with the encoding
/// named by a WHATWG label.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// This function is only available if the `query_encoding_2` or `query_encoding`
/// [feature](http://doc.crates.io/manifest.html#the-features-section) is enabled.
///
/// Arguments:
///
/// * `encoding_label`: Label of the character encoding that each name and value
///   is decoded as after percent-decoding. Defaults to UTF-8.
///   Labels are listed at https://encoding.spec.whatwg.org/#names-and-labels
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
pub fn parse_with_encoding_label<'a>(input: &'a [u8],
                                     encoding_label: Option<&[u8]>,
                                     use_charset: bool)
                                     -> Result<Parse<'a>, ()> {
    parse_with_encoding_override(input, encoding_override_for_label(encoding_label), use_charset)
}

/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
@@ -51,13 +78,23 @@ pub fn parse(input: &[u8]) -> Parse {
/// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding).
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
#[cfg(feature = "query_encoding")]
#[deprecated(note="Build with `query_encoding_2` instead")]
pub fn parse_with_encoding<'a>(input: &'a [u8],
encoding_override: Option<::encoding::EncodingRef>,
use_charset: bool)
-> Result<Parse<'a>, ()> {
let encoding = EncodingOverrideLegacy::from_opt_encoding(encoding_override);
parse_with_encoding_override(input, encoding, use_charset)
}

#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
fn parse_with_encoding_override<'a, E>(input: &'a [u8],
mut encoding: E,
use_charset: bool)
-> Result<Parse<'a>, ()>
where E: 'static + EncodingOverride {
use std::ascii::AsciiExt;

let mut encoding = EncodingOverride::from_opt_encoding(encoding_override);
if !(encoding.is_utf8() || input.is_ascii()) {
return Err(())
}
@@ -77,15 +114,15 @@ pub fn parse_with_encoding<'a>(input: &'a [u8],
}
Ok(Parse {
input: input,
encoding: encoding,
encoding: Rc::new(encoding),
})
}

/// The return type of `parse()`.
#[derive(Copy, Clone, Debug)]
#[derive(Clone, Debug)]
pub struct Parse<'a> {
input: &'a [u8],
encoding: EncodingOverride,
encoding: Rc<EncodingOverride>,
}

impl<'a> Iterator for Parse<'a> {
@@ -106,14 +143,14 @@ impl<'a> Iterator for Parse<'a> {
let name = split2.next().unwrap();
let value = split2.next().unwrap_or(&[][..]);
return Some((
decode(name, self.encoding),
decode(value, self.encoding),
decode(name, &*self.encoding),
decode(value, &*self.encoding),
))
}
}
}

fn decode(input: &[u8], encoding: EncodingOverride) -> Cow<str> {
fn decode<'i>(input: &'i [u8], encoding: &EncodingOverride) -> Cow<'i, str> {
let replaced = replace_plus(input);
encoding.decode(match percent_decode(&replaced).if_any() {
Some(vec) => Cow::Owned(vec),
@@ -216,7 +253,7 @@ impl<'a> Iterator for ByteSerialize<'a> {
pub struct Serializer<T: Target> {
target: Option<T>,
start_position: usize,
encoding: EncodingOverride,
encoding: Rc<EncodingOverride>,
custom_encoding: Option<SilentDebug<Box<FnMut(&str) -> Cow<[u8]>>>>,
}

@@ -281,7 +318,7 @@ impl<T: Target> Serializer<T> {
Serializer {
target: Some(target),
start_position: start_position,
encoding: EncodingOverride::utf8(),
encoding: Rc::new(default_encoding_override()),
custom_encoding: None,
}
}
@@ -294,10 +331,18 @@ impl<T: Target> Serializer<T> {
self
}

/// Set the character encoding to be used for names and values before percent-encoding.
///
/// `label` is resolved with `encoding_override_for_label`, and the result is
/// converted with `to_output_encoding` before being stored.
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
pub fn encoding_override_for_label(&mut self, label: Option<&[u8]>) -> &mut Self {
    let output_encoding = encoding_override_for_label(label).to_output_encoding();
    self.encoding = Rc::new(output_encoding);
    self
}

/// Set the character encoding to be used for names and values before percent-encoding.
#[cfg(feature = "query_encoding")]
#[deprecated(note="Build with `query_encoding_2` instead")]
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
self.encoding = Rc::new(EncodingOverrideLegacy::from_opt_encoding(new).to_output_encoding());
self
}

@@ -313,7 +358,7 @@ impl<T: Target> Serializer<T> {
///
/// Panics if called after `.finish()`.
pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
append_pair(string(&mut self.target), self.start_position, self.encoding,
append_pair(string(&mut self.target), self.start_position, &*self.encoding,
&mut self.custom_encoding, name, value);
self
}
@@ -331,7 +376,7 @@ impl<T: Target> Serializer<T> {
let string = string(&mut self.target);
for pair in iter {
let &(ref k, ref v) = pair.borrow();
append_pair(string, self.start_position, self.encoding,
append_pair(string, self.start_position, &*self.encoding,
&mut self.custom_encoding, k.as_ref(), v.as_ref());
}
}
@@ -343,7 +388,7 @@ impl<T: Target> Serializer<T> {
/// (See the `encoding_override()` method.)
///
/// Panics if called after `.finish()`.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
pub fn append_charset(&mut self) -> &mut Self {
assert!(self.custom_encoding.is_none(),
"Cannot use both custom_encoding_override() and append_charset()");
@@ -383,7 +428,7 @@ fn string<T: Target>(target: &mut Option<T>) -> &mut String {
target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string()
}

fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride,
fn append_pair(string: &mut String, start_position: usize, encoding: &EncodingOverride,
custom_encoding: &mut Option<SilentDebug<Box<FnMut(&str) -> Cow<[u8]>>>>,
name: &str, value: &str) {
append_separator_if_needed(string, start_position);
@@ -392,7 +437,7 @@ fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOve
append_encoded(value, string, encoding, custom_encoding);
}

fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride,
fn append_encoded(s: &str, string: &mut String, encoding: &EncodingOverride,
custom_encoding: &mut Option<SilentDebug<Box<FnMut(&str) -> Cow<[u8]>>>>) {
let bytes = if let Some(SilentDebug(ref mut custom)) = *custom_encoding {
custom(s)
30 changes: 24 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -115,6 +115,9 @@ pub extern crate idna;
pub extern crate percent_encoding;

use encoding::EncodingOverride;
use encoding::default_encoding_override;
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))] use encoding::encoding_override_for_label;
#[cfg(feature = "query_encoding")] use encoding::EncodingOverrideLegacy;
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
use host::HostInternal;
use parser::{Parser, Context, SchemeType, to_u32, ViolationFn};
@@ -130,6 +133,7 @@ use std::mem;
use std::net::{ToSocketAddrs, IpAddr};
use std::ops::{Range, RangeFrom, RangeTo};
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::str;

pub use origin::{Origin, OpaqueOrigin};
@@ -182,10 +186,10 @@ impl HeapSizeOf for Url {
}

/// Full configuration for the URL parser.
#[derive(Copy, Clone)]
#[derive(Clone)]
pub struct ParseOptions<'a> {
base_url: Option<&'a Url>,
encoding_override: encoding::EncodingOverride,
encoding_override: Rc<EncodingOverride>,
violation_fn: ViolationFn<'a>,
}

@@ -196,6 +200,19 @@ impl<'a> ParseOptions<'a> {
self
}

/// Override the character encoding of query strings, selected by label.
/// This is a legacy concept only relevant for HTML.
///
/// Labels are listed at https://encoding.spec.whatwg.org/#names-and-labels
///
/// This method is only available if the `query_encoding_2` or `query_encoding`
/// [feature](http://doc.crates.io/manifest.html#the-features-section) is enabled.
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
pub fn encoding_override_for_label(mut self, label: Option<&[u8]>) -> Self {
    let output_encoding = encoding_override_for_label(label).to_output_encoding();
    self.encoding_override = Rc::new(output_encoding);
    self
}

/// Override the character encoding of query strings.
/// This is a legacy concept only relevant for HTML.
///
@@ -204,8 +221,9 @@ impl<'a> ParseOptions<'a> {
/// This method is only available if the `query_encoding`
/// [feature](http://doc.crates.io/manifest.html#the-features-section) is enabled.
#[cfg(feature = "query_encoding")]
#[deprecated(note="Build with `query_encoding_2` instead")]
pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self {
self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding();
self.encoding_override = Rc::new(EncodingOverrideLegacy::from_opt_encoding(new).to_output_encoding());
self
}

@@ -258,7 +276,7 @@ impl<'a> ParseOptions<'a> {
Parser {
serialization: String::with_capacity(input.len()),
base_url: self.base_url,
query_encoding_override: self.encoding_override,
query_encoding_override: self.encoding_override.clone(),
violation_fn: self.violation_fn,
context: Context::UrlParser,
}.parse_url(input)
@@ -401,7 +419,7 @@ impl Url {
pub fn options<'a>() -> ParseOptions<'a> {
ParseOptions {
base_url: None,
encoding_override: EncodingOverride::utf8(),
encoding_override: Rc::new(default_encoding_override()),
violation_fn: ViolationFn::NoOp,
}
}
@@ -1143,7 +1161,7 @@ impl Url {
/// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
/// let mut pairs = url.query_pairs();
///
/// assert_eq!(pairs.count(), 2);
/// assert_eq!(pairs.clone().count(), 2);
///
/// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
/// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
10 changes: 6 additions & 4 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -9,10 +9,12 @@
use std::ascii::AsciiExt;
use std::error::Error;
use std::fmt::{self, Formatter, Write};
use std::rc::Rc;
use std::str;

use Url;
use encoding::EncodingOverride;
use encoding::default_encoding_override;
use host::{Host, HostInternal};
use percent_encoding::{
utf8_percent_encode, percent_encode,
@@ -315,7 +317,7 @@ impl<'a> fmt::Debug for ViolationFn<'a> {
pub struct Parser<'a> {
pub serialization: String,
pub base_url: Option<&'a Url>,
pub query_encoding_override: EncodingOverride,
pub query_encoding_override: Rc<EncodingOverride>,
pub violation_fn: ViolationFn<'a>,
pub context: Context,
}
@@ -332,7 +334,7 @@ impl<'a> Parser<'a> {
Parser {
serialization: serialization,
base_url: None,
query_encoding_override: EncodingOverride::utf8(),
query_encoding_override: Rc::new(default_encoding_override()),
violation_fn: ViolationFn::NoOp,
context: Context::Setter,
}
@@ -1151,8 +1153,8 @@ impl<'a> Parser<'a> {
}

let encoding = match &self.serialization[..scheme_end as usize] {
"http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override,
_ => EncodingOverride::utf8(),
"http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override.clone(),
_ => Rc::new(default_encoding_override()),
};
let query_bytes = encoding.encode(query.into());
self.serialization.extend(percent_encode(&query_bytes, QUERY_ENCODE_SET));
6 changes: 3 additions & 3 deletions tests/unit.rs
Original file line number Diff line number Diff line change
@@ -528,16 +528,16 @@ fn test_syntax_violation_callback_lifetimes() {
}

#[test]
fn test_options_reuse() {
fn test_options_clone() {
use url::SyntaxViolation::*;
let violations = RefCell::new(Vec::new());
let vfn = |v| violations.borrow_mut().push(v);

let options = Url::options()
.syntax_violation_callback(Some(&vfn));
let url = options.parse("http:////mozilla.org").unwrap();
let url = options.clone().parse("http:////mozilla.org").unwrap();

let options = options.base_url(Some(&url));
let options = options.clone().base_url(Some(&url));
let url = options.parse("/sub\\path").unwrap();
assert_eq!(url.as_str(), "http://mozilla.org/sub/path");
assert_eq!(*violations.borrow(),