diff --git a/.travis.yml b/.travis.yml index e58559ec2..f3417d793 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,28 +1,12 @@ language: rust +script: cargo test --all-features --all jobs: include: - - rust: 1.17.0 - install: - # --precise requires Cargo.lock to already exist - - cargo update - # getopts is only used in tests. Its versions 0.2.16+ don’t build on 1.17.0 - - cargo update -p getopts --precise 0.2.15 - - - cargo update -p unicode-normalization --precise 0.1.5 - - # data-url uses pub(crate) which is unstable in 1.17 - script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde - + - rust: 1.30.0 - rust: stable - script: cargo test --all-features --all - - rust: beta - script: cargo test --all-features --all - - rust: nightly - script: cargo test --all-features --all - - rust: nightly env: TARGET=WASM32 # For job list UI install: rustup target add wasm32-unknown-unknown diff --git a/Cargo.toml b/Cargo.toml index 40e6b64f5..4c69f8c85 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "url" # When updating version, also modify html_root_url in the lib.rs -version = "1.7.2" +version = "2.0.0" authors = ["The rust-url developers"] description = "URL library for Rust, based on the WHATWG URL Standard" @@ -18,7 +18,7 @@ travis-ci = { repository = "servo/rust-url" } appveyor = { repository = "Manishearth/rust-url" } [workspace] -members = [".", "idna", "percent_encoding", "url_serde", "data-url"] +members = [".", "idna", "percent_encoding", "data-url"] [[test]] name = "unit" @@ -32,27 +32,16 @@ test = false [dev-dependencies] rustc-test = "0.3" -rustc-serialize = "0.3" -serde_json = ">=0.6.1, <0.9" +serde_json = "1.0" bencher = "0.1" -[features] -query_encoding = ["encoding"] -heap_size = ["heapsize"] - [dependencies] -encoding = {version = "0.2", optional = true} -heapsize = {version = ">=0.4.1, <0.5", optional = true} -idna = { version = "0.1.0", path = "./idna" } +idna = { version = "0.2.0", path = "./idna" } matches = "0.1" percent-encoding = { version = "1.0.0", path = "./percent_encoding" } -rustc-serialize = {version = "0.3", optional = true} -serde = {version = ">=0.6.1, <0.9", optional = true} +serde = {version = "1.0", optional = true} [[bench]] name = "parse_url" harness = false - -[package.metadata.docs.rs] -features = ["query_encoding"] diff --git a/data-url/src/forgiving_base64.rs b/data-url/src/forgiving_base64.rs index a4dbae75d..95dd92232 100644 --- a/data-url/src/forgiving_base64.rs +++ b/data-url/src/forgiving_base64.rs @@ -29,7 +29,7 @@ impl From> for InvalidBase64 { fn from(e: DecodeError) -> Self { match e { DecodeError::InvalidBase64(e) => e, - DecodeError::WriteError(e) => match e {} + DecodeError::WriteError(e) => match e {}, } } } @@ -46,14 +46,20 @@ pub fn decode_to_vec(input: &[u8]) -> Result, InvalidBase64> { } /// -pub struct Decoder where F: FnMut(&[u8]) -> Result<(), E> { +pub struct Decoder +where + F: FnMut(&[u8]) -> Result<(), E>, +{ write_bytes: F, bit_buffer: u32, buffer_bit_length: u8, padding_symbols: u8, } -impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { +impl Decoder +where + F: FnMut(&[u8]) -> Result<(), E>, +{ pub fn new(write_bytes: F) -> Self { Self { write_bytes, @@ -72,12 +78,12 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { // Remove ASCII whitespace if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0C') { - continue + continue; } if byte == b'=' { self.padding_symbols = self.padding_symbols.saturating_add(1); - continue + continue; } Err(InvalidBase64Details::UnexpectedSymbol(byte))? @@ -115,32 +121,22 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { (12, 2) | (12, 0) => { // A multiple of four of alphabet symbols, followed by two more symbols, // optionally followed by two padding characters (which make a total multiple of four). - let byte_buffer = [ - (self.bit_buffer >> 4) as u8, - ]; + let byte_buffer = [(self.bit_buffer >> 4) as u8]; (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; } (18, 1) | (18, 0) => { // A multiple of four of alphabet symbols, followed by three more symbols, // optionally followed by one padding character (which make a total multiple of four). - let byte_buffer = [ - (self.bit_buffer >> 10) as u8, - (self.bit_buffer >> 2) as u8, - ]; + let byte_buffer = [(self.bit_buffer >> 10) as u8, (self.bit_buffer >> 2) as u8]; (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; } - (6, _) => { - Err(InvalidBase64Details::LoneAlphabetSymbol)? - } - _ => { - Err(InvalidBase64Details::Padding)? - } + (6, _) => Err(InvalidBase64Details::LoneAlphabetSymbol)?, + _ => Err(InvalidBase64Details::Padding)?, } Ok(()) } } - /// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet" /// at /// @@ -148,6 +144,7 @@ impl Decoder where F: FnMut(&[u8]) -> Result<(), E> { /// Array values are their positions in the base64 alphabet, /// or -1 for symbols not in the alphabet. /// The position contributes 6 bits to the decoded bytes. +#[rustfmt::skip] const BASE64_DECODE_TABLE: [i8; 256] = [ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, diff --git a/data-url/src/lib.rs b/data-url/src/lib.rs index cd5788f78..34bbd1927 100644 --- a/data-url/src/lib.rs +++ b/data-url/src/lib.rs @@ -15,14 +15,15 @@ //! assert!(fragment.is_none()); //! ``` -#[macro_use] extern crate matches; +#[macro_use] +extern crate matches; macro_rules! require { ($condition: expr) => { if !$condition { - return None + return None; } - } + }; } pub mod forgiving_base64; @@ -53,7 +54,11 @@ impl<'a> DataUrl<'a> { let (mime_type, base64) = parse_header(from_colon_to_comma); - Ok(DataUrl { mime_type, base64, encoded_body_plus_fragment }) + Ok(DataUrl { + mime_type, + base64, + encoded_body_plus_fragment, + }) } pub fn mime_type(&self) -> &mime::Mime { @@ -62,9 +67,12 @@ impl<'a> DataUrl<'a> { /// Streaming-decode the data URL’s body to `write_body_bytes`, /// and return the URL’s fragment identifier if it has one. - pub fn decode(&self, write_body_bytes: F) - -> Result>, forgiving_base64::DecodeError> - where F: FnMut(&[u8]) -> Result<(), E> + pub fn decode( + &self, + write_body_bytes: F, + ) -> Result>, forgiving_base64::DecodeError> + where + F: FnMut(&[u8]) -> Result<(), E>, { if self.base64 { decode_with_base64(self.encoded_body_plus_fragment, write_body_bytes) @@ -75,9 +83,9 @@ impl<'a> DataUrl<'a> { } /// Return the decoded body, and the URL’s fragment identifier if it has one. - pub fn decode_to_vec(&self) - -> Result<(Vec, Option>), forgiving_base64::InvalidBase64> - { + pub fn decode_to_vec( + &self, + ) -> Result<(Vec, Option>), forgiving_base64::InvalidBase64> { let mut body = Vec::new(); let fragment = self.decode(|bytes| Ok(body.extend_from_slice(bytes)))?; Ok((body, fragment)) @@ -100,7 +108,7 @@ impl<'a> FragmentIdentifier<'a> { percent_encode(byte, &mut string) } // Printable ASCII - _ => string.push(byte as char) + _ => string.push(byte as char), } } string @@ -125,7 +133,9 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> { let mut bytes = left_trimmed.bytes(); { // Ignore ASCII tabs or newlines like the URL parser would - let mut iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); + let mut iter = bytes + .by_ref() + .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); require!(iter.next()?.to_ascii_lowercase() == b'd'); require!(iter.next()?.to_ascii_lowercase() == b'a'); require!(iter.next()?.to_ascii_lowercase() == b't'); @@ -142,10 +152,10 @@ fn pretend_parse_data_url(input: &str) -> Option<&str> { fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> { for (i, byte) in after_colon.bytes().enumerate() { if byte == b',' { - return Some((&after_colon[..i], &after_colon[i + 1..])) + return Some((&after_colon[..i], &after_colon[i + 1..])); } if byte == b'#' { - break + break; } } None @@ -187,18 +197,16 @@ fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) { } // Printable ASCII - _ => string.push(byte as char) + _ => string.push(byte as char), } } // FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm? // - let mime_type = string.parse().unwrap_or_else(|_| { - mime::Mime { - type_: String::from("text"), - subtype: String::from("plain"), - parameters: vec![(String::from("charset"), String::from("US-ASCII"))], - } + let mime_type = string.parse().unwrap_or_else(|_| mime::Mime { + type_: String::from("text"), + subtype: String::from("plain"), + parameters: vec![(String::from("charset"), String::from("US-ASCII"))], }); (mime_type, base64) @@ -209,7 +217,9 @@ fn remove_base64_suffix(s: &str) -> Option<&str> { let mut bytes = s.bytes(); { // Ignore ASCII tabs or newlines like the URL parser would - let iter = bytes.by_ref().filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); + let iter = bytes + .by_ref() + .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); // Search from the end let mut iter = iter.rev(); @@ -240,9 +250,12 @@ fn percent_encode(byte: u8, string: &mut String) { /// Anything that would have been UTF-8 percent-encoded by the URL parser /// would be percent-decoded here. /// We skip that round-trip and pass it through unchanged. -fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes: F) - -> Result, E> - where F: FnMut(&[u8]) -> Result<(), E> +fn decode_without_base64( + encoded_body_plus_fragment: &str, + mut write_bytes: F, +) -> Result, E> +where + F: FnMut(&[u8]) -> Result<(), E>, { let bytes = encoded_body_plus_fragment.as_bytes(); let mut slice_start = 0; @@ -275,11 +288,11 @@ fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes b'#' => { let fragment_start = i + 1; let fragment = &encoded_body_plus_fragment[fragment_start..]; - return Ok(Some(FragmentIdentifier(fragment))) + return Ok(Some(FragmentIdentifier(fragment))); } // Ignore over '\t' | '\n' | '\r' - _ => slice_start = i + 1 + _ => slice_start = i + 1, } } } @@ -290,9 +303,12 @@ fn decode_without_base64(encoded_body_plus_fragment: &str, mut write_bytes /// `decode_without_base64()` composed with /// composed with /// . -fn decode_with_base64(encoded_body_plus_fragment: &str, write_bytes: F) - -> Result, forgiving_base64::DecodeError> - where F: FnMut(&[u8]) -> Result<(), E> +fn decode_with_base64( + encoded_body_plus_fragment: &str, + write_bytes: F, +) -> Result, forgiving_base64::DecodeError> +where + F: FnMut(&[u8]) -> Result<(), E>, { let mut decoder = forgiving_base64::Decoder::new(write_bytes); let fragment = decode_without_base64(encoded_body_plus_fragment, |bytes| decoder.feed(bytes))?; diff --git a/data-url/src/mime.rs b/data-url/src/mime.rs index 101b240a1..fbbe1150a 100644 --- a/data-url/src/mime.rs +++ b/data-url/src/mime.rs @@ -7,14 +7,16 @@ pub struct Mime { pub type_: String, pub subtype: String, /// (name, value) - pub parameters: Vec<(String, String)> + pub parameters: Vec<(String, String)>, } impl Mime { pub fn get_parameter

(&self, name: &P) -> Option<&str> - where P: ?Sized + PartialEq + where + P: ?Sized + PartialEq, { - self.parameters.iter() + self.parameters + .iter() .find(|&&(ref n, _)| name == &**n) .map(|&(_, ref v)| &**v) } @@ -67,11 +69,11 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { let piece = piece.trim_left_matches(ascii_whitespace); let (name, value) = split2(piece, '='); if name.is_empty() || !only_http_token_code_points(name) || contains(¶meters, name) { - continue + continue; } if let Some(value) = value { let value = if value.starts_with('"') { - let max_len = value.len().saturating_sub(2); // without start or end quotes + let max_len = value.len().saturating_sub(2); // without start or end quotes let mut unescaped_value = String::with_capacity(max_len); let mut chars = value[1..].chars(); 'until_closing_quote: loop { @@ -79,7 +81,7 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { match c { '"' => break 'until_closing_quote, '\\' => unescaped_value.push(chars.next().unwrap_or('\\')), - _ => unescaped_value.push(c) + _ => unescaped_value.push(c), } } if let Some(piece) = semicolon_separated.next() { @@ -88,17 +90,17 @@ fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { unescaped_value.push(';'); chars = piece.chars() } else { - break + break; } } if !valid_value(&unescaped_value) { - continue + continue; } unescaped_value } else { let value = value.trim_right_matches(ascii_whitespace); if !valid_value(value) { - continue + continue; } value.to_owned() }; @@ -160,6 +162,7 @@ macro_rules! byte_map { } // Copied from https://github.com/hyperium/mime/blob/v0.3.5/src/parse.rs#L293 +#[rustfmt::skip] static IS_HTTP_TOKEN: [bool; 256] = byte_map![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/data-url/tests/wpt.rs b/data-url/tests/wpt.rs index 7481529e8..3ab410051 100644 --- a/data-url/tests/wpt.rs +++ b/data-url/tests/wpt.rs @@ -1,6 +1,7 @@ extern crate data_url; extern crate rustc_test; -#[macro_use] extern crate serde; +#[macro_use] +extern crate serde; extern crate serde_json; fn run_data_url(input: String, expected_mime: Option, expected_body: Option>) { @@ -22,11 +23,10 @@ fn run_data_url(input: String, expected_mime: Option, expected_body: Opt } fn collect_data_url(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { - let known_failures = [ - "data://test:test/,X", - ]; + let known_failures = ["data://test:test/,X"]; #[derive(Deserialize)] #[serde(untagged)] @@ -47,7 +47,7 @@ fn collect_data_url(add_test: &mut F) should_panic, rustc_test::TestFn::dyn_test_fn(move || { run_data_url(input, expected_mime, expected_body) - }) + }), ); } } @@ -62,9 +62,9 @@ fn run_base64(input: String, expected: Option>) { } } - fn collect_base64(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { let known_failures = []; @@ -75,9 +75,7 @@ fn collect_base64(add_test: &mut F) add_test( format!("base64 {:?}", input), should_panic, - rustc_test::TestFn::dyn_test_fn(move || { - run_base64(input, expected) - }) + rustc_test::TestFn::dyn_test_fn(move || run_base64(input, expected)), ); } } @@ -92,9 +90,9 @@ fn run_mime(input: String, expected: Option) { } } - fn collect_mime(add_test: &mut F) - where F: FnMut(String, bool, rustc_test::TestFn) +where + F: FnMut(String, bool, rustc_test::TestFn), { let known_failures = []; @@ -102,7 +100,10 @@ fn collect_mime(add_test: &mut F) #[serde(untagged)] enum Entry { Comment(String), - TestCase { input: String, output: Option } + TestCase { + input: String, + output: Option, + }, } let v: Vec = serde_json::from_str(include_str!("mime-types.json")).unwrap(); @@ -115,7 +116,7 @@ fn collect_mime(add_test: &mut F) Entry::TestCase { input, output } => (input, output), Entry::Comment(s) => { last_comment = Some(s); - continue + continue; } }; @@ -127,9 +128,7 @@ fn collect_mime(add_test: &mut F) format!("MIME type {:?}", input) }, should_panic, - rustc_test::TestFn::dyn_test_fn(move || { - run_mime(input, expected) - }) + rustc_test::TestFn::dyn_test_fn(move || run_mime(input, expected)), ); } } diff --git a/docs/.nojekyll b/docs/.nojekyll deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index b13eac0ee..000000000 --- a/docs/404.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index b13eac0ee..000000000 --- a/docs/index.html +++ /dev/null @@ -1,3 +0,0 @@ - - -Moved to docs.rs diff --git a/idna/Cargo.toml b/idna/Cargo.toml index 1050ad35b..2f7155441 100644 --- a/idna/Cargo.toml +++ b/idna/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "idna" -version = "0.1.5" +version = "0.2.0" authors = ["The rust-url developers"] description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." repository = "https://github.com/servo/rust-url/" license = "MIT/Apache-2.0" +autotests = false [lib] doctest = false diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 92b5df230..b1d858fe8 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -32,12 +32,15 @@ //! > that minimizes the impact of this transition for client software, //! > allowing client software to access domains that are valid under either system. -#[macro_use] extern crate matches; +#[macro_use] +extern crate matches; extern crate unicode_bidi; extern crate unicode_normalization; pub mod punycode; -pub mod uts46; +mod uts46; + +pub use uts46::{Config, Errors}; /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. /// @@ -47,11 +50,16 @@ pub mod uts46; /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { - use_std3_ascii_rules: false, - transitional_processing: false, - verify_dns_length: false, - }) + Config::default().to_ascii(domain) +} + +/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, +/// with the `beStrict` flag set. +pub fn domain_to_ascii_strict(domain: &str) -> Result { + Config::default() + .use_std3_ascii_rules(true) + .verify_dns_length(true) + .to_ascii(domain) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. @@ -63,11 +71,5 @@ pub fn domain_to_ascii(domain: &str) -> Result { /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { - uts46::to_unicode(domain, uts46::Flags { - use_std3_ascii_rules: false, - - // Unused: - transitional_processing: false, - verify_dns_length: false, - }) + Config::default().to_unicode(domain) } diff --git a/idna/src/punycode.rs b/idna/src/punycode.rs index acdde5897..829684b14 100644 --- a/idna/src/punycode.rs +++ b/idna/src/punycode.rs @@ -13,10 +13,8 @@ //! `encode_str` and `decode_to_string` provide convenience wrappers //! that convert from and to Rust’s UTF-8 based `str` and `String` types. -use std::u32; use std::char; -#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; +use std::u32; // Bootstring parameters for Punycode static BASE: u32 = 36; @@ -28,7 +26,6 @@ static INITIAL_BIAS: u32 = 72; static INITIAL_N: u32 = 0x80; static DELIMITER: char = '-'; - #[inline] fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 { delta /= if first_time { DAMP } else { 2 }; @@ -41,7 +38,6 @@ fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 { k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW)) } - /// Convert Punycode to an Unicode `String`. /// /// This is a convenience wrapper around `decode`. @@ -50,7 +46,6 @@ pub fn decode_to_string(input: &str) -> Option { decode(input).map(|chars| chars.into_iter().collect()) } - /// Convert Punycode to Unicode. /// /// Return None on malformed input or overflow. @@ -63,8 +58,12 @@ pub fn decode(input: &str) -> Option> { None => (Vec::new(), input), Some(position) => ( input[..position].chars().collect(), - if position > 0 { &input[position + 1..] } else { input } - ) + if position > 0 { + &input[position + 1..] + } else { + input + }, + ), }; let mut code_point = INITIAL_N; let mut bias = INITIAL_BIAS; @@ -82,35 +81,39 @@ pub fn decode(input: &str) -> Option> { // which gets added to i. loop { let digit = match byte { - byte @ b'0' ... b'9' => byte - b'0' + 26, - byte @ b'A' ... b'Z' => byte - b'A', - byte @ b'a' ... b'z' => byte - b'a', - _ => return None + byte @ b'0'..=b'9' => byte - b'0' + 26, + byte @ b'A'..=b'Z' => byte - b'A', + byte @ b'a'..=b'z' => byte - b'a', + _ => return None, } as u32; if digit > (u32::MAX - i) / weight { - return None // Overflow + return None; // Overflow } i += digit * weight; - let t = if k <= bias { T_MIN } - else if k >= bias + T_MAX { T_MAX } - else { k - bias }; + let t = if k <= bias { + T_MIN + } else if k >= bias + T_MAX { + T_MAX + } else { + k - bias + }; if digit < t { - break + break; } if weight > u32::MAX / (BASE - t) { - return None // Overflow + return None; // Overflow } weight *= BASE - t; k += BASE; byte = match iter.next() { - None => return None, // End of input before the end of this delta + None => return None, // End of input before the end of this delta Some(byte) => byte, }; } let length = output.len() as u32; bias = adapt(i - previous_i, length + 1, previous_i == 0); if i / (length + 1) > u32::MAX - code_point { - return None // Overflow + return None; // Overflow } // i was supposed to wrap around from length+1 to 0, // incrementing code_point each time. @@ -118,7 +121,7 @@ pub fn decode(input: &str) -> Option> { i %= length + 1; let c = match char::from_u32(code_point) { Some(c) => c, - None => return None + None => return None, }; output.insert(i as usize, c); i += 1; @@ -126,7 +129,6 @@ pub fn decode(input: &str) -> Option> { Some(output) } - /// Convert an Unicode `str` to Punycode. /// /// This is a convenience wrapper around `encode`. @@ -135,16 +137,16 @@ pub fn encode_str(input: &str) -> Option { encode(&input.chars().collect::>()) } - /// Convert Unicode to Punycode. /// /// Return None on overflow, which can only happen on inputs that would take more than /// 63 encoded bytes, the DNS limit on domain name labels. pub fn encode(input: &[char]) -> Option { // Handle "basic" (ASCII) code points. They are encoded as-is. - let output_bytes = input.iter().filter_map(|&c| - if c.is_ascii() { Some(c as u8) } else { None } - ).collect(); + let output_bytes = input + .iter() + .filter_map(|&c| if c.is_ascii() { Some(c as u8) } else { None }) + .collect(); let mut output = unsafe { String::from_utf8_unchecked(output_bytes) }; let basic_length = output.len() as u32; if basic_length > 0 { @@ -158,10 +160,14 @@ pub fn encode(input: &[char]) -> Option { while processed < input_length { // All code points < code_point have been handled already. // Find the next larger one. - let min_code_point = input.iter().map(|&c| c as u32) - .filter(|&c| c >= code_point).min().unwrap(); + let min_code_point = input + .iter() + .map(|&c| c as u32) + .filter(|&c| c >= code_point) + .min() + .unwrap(); if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) { - return None // Overflow + return None; // Overflow } // Increase delta to advance the decoder’s state to delta += (min_code_point - code_point) * (processed + 1); @@ -171,7 +177,7 @@ pub fn encode(input: &[char]) -> Option { if c < code_point { delta += 1; if delta == 0 { - return None // Overflow + return None; // Overflow } } if c == code_point { @@ -179,11 +185,15 @@ pub fn encode(input: &[char]) -> Option { let mut q = delta; let mut k = BASE; loop { - let t = if k <= bias { T_MIN } - else if k >= bias + T_MAX { T_MAX } - else { k - bias }; + let t = if k <= bias { + T_MIN + } else if k >= bias + T_MAX { + T_MAX + } else { + k - bias + }; if q < t { - break + break; } let value = t + ((q - t) % (BASE - t)); output.push(value_to_digit(value)); @@ -202,12 +212,11 @@ pub fn encode(input: &[char]) -> Option { Some(output) } - #[inline] fn value_to_digit(value: u32) -> char { match value { - 0 ... 25 => (value as u8 + 'a' as u8) as char, // a..z - 26 ... 35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 - _ => panic!() + 0..=25 => (value as u8 + 'a' as u8) as char, // a..z + 26..=35 => (value as u8 - 26 + '0' as u8) as char, // 0..9 + _ => panic!(), } } diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index ac348d1fa..b860acc42 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -11,18 +11,14 @@ use self::Mapping::*; use punycode; -#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; -use std::cmp::Ordering::{Equal, Less, Greater}; -use unicode_bidi::{BidiClass, bidi_class}; -use unicode_normalization::UnicodeNormalization; +use std::cmp::Ordering::{Equal, Greater, Less}; +use unicode_bidi::{bidi_class, BidiClass}; use unicode_normalization::char::is_combining_mark; +use unicode_normalization::UnicodeNormalization; include!("uts46_mapping_table.rs"); - -pub static PUNYCODE_PREFIX: &'static str = "xn--"; - +const PUNYCODE_PREFIX: &'static str = "xn--"; #[derive(Debug)] struct StringTableSlice { @@ -68,28 +64,30 @@ fn find_char(codepoint: char) -> &'static Mapping { Equal } }); - r.ok().map(|i| { - const SINGLE_MARKER: u16 = 1 << 15; + r.ok() + .map(|i| { + const SINGLE_MARKER: u16 = 1 << 15; - let x = INDEX_TABLE[i]; - let single = (x & SINGLE_MARKER) != 0; - let offset = !SINGLE_MARKER & x; + let x = INDEX_TABLE[i]; + let single = (x & SINGLE_MARKER) != 0; + let offset = !SINGLE_MARKER & x; - if single { - &MAPPING_TABLE[offset as usize] - } else { - &MAPPING_TABLE[(offset + (codepoint as u16 - TABLE[i].from as u16)) as usize] - } - }).unwrap() + if single { + &MAPPING_TABLE[offset as usize] + } else { + &MAPPING_TABLE[(offset + (codepoint as u16 - TABLE[i].from as u16)) as usize] + } + }) + .unwrap() } -fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec) { +fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Vec) { match *find_char(codepoint) { Mapping::Valid => output.push(codepoint), - Mapping::Ignored => {}, + Mapping::Ignored => {} Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)), Mapping::Deviation(ref slice) => { - if flags.transitional_processing { + if config.transitional_processing { output.push_str(decode_slice(slice)) } else { output.push(codepoint) @@ -100,13 +98,13 @@ fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec output.push(codepoint); } Mapping::DisallowedStd3Valid => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedByStd3AsciiRules); } output.push(codepoint) } Mapping::DisallowedStd3Mapped(ref slice) => { - if flags.use_std3_ascii_rules { + if config.use_std3_ascii_rules { errors.push(Error::DissallowedMappedInStd3); } output.push_str(decode_slice(slice)) @@ -135,16 +133,23 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { loop { match chars.next() { Some(c) => { - if !matches!(bidi_class(c), - BidiClass::L | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM - ) { + if !matches!( + bidi_class(c), + BidiClass::L + | BidiClass::EN + | BidiClass::ES + | BidiClass::CS + | BidiClass::ET + | BidiClass::ON + | BidiClass::BN + | BidiClass::NSM + ) { return false; } - }, - None => { break; }, + } + None => { + break; + } } } @@ -158,16 +163,18 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { last_non_nsm = rev_chars.next(); continue; } - _ => { break; }, + _ => { + break; + } } } match last_non_nsm { - Some(c) if bidi_class(c) == BidiClass::L - || bidi_class(c) == BidiClass::EN => {}, - Some(_) => { return false; }, + Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {} + Some(_) => { + return false; + } _ => {} } - } // RTL label @@ -188,33 +195,51 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { found_an = true; } - if !matches!(char_class, BidiClass::R | BidiClass::AL | - BidiClass::AN | BidiClass::EN | - BidiClass::ES | BidiClass::CS | - BidiClass::ET | BidiClass::ON | - BidiClass::BN | BidiClass::NSM) { + if !matches!( + char_class, + BidiClass::R + | BidiClass::AL + | BidiClass::AN + | BidiClass::EN + | BidiClass::ES + | BidiClass::CS + | BidiClass::ET + | BidiClass::ON + | BidiClass::BN + | BidiClass::NSM + ) { return false; } - }, - None => { break; }, + } + None => { + break; + } } } // Rule 3 let mut rev_chars = label.chars().rev(); let mut last = rev_chars.next(); - loop { // must end in L or EN followed by 0 or more NSM + loop { + // must end in L or EN followed by 0 or more NSM match last { Some(c) if bidi_class(c) == BidiClass::NSM => { last = rev_chars.next(); continue; } - _ => { break; }, + _ => { + break; + } } } match last { - Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL | - BidiClass::EN | BidiClass::AN) => {}, - _ => { return false; } + Some(c) + if matches!( + bidi_class(c), + BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN + ) => {} + _ => { + return false; + } } // Rule 4 @@ -233,34 +258,30 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool { } /// http://www.unicode.org/reports/tr46/#Validity_Criteria -fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate_full(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { // V1: Must be in NFC form. if label.nfc().ne(label.chars()) { errors.push(Error::ValidityCriteria); } else { - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); } } -fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) { +fn validate(label: &str, is_bidi_domain: bool, config: Config, errors: &mut Vec) { let first_char = label.chars().next(); if first_char == None { // Empty string, pass } - // V2: No U+002D HYPHEN-MINUS in both third and fourth positions. // // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the // third and fourth positions. But nobody follows this criteria. See the spec issue below: // https://github.com/whatwg/url/issues/53 - // - // TODO: Add *CheckHyphens* flag. // V3: neither begin nor end with a U+002D HYPHEN-MINUS - else if label.starts_with("-") || label.ends_with("-") { + else if config.check_hyphens && (label.starts_with("-") || label.ends_with("-")) { errors.push(Error::ValidityCriteria); } - // V4: not contain a U+002E FULL STOP // // Here, label can't contain '.' since the input is from .split('.') @@ -269,17 +290,15 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec false, - Mapping::Deviation(_) => flags.transitional_processing, - Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules, + Mapping::Deviation(_) => config.transitional_processing, + Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules, _ => true, }) { errors.push(Error::ValidityCriteria); } - // V7: ContextJ rules // // TODO: Implement rules and add *CheckJoiners* flag. @@ -287,17 +306,16 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec) -> String { +fn processing(domain: &str, config: Config, errors: &mut Vec) -> String { let mut mapped = String::with_capacity(domain.len()); for c in domain.chars() { - map_char(c, flags, &mut mapped, errors) + map_char(c, config, &mut mapped, errors) } let mut normalized = String::with_capacity(mapped.len()); normalized.extend(mapped.nfc()); @@ -305,18 +323,18 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { // Find out if it's a Bidi Domain Name // // First, check for literal bidi chars - let mut is_bidi_domain = domain.chars().any(|c| - matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN) - ); + let mut is_bidi_domain = domain + .chars() + .any(|c| matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN)); if !is_bidi_domain { // Then check for punycode-encoded bidi chars for label in normalized.split('.') { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - if decoded_label.chars().any(|c| + if decoded_label.chars().any(|c| { matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN) - ) { + }) { is_bidi_domain = true; } } @@ -338,26 +356,124 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec) -> String { if label.starts_with(PUNYCODE_PREFIX) { match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) { Some(decoded_label) => { - let flags = Flags { transitional_processing: false, ..flags }; - validate_full(&decoded_label, is_bidi_domain, flags, errors); + let config = config.transitional_processing(false); + validate_full(&decoded_label, is_bidi_domain, config, errors); validated.push_str(&decoded_label) } - None => errors.push(Error::PunycodeError) + None => errors.push(Error::PunycodeError), } } else { // `normalized` is already `NFC` so we can skip that check - validate(label, is_bidi_domain, flags, errors); + validate(label, is_bidi_domain, config, errors); validated.push_str(label) } } validated } -#[derive(Copy, Clone)] -pub struct Flags { - pub use_std3_ascii_rules: bool, - pub transitional_processing: bool, - pub verify_dns_length: bool, +#[derive(Clone, Copy)] +pub struct Config { + use_std3_ascii_rules: bool, + transitional_processing: bool, + verify_dns_length: bool, + check_hyphens: bool, +} + +/// The defaults are that of https://url.spec.whatwg.org/#idna +impl Default for Config { + fn default() -> Self { + Config { + use_std3_ascii_rules: false, + transitional_processing: false, + check_hyphens: false, + // check_bidi: true, + // check_joiners: true, + + // Only use for to_ascii, not to_unicode + verify_dns_length: false, + } + } +} + +impl Config { + #[inline] + pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { + self.use_std3_ascii_rules = value; + self + } + + #[inline] + pub fn transitional_processing(mut self, value: bool) -> Self { + self.transitional_processing = value; + self + } + + #[inline] + pub fn verify_dns_length(mut self, value: bool) -> Self { + self.verify_dns_length = value; + self + } + + #[inline] + pub fn check_hyphens(mut self, value: bool) -> Self { + self.check_hyphens = value; + self + } + + /// http://www.unicode.org/reports/tr46/#ToASCII + pub fn to_ascii(self, domain: &str) -> Result { + let mut errors = Vec::new(); + let mut result = String::new(); + let mut first = true; + for label in processing(domain, self, &mut errors).split('.') { + if !first { + result.push('.'); + } + first = false; + if label.is_ascii() { + result.push_str(label); + } else { + match punycode::encode_str(label) { + Some(x) => { + result.push_str(PUNYCODE_PREFIX); + result.push_str(&x); + } + None => errors.push(Error::PunycodeError), + } + } + } + + if self.verify_dns_length { + let domain = if result.ends_with(".") { + &result[..result.len() - 1] + } else { + &*result + }; + if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { + errors.push(Error::TooShortForDns) + } + if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { + errors.push(Error::TooLongForDns) + } + } + if errors.is_empty() { + Ok(result) + } else { + Err(Errors(errors)) + } + } + + /// http://www.unicode.org/reports/tr46/#ToUnicode + pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { + let mut errors = Vec::new(); + let domain = processing(domain, self, &mut errors); + let errors = if errors.is_empty() { + Ok(()) + } else { + Err(Errors(errors)) + }; + (domain, errors) + } } #[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -377,57 +493,3 @@ enum Error { /// More details may be exposed in the future. #[derive(Debug)] pub struct Errors(Vec); - -/// http://www.unicode.org/reports/tr46/#ToASCII -pub fn to_ascii(domain: &str, flags: Flags) -> Result { - let mut errors = Vec::new(); - let mut result = String::new(); - let mut first = true; - for label in processing(domain, flags, &mut errors).split('.') { - if !first { - result.push('.'); - } - first = false; - if label.is_ascii() { - result.push_str(label); - } else { - match punycode::encode_str(label) { - Some(x) => { - result.push_str(PUNYCODE_PREFIX); - result.push_str(&x); - }, - None => errors.push(Error::PunycodeError) - } - } - } - - if flags.verify_dns_length { - let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result }; - if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) { - errors.push(Error::TooShortForDns) - } - if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) { - errors.push(Error::TooLongForDns) - } - } - if errors.is_empty() { - Ok(result) - } else { - Err(Errors(errors)) - } -} - -/// http://www.unicode.org/reports/tr46/#ToUnicode -/// -/// Only `use_std3_ascii_rules` is used in `flags`. -pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) { - flags.transitional_processing = false; - let mut errors = Vec::new(); - let domain = processing(domain, flags, &mut errors); - let errors = if errors.is_empty() { - Ok(()) - } else { - Err(Errors(errors)) - }; - (domain, errors) -} diff --git a/idna/tests/punycode.rs b/idna/tests/punycode.rs index 67988e80c..fe5e94edc 100644 --- a/idna/tests/punycode.rs +++ b/idna/tests/punycode.rs @@ -15,19 +15,25 @@ fn one_test(decoded: &str, encoded: &str) { None => panic!("Decoding {} failed.", encoded), Some(result) => { let result = result.into_iter().collect::(); - assert!(result == decoded, - format!("Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", - encoded, result, decoded)) + assert!( + result == decoded, + format!( + "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", + encoded, result, decoded + ) + ) } } match encode_str(decoded) { None => panic!("Encoding {} failed.", decoded), - Some(result) => { - assert!(result == encoded, - format!("Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", - decoded, result, encoded)) - } + Some(result) => assert!( + result == encoded, + format!( + "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", + decoded, result, encoded + ) + ), } } @@ -41,25 +47,29 @@ fn get_string<'a>(map: &'a Object, key: &str) -> &'a str { pub fn collect_tests(add_test: &mut F) { match Json::from_str(include_str!("punycode_tests.json")) { - Ok(Json::Array(tests)) => for (i, test) in tests.into_iter().enumerate() { - match test { - Json::Object(o) => { - let test_name = { - let desc = get_string(&o, "description"); + Ok(Json::Array(tests)) => { + for (i, test) in tests.into_iter().enumerate() { + match test { + Json::Object(o) => { + let test_name = { + let desc = get_string(&o, "description"); if desc.is_empty() { - format!("Punycode {}", i + 1) - } else { - format!("Punycode {}: {}", i + 1, desc) - } - }; - add_test(test_name, TestFn::dyn_test_fn(move || one_test( - get_string(&o, "decoded"), - get_string(&o, "encoded"), - ))) + format!("Punycode {}", i + 1) + } else { + format!("Punycode {}: {}", i + 1, desc) + } + }; + add_test( + test_name, + TestFn::dyn_test_fn(move || { + one_test(get_string(&o, "decoded"), get_string(&o, "encoded")) + }), + ) + } + _ => panic!(), } - _ => panic!(), } - }, - other => panic!("{:?}", other) + } + other => panic!("{:?}", other), } } diff --git a/idna/tests/unit.rs b/idna/tests/unit.rs index a7d158d5c..e24e2a319 100644 --- a/idna/tests/unit.rs +++ b/idna/tests/unit.rs @@ -1,16 +1,13 @@ extern crate idna; extern crate unicode_normalization; -use idna::uts46; use unicode_normalization::char::is_combining_mark; - -fn _to_ascii(domain: &str) -> Result { - uts46::to_ascii(domain, uts46::Flags { - transitional_processing: false, - use_std3_ascii_rules: true, - verify_dns_length: true, - }) +fn _to_ascii(domain: &str) -> Result { + idna::Config::default() + .verify_dns_length(true) + .use_std3_ascii_rules(true) + .to_ascii(domain) } #[test] @@ -29,7 +26,10 @@ fn test_v8_bidi_rules() { assert_eq!(_to_ascii("אבּג").unwrap(), "xn--kdb3bdf"); assert_eq!(_to_ascii("ابج").unwrap(), "xn--mgbcm"); assert_eq!(_to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm"); - assert_eq!(_to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm"); + assert_eq!( + _to_ascii("אבּג.ابج").unwrap(), + "xn--kdb3bdf.xn--mgbcm" + ); // Bidi domain names cannot start with digits assert!(_to_ascii("0a.\u{05D0}").is_err()); diff --git a/idna/tests/uts46.rs b/idna/tests/uts46.rs index 59ec1cd76..b3a5ff334 100644 --- a/idna/tests/uts46.rs +++ b/idna/tests/uts46.rs @@ -7,19 +7,18 @@ // except according to those terms. use std::char; -use idna::uts46; use test::TestFn; pub fn collect_tests(add_test: &mut F) { // http://www.unicode.org/Public/idna/latest/IdnaTest.txt for (i, line) in include_str!("IdnaTest.txt").lines().enumerate() { if line == "" || line.starts_with("#") { - continue + continue; } // Remove comments let mut line = match line.find("#") { Some(index) => &line[0..index], - None => line + None => line, }; let mut expected_failure = false; @@ -35,61 +34,85 @@ pub fn collect_tests(add_test: &mut F) { let source = unescape(original); let to_unicode = pieces.remove(0); let to_ascii = pieces.remove(0); - let nv8 = if pieces.len() > 0 { pieces.remove(0) } else { "" }; + let nv8 = if pieces.len() > 0 { + pieces.remove(0) + } else { + "" + }; if expected_failure { continue; } let test_name = format!("UTS #46 line {}", i + 1); - add_test(test_name, TestFn::dyn_test_fn(move || { - let result = uts46::to_ascii(&source, uts46::Flags { - use_std3_ascii_rules: true, - transitional_processing: test_type == "T", - verify_dns_length: true, - }); + add_test( + test_name, + TestFn::dyn_test_fn(move || { + let result = idna::Config::default() + .use_std3_ascii_rules(true) + .verify_dns_length(true) + .check_hyphens(true) + .transitional_processing(test_type == "T") + .to_ascii(&source); - if to_ascii.starts_with("[") { - if to_ascii.starts_with("[C") { - // http://unicode.org/reports/tr46/#Deviations - // applications that perform IDNA2008 lookup are not required to check - // for these contexts - return; - } - if to_ascii == "[V2]" { - // Everybody ignores V2 - // https://github.com/servo/rust-url/pull/240 - // https://github.com/whatwg/url/issues/53#issuecomment-181528158 - // http://www.unicode.org/review/pri317/ + if to_ascii.starts_with("[") { + if to_ascii.starts_with("[C") { + // http://unicode.org/reports/tr46/#Deviations + // applications that perform IDNA2008 lookup are not required to check + // for these contexts + return; + } + if to_ascii == "[V2]" { + // Everybody ignores V2 + // https://github.com/servo/rust-url/pull/240 + // https://github.com/whatwg/url/issues/53#issuecomment-181528158 + // http://www.unicode.org/review/pri317/ + return; + } + let res = result.ok(); + assert!( + res == None, + "Expected error. result: {} | original: {} | source: {}", + res.unwrap(), + original, + source + ); return; } - let res = result.ok(); - assert!(res == None, "Expected error. result: {} | original: {} | source: {}", - res.unwrap(), original, source); - return; - } - let to_ascii = if to_ascii.len() > 0 { - to_ascii.to_string() - } else { - if to_unicode.len() > 0 { - to_unicode.to_string() + let to_ascii = if to_ascii.len() > 0 { + to_ascii.to_string() } else { - source.clone() - } - }; + if to_unicode.len() > 0 { + to_unicode.to_string() + } else { + source.clone() + } + }; - if nv8 == "NV8" { - // This result isn't valid under IDNA2008. Skip it - return; - } + if nv8 == "NV8" { + // This result isn't valid under IDNA2008. Skip it + return; + } - assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}", - source, original, result.err()); - let output = result.ok().unwrap(); - assert!(output == to_ascii, "result: {} | expected: {} | original: {} | source: {}", - output, to_ascii, original, source); - })) + assert!( + result.is_ok(), + "Couldn't parse {} | original: {} | error: {:?}", + source, + original, + result.err() + ); + let output = result.ok().unwrap(); + assert!( + output == to_ascii, + "result: {} | expected: {} | original: {} | source: {}", + output, + to_ascii, + original, + source + ); + }), + ) } } @@ -99,7 +122,7 @@ fn unescape(input: &str) -> String { loop { match chars.next() { None => return output, - Some(c) => + Some(c) => { if c == '\\' { match chars.next().unwrap() { '\\' => output.push('\\'), @@ -108,10 +131,12 @@ fn unescape(input: &str) -> String { let c2 = chars.next().unwrap().to_digit(16).unwrap(); let c3 = chars.next().unwrap().to_digit(16).unwrap(); let c4 = chars.next().unwrap().to_digit(16).unwrap(); - match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) - { + match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) { Some(c) => output.push(c), - None => { output.push_str(&format!("\\u{:X}{:X}{:X}{:X}",c1,c2,c3,c4)); } + None => { + output + .push_str(&format!("\\u{:X}{:X}{:X}{:X}", c1, c2, c3, c4)); + } }; } _ => panic!("Invalid test data input"), @@ -119,6 +144,7 @@ fn unescape(input: &str) -> String { } else { output.push(c); } + } } } } diff --git a/percent_encoding/lib.rs b/percent_encoding/lib.rs index cb094116d..a5c2987a2 100644 --- a/percent_encoding/lib.rs +++ b/percent_encoding/lib.rs @@ -32,7 +32,6 @@ //! assert_eq!(utf8_percent_encode("foo bar?", DEFAULT_ENCODE_SET).to_string(), "foo%20bar%3F"); //! ``` -use std::ascii::AsciiExt; use std::borrow::Cow; use std::fmt; use std::slice; @@ -176,23 +175,23 @@ define_encode_set! { pub fn percent_encode_byte(byte: u8) -> &'static str { let index = usize::from(byte) * 3; &"\ - %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ - %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ - %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ - %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ - %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ - %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ - %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ - %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ - %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ - %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ - %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ - %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ - %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ - %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ - %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ - %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ - "[index..index + 3] + %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\ + %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\ + %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\ + %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\ + %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\ + %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\ + %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\ + %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\ + %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\ + %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\ + %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\ + %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\ + %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\ + %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\ + %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\ + %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\ + "[index..index + 3] } /// Percent-encode the given bytes with the given encode set. @@ -260,7 +259,7 @@ impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> { // 1 for first_byte + i for previous iterations of this loop let (unchanged_slice, remaining) = self.bytes.split_at(1 + i); self.bytes = remaining; - return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) + return Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }); } else { assert!(byte.is_ascii()); } @@ -296,17 +295,15 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { fn from(mut iter: PercentEncode<'a, E>) -> Self { match iter.next() { None => "".into(), - Some(first) => { - match iter.next() { - None => first.into(), - Some(second) => { - let mut string = first.to_owned(); - string.push_str(second); - string.extend(iter); - string.into() - } + Some(first) => match iter.next() { + None => first.into(), + Some(second) => { + let mut string = first.to_owned(); + string.push_str(second); + string.extend(iter); + string.into() } - } + }, } } } @@ -328,7 +325,7 @@ impl<'a, E: EncodeSet> From> for Cow<'a, str> { #[inline] pub fn percent_decode(input: &[u8]) -> PercentDecode { PercentDecode { - bytes: input.iter() + bytes: input.iter(), } } @@ -388,10 +385,8 @@ impl<'a> PercentDecode<'a> { let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3; let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned(); decoded.push(decoded_byte); - decoded.extend(PercentDecode { - bytes: bytes_iter - }); - return Some(decoded) + decoded.extend(PercentDecode { bytes: bytes_iter }); + return Some(decoded); } } // Nothing to decode @@ -403,18 +398,14 @@ impl<'a> PercentDecode<'a> { /// This is return `Err` when the percent-decoded bytes are not well-formed in UTF-8. pub fn decode_utf8(self) -> Result, str::Utf8Error> { match self.clone().into() { - Cow::Borrowed(bytes) => { - match str::from_utf8(bytes) { - Ok(s) => Ok(s.into()), - Err(e) => Err(e), - } - } - Cow::Owned(bytes) => { - match String::from_utf8(bytes) { - Ok(s) => Ok(s.into()), - Err(e) => Err(e.utf8_error()), - } - } + Cow::Borrowed(bytes) => match str::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e), + }, + Cow::Owned(bytes) => match String::from_utf8(bytes) { + Ok(s) => Ok(s.into()), + Err(e) => Err(e.utf8_error()), + }, } } @@ -443,5 +434,3 @@ fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { } } } - - diff --git a/src/encoding.rs b/src/encoding.rs deleted file mode 100644 index 920b30e11..000000000 --- a/src/encoding.rs +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright 2013-2014 The rust-url developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - - -//! Abstraction that conditionally compiles either to rust-encoding, -//! or to only support UTF-8. - -#[cfg(feature = "query_encoding")] extern crate encoding; - -use std::borrow::Cow; -#[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter}; - -#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap}; -#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label; -#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; - -#[cfg(feature = "query_encoding")] -#[derive(Copy, Clone)] -pub struct EncodingOverride { - /// `None` means UTF-8. - encoding: Option -} - -#[cfg(feature = "query_encoding")] -impl EncodingOverride { - pub fn from_opt_encoding(encoding: Option) -> Self { - encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8) - } - - pub fn from_encoding(encoding: EncodingRef) -> Self { - EncodingOverride { - encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) } - } - } - - #[inline] - pub fn utf8() -> Self { - EncodingOverride { encoding: None } - } - - pub fn lookup(label: &[u8]) -> Option { - // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD - // https://encoding.spec.whatwg.org/#names-and-labels - ::std::str::from_utf8(label) - .ok() - .and_then(encoding_from_whatwg_label) - .map(Self::from_encoding) - } - - /// https://encoding.spec.whatwg.org/#get-an-output-encoding - pub fn to_output_encoding(self) -> Self { - if let Some(encoding) = self.encoding { - if matches!(encoding.name(), "utf-16le" | "utf-16be") { - return Self::utf8() - } - } - self - } - - pub fn is_utf8(&self) -> bool { - self.encoding.is_none() - } - - pub fn name(&self) -> &'static str { - match self.encoding { - Some(encoding) => encoding.name(), - None => "utf-8", - } - } - - pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { - match self.encoding { - // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace` - Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(), - None => decode_utf8_lossy(input), - } - } - - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { - match self.encoding { - // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` - Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()), - None => encode_utf8(input) - } - } -} - -#[cfg(feature = "query_encoding")] -impl Debug for EncodingOverride { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "EncodingOverride {{ encoding: ")?; - match self.encoding { - Some(e) => write!(f, "{} }}", e.name()), - None => write!(f, "None }}") - } - } -} - -#[cfg(not(feature = "query_encoding"))] -#[derive(Copy, Clone, Debug)] -pub struct EncodingOverride; - -#[cfg(not(feature = "query_encoding"))] -impl EncodingOverride { - #[inline] - pub fn utf8() -> Self { - EncodingOverride - } - - pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { - decode_utf8_lossy(input) - } - - pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { - encode_utf8(input) - } -} - -pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { - match input { - Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), - Cow::Owned(bytes) => { - let raw_utf8: *const [u8]; - match String::from_utf8_lossy(&bytes) { - Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), - Cow::Owned(s) => return s.into(), - } - // from_utf8_lossy returned a borrow of `bytes` unchanged. - debug_assert!(raw_utf8 == &*bytes as *const [u8]); - // Reuse the existing `Vec` allocation. - unsafe { String::from_utf8_unchecked(bytes) }.into() - } - } -} - -pub fn encode_utf8(input: Cow) -> Cow<[u8]> { - match input { - Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), - Cow::Owned(s) => Cow::Owned(s.into_bytes()) - } -} diff --git a/src/form_urlencoded.rs b/src/form_urlencoded.rs index f378c9a6c..176ffb750 100644 --- a/src/form_urlencoded.rs +++ b/src/form_urlencoded.rs @@ -13,13 +13,11 @@ //! Converts between a string (such as an URL’s query string) //! and a sequence of (name, value) pairs. -use encoding::EncodingOverride; -use percent_encoding::{percent_encode_byte, percent_decode}; +use percent_encoding::{percent_decode, percent_encode_byte}; +use query_encoding::{self, decode_utf8_lossy, EncodingOverride}; use std::borrow::{Borrow, Cow}; -use std::fmt; use std::str; - /// Convert a byte string in the `application/x-www-form-urlencoded` syntax /// into a iterator of (name, value) pairs. /// @@ -29,63 +27,12 @@ use std::str; /// converted to `[("#first", "%try%")]`. #[inline] pub fn parse(input: &[u8]) -> Parse { - Parse { - input: input, - encoding: EncodingOverride::utf8(), - } + Parse { input: input } } - - -/// Convert a byte string in the `application/x-www-form-urlencoded` syntax -/// into a iterator of (name, value) pairs. -/// -/// Use `parse(input.as_bytes())` to parse a `&str` string. -/// -/// This function is only available if the `query_encoding` -/// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled. -/// -/// Arguments: -/// -/// * `encoding_override`: The character encoding each name and values is decoded as -/// after percent-decoding. Defaults to UTF-8. -/// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). -/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`. -#[cfg(feature = "query_encoding")] -pub fn parse_with_encoding<'a>(input: &'a [u8], - encoding_override: Option<::encoding::EncodingRef>, - use_charset: bool) - -> Result, ()> { - use std::ascii::AsciiExt; - - let mut encoding = EncodingOverride::from_opt_encoding(encoding_override); - if !(encoding.is_utf8() || input.is_ascii()) { - return Err(()) - } - if use_charset { - for sequence in input.split(|&b| b == b'&') { - // No '+' in "_charset_" to replace with ' '. - if sequence.starts_with(b"_charset_=") { - let value = &sequence[b"_charset_=".len()..]; - // Skip replacing '+' with ' ' in value since no encoding label contains either: - // https://encoding.spec.whatwg.org/#names-and-labels - if let Some(e) = EncodingOverride::lookup(value) { - encoding = e; - break - } - } - } - } - Ok(Parse { - input: input, - encoding: encoding, - }) -} - /// The return type of `parse()`. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone)] pub struct Parse<'a> { input: &'a [u8], - encoding: EncodingOverride, } impl<'a> Iterator for Parse<'a> { @@ -94,28 +41,25 @@ impl<'a> Iterator for Parse<'a> { fn next(&mut self) -> Option { loop { if self.input.is_empty() { - return None + return None; } let mut split2 = self.input.splitn(2, |&b| b == b'&'); let sequence = split2.next().unwrap(); self.input = split2.next().unwrap_or(&[][..]); if sequence.is_empty() { - continue + continue; } let mut split2 = sequence.splitn(2, |&b| b == b'='); let name = split2.next().unwrap(); let value = split2.next().unwrap_or(&[][..]); - return Some(( - decode(name, self.encoding), - decode(value, self.encoding), - )) + return Some((decode(name), decode(value))); } } } -fn decode(input: &[u8], encoding: EncodingOverride) -> Cow { +fn decode(input: &[u8]) -> Cow { let replaced = replace_plus(input); - encoding.decode(match percent_decode(&replaced).if_any() { + decode_utf8_lossy(match percent_decode(&replaced).if_any() { Some(vec) => Cow::Owned(vec), None => replaced, }) @@ -146,16 +90,17 @@ impl<'a> Parse<'a> { } /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow`. -#[derive(Debug)] pub struct ParseIntoOwned<'a> { - inner: Parse<'a> + inner: Parse<'a>, } impl<'a> Iterator for ParseIntoOwned<'a> { type Item = (String, String); fn next(&mut self) -> Option { - self.inner.next().map(|(k, v)| (k.into_owned(), v.into_owned())) + self.inner + .next() + .map(|(k, v)| (k.into_owned(), v.into_owned())) } } @@ -164,9 +109,7 @@ impl<'a> Iterator for ParseIntoOwned<'a> { /// /// Return an iterator of `&str` slices. pub fn byte_serialize(input: &[u8]) -> ByteSerialize { - ByteSerialize { - bytes: input, - } + ByteSerialize { bytes: input } } /// Return value of `byte_serialize()`. @@ -176,7 +119,7 @@ pub struct ByteSerialize<'a> { } fn byte_serialized_unchanged(byte: u8) -> bool { - matches!(byte, b'*' | b'-' | b'.' | b'0' ... b'9' | b'A' ... b'Z' | b'_' | b'a' ... b'z') + matches!(byte, b'*' | b'-' | b'.' | b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z') } impl<'a> Iterator for ByteSerialize<'a> { @@ -186,7 +129,11 @@ impl<'a> Iterator for ByteSerialize<'a> { if let Some((&first, tail)) = self.bytes.split_first() { if !byte_serialized_unchanged(first) { self.bytes = tail; - return Some(if first == b' ' { "+" } else { percent_encode_byte(first) }) + return Some(if first == b' ' { + "+" + } else { + percent_encode_byte(first) + }); } let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); let (unchanged_slice, remaining) = match position { @@ -212,20 +159,10 @@ impl<'a> Iterator for ByteSerialize<'a> { /// The [`application/x-www-form-urlencoded` serializer]( /// https://url.spec.whatwg.org/#concept-urlencoded-serializer). -#[derive(Debug)] -pub struct Serializer { +pub struct Serializer<'a, T: Target> { target: Option, start_position: usize, - encoding: EncodingOverride, - custom_encoding: Option Cow<[u8]>>>>, -} - -struct SilentDebug(T); - -impl fmt::Debug for SilentDebug { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("…") - } + encoding: EncodingOverride<'a>, } pub trait Target { @@ -235,14 +172,22 @@ pub trait Target { } impl Target for String { - fn as_mut_string(&mut self) -> &mut String { self } - fn finish(self) -> Self { self } + fn as_mut_string(&mut self) -> &mut String { + self + } + fn finish(self) -> Self { + self + } type Finished = Self; } impl<'a> Target for &'a mut String { - fn as_mut_string(&mut self) -> &mut String { &mut **self } - fn finish(self) -> Self { self } + fn as_mut_string(&mut self) -> &mut String { + &mut **self + } + fn finish(self) -> Self { + self + } type Finished = Self; } @@ -270,7 +215,7 @@ impl<'a> Target for ::UrlQuery<'a> { type Finished = &'a mut ::Url; } -impl Serializer { +impl<'a, T: Target> Serializer<'a, T> { /// Create a new `application/x-www-form-urlencoded` serializer for the given target. /// /// If the target is non-empty, @@ -285,12 +230,11 @@ impl Serializer { /// If that suffix is non-empty, /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. pub fn for_suffix(mut target: T, start_position: usize) -> Self { - &target.as_mut_string()[start_position..]; // Panic if out of bounds + &target.as_mut_string()[start_position..]; // Panic if out of bounds Serializer { target: Some(target), start_position: start_position, - encoding: EncodingOverride::utf8(), - custom_encoding: None, + encoding: None, } } @@ -303,17 +247,8 @@ impl Serializer { } /// Set the character encoding to be used for names and values before percent-encoding. - #[cfg(feature = "query_encoding")] - pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self { - self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding(); - self - } - - /// Set the character encoding to be used for names and values before percent-encoding. - pub fn custom_encoding_override(&mut self, encode: F) -> &mut Self - where F: FnMut(&str) -> Cow<[u8]> + 'static - { - self.custom_encoding = Some(SilentDebug(Box::new(encode))); + pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { + self.encoding = new; self } @@ -321,8 +256,13 @@ impl Serializer { /// /// Panics if called after `.finish()`. pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { - append_pair(string(&mut self.target), self.start_position, self.encoding, - &mut self.custom_encoding, name, value); + append_pair( + string(&mut self.target), + self.start_position, + self.encoding, + name, + value, + ); self } @@ -334,36 +274,28 @@ impl Serializer { /// /// Panics if called after `.finish()`. pub fn extend_pairs(&mut self, iter: I) -> &mut Self - where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef, V: AsRef { + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, + { { let string = string(&mut self.target); for pair in iter { let &(ref k, ref v) = pair.borrow(); - append_pair(string, self.start_position, self.encoding, - &mut self.custom_encoding, k.as_ref(), v.as_ref()); + append_pair( + string, + self.start_position, + self.encoding, + k.as_ref(), + v.as_ref(), + ); } } self } - /// Add a name/value pair whose name is `_charset_` - /// and whose value is the character encoding’s name. - /// (See the `encoding_override()` method.) - /// - /// Panics if called after `.finish()`. - #[cfg(feature = "query_encoding")] - pub fn append_charset(&mut self) -> &mut Self { - assert!(self.custom_encoding.is_none(), - "Cannot use both custom_encoding_override() and append_charset()"); - { - let string = string(&mut self.target); - append_separator_if_needed(string, self.start_position); - string.push_str("_charset_="); - string.push_str(self.encoding.name()); - } - self - } - /// If this serializer was constructed with a string, take and return that string. /// /// ```rust @@ -377,7 +309,10 @@ impl Serializer { /// /// Panics if called more than once. pub fn finish(&mut self) -> T::Finished { - self.target.take().expect("url::form_urlencoded::Serializer double finish").finish() + self.target + .take() + .expect("url::form_urlencoded::Serializer double finish") + .finish() } } @@ -388,24 +323,25 @@ fn append_separator_if_needed(string: &mut String, start_position: usize) { } fn string(target: &mut Option) -> &mut String { - target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string() + target + .as_mut() + .expect("url::form_urlencoded::Serializer finished") + .as_mut_string() } -fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>, - name: &str, value: &str) { +fn append_pair( + string: &mut String, + start_position: usize, + encoding: EncodingOverride, + name: &str, + value: &str, +) { append_separator_if_needed(string, start_position); - append_encoded(name, string, encoding, custom_encoding); + append_encoded(name, string, encoding); string.push('='); - append_encoded(value, string, encoding, custom_encoding); + append_encoded(value, string, encoding); } -fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride, - custom_encoding: &mut Option Cow<[u8]>>>>) { - let bytes = if let Some(SilentDebug(ref mut custom)) = *custom_encoding { - custom(s) - } else { - encoding.encode(s.into()) - }; - string.extend(byte_serialize(&bytes)); +fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride) { + string.extend(byte_serialize(&query_encoding::encode(encoding, s.into()))) } diff --git a/src/host.rs b/src/host.rs index 38e88a3bb..6aa820911 100644 --- a/src/host.rs +++ b/src/host.rs @@ -6,15 +6,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; +use idna; +use parser::{ParseError, ParseResult}; +use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; use std::cmp; use std::fmt::{self, Formatter}; -use std::io; -use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs}; -use std::vec; -use parser::{ParseResult, ParseError}; -use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET}; -use idna; +use std::net::{Ipv4Addr, Ipv6Addr}; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum HostInternal { @@ -24,12 +21,12 @@ pub enum HostInternal { Ipv6(Ipv6Addr), } -#[cfg(feature = "heapsize")] -known_heap_size!(0, HostInternal); - -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl ::serde::Serialize for HostInternal { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: ::serde::Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: ::serde::Serializer, + { // This doesn’t use `derive` because that involves // large dependencies (that take a long time to build), and // either Macros 1.1 which are not stable yet or a cumbersome build script. @@ -42,13 +39,17 @@ impl ::serde::Serialize for HostInternal { HostInternal::Domain => Some(None), HostInternal::Ipv4(addr) => Some(Some(IpAddr::V4(addr))), HostInternal::Ipv6(addr) => Some(Some(IpAddr::V6(addr))), - }.serialize(serializer) + } + .serialize(serializer) } } -#[cfg(feature="serde")] -impl ::serde::Deserialize for HostInternal { - fn deserialize(deserializer: &mut D) -> Result where D: ::serde::Deserializer { +#[cfg(feature = "serde")] +impl<'de> ::serde::Deserialize<'de> for HostInternal { + fn deserialize(deserializer: D) -> Result + where + D: ::serde::Deserializer<'de>, + { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? { None => HostInternal::None, @@ -71,7 +72,7 @@ impl From> for HostInternal { /// The host name of an URL. #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum Host { +pub enum Host { /// A DNS domain name, as '.' dot-separated labels. /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of /// a special URL, or percent encoded for non-special URLs. Hosts for @@ -91,21 +92,28 @@ pub enum Host { Ipv6(Ipv6Addr), } -#[cfg(feature="serde")] -impl ::serde::Serialize for Host { - fn serialize(&self, serializer: &mut R) -> Result<(), R::Error> where R: ::serde::Serializer { +#[cfg(feature = "serde")] +impl ::serde::Serialize for Host { + fn serialize(&self, serializer: R) -> Result + where + R: ::serde::Serializer, + { use std::net::IpAddr; match *self { Host::Domain(ref s) => Ok(s), Host::Ipv4(addr) => Err(IpAddr::V4(addr)), Host::Ipv6(addr) => Err(IpAddr::V6(addr)), - }.serialize(serializer) + } + .serialize(serializer) } } -#[cfg(feature="serde")] -impl ::serde::Deserialize for Host { - fn deserialize(deserializer: &mut D) -> Result where D: ::serde::Deserializer { +#[cfg(feature = "serde")] +impl<'de, S: ::serde::Deserialize<'de>> ::serde::Deserialize<'de> for Host { + fn deserialize(deserializer: D) -> Result + where + D: ::serde::Deserializer<'de>, + { use std::net::IpAddr; Ok(match ::serde::Deserialize::deserialize(deserializer)? { Ok(s) => Host::Domain(s), @@ -115,16 +123,6 @@ impl ::serde::Deserialize for Host { } } -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Host { - fn heap_size_of_children(&self) -> usize { - match *self { - Host::Domain(ref s) => s.heap_size_of_children(), - _ => 0, - } - } -} - impl<'a> Host<&'a str> { /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. pub fn to_owned(&self) -> Host { @@ -143,16 +141,34 @@ impl Host { pub fn parse(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); let domain = idna::domain_to_ascii(&domain)?; - if domain.find(|c| matches!(c, - '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']' - )).is_some() { - return Err(ParseError::InvalidDomainCharacter) + if domain + .find(|c| { + matches!( + c, + '\0' | '\t' + | '\n' + | '\r' + | ' ' + | '#' + | '%' + | '/' + | ':' + | '?' + | '@' + | '[' + | '\\' + | ']' + ) + }) + .is_some() + { + return Err(ParseError::InvalidDomainCharacter); } if let Some(address) = parse_ipv4addr(&domain)? { Ok(Host::Ipv4(address)) @@ -165,14 +181,31 @@ impl Host { pub fn parse_opaque(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } - if input.find(|c| matches!(c, - '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '?' | '@' | '[' | '\\' | ']' - )).is_some() { - return Err(ParseError::InvalidDomainCharacter) + if input + .find(|c| { + matches!( + c, + '\0' | '\t' + | '\n' + | '\r' + | ' ' + | '#' + | '/' + | ':' + | '?' + | '@' + | '[' + | '\\' + | ']' + ) + }) + .is_some() + { + return Err(ParseError::InvalidDomainCharacter); } let s = utf8_percent_encode(input, SIMPLE_ENCODE_SET).to_string(); Ok(Host::Domain(s)) @@ -193,80 +226,6 @@ impl> fmt::Display for Host { } } -/// This mostly exists because coherence rules don’t allow us to implement -/// `ToSocketAddrs for (Host, u16)`. -#[derive(Clone, Debug)] -pub struct HostAndPort { - pub host: Host, - pub port: u16, -} - -impl<'a> HostAndPort<&'a str> { - /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. - pub fn to_owned(&self) -> HostAndPort { - HostAndPort { - host: self.host.to_owned(), - port: self.port - } - } -} - -impl> fmt::Display for HostAndPort { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - self.host.fmt(f)?; - f.write_str(":")?; - self.port.fmt(f) - } -} - - -impl> ToSocketAddrs for HostAndPort { - type Iter = SocketAddrs; - - fn to_socket_addrs(&self) -> io::Result { - let port = self.port; - match self.host { - Host::Domain(ref domain) => Ok(SocketAddrs { - // FIXME: use std::net::lookup_host when it’s stable. - state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?) - }), - Host::Ipv4(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port))) - }), - Host::Ipv6(address) => Ok(SocketAddrs { - state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0))) - }), - } - } -} - -/// Socket addresses for an URL. -#[derive(Debug)] -pub struct SocketAddrs { - state: SocketAddrsState -} - -#[derive(Debug)] -enum SocketAddrsState { - Domain(vec::IntoIter), - One(SocketAddr), - Done, -} - -impl Iterator for SocketAddrs { - type Item = SocketAddr; - fn next(&mut self) -> Option { - match self.state { - SocketAddrsState::Domain(ref mut iter) => iter.next(), - SocketAddrsState::One(s) => { - self.state = SocketAddrsState::Done; - Some(s) - } - SocketAddrsState::Done => None - } - } -} - fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result { let segments = addr.segments(); let (compress_start, compress_end) = longest_zero_sequence(&segments); @@ -344,10 +303,12 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { // So instead we check if the input looks like a real number and only return // an error when it's an overflow. let valid_number = match r { - 8 => input.chars().all(|c| c >= '0' && c <='7'), - 10 => input.chars().all(|c| c >= '0' && c <='9'), - 16 => input.chars().all(|c| (c >= '0' && c <='9') || (c >='a' && c <= 'f') || (c >= 'A' && c <= 'F')), - _ => false + 8 => input.chars().all(|c| c >= '0' && c <= '7'), + 10 => input.chars().all(|c| c >= '0' && c <= '9'), + 16 => input + .chars() + .all(|c| (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')), + _ => false, }; if !valid_number { @@ -369,7 +330,7 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { /// fn parse_ipv4addr(input: &str) -> ParseResult> { if input.is_empty() { - return Ok(None) + return Ok(None); } let mut parts: Vec<&str> = input.split('.').collect(); if parts.last() == Some(&"") { @@ -387,7 +348,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { match parse_ipv4number(part) { Ok(Some(n)) => numbers.push(n), Ok(None) => return Ok(None), - Err(()) => overflow = true + Err(()) => overflow = true, }; } if overflow { @@ -395,7 +356,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { } let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) - if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { + if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { return Err(ParseError::InvalidIpv4Address); } if numbers.iter().any(|x| *x > 255) { @@ -418,12 +379,12 @@ fn parse_ipv6addr(input: &str) -> ParseResult { let mut i = 0; if len < 2 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } if input[0] == b':' { if input[1] != b':' { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i = 2; piece_pointer = 1; @@ -432,16 +393,16 @@ fn parse_ipv6addr(input: &str) -> ParseResult { while i < len { if piece_pointer == 8 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } if input[i] == b':' { if compress_pointer.is_some() { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i += 1; piece_pointer += 1; compress_pointer = Some(piece_pointer); - continue + continue; } let start = i; let end = cmp::min(len, start + 4); @@ -451,33 +412,33 @@ fn parse_ipv6addr(input: &str) -> ParseResult { Some(digit) => { value = value * 0x10 + digit as u16; i += 1; - }, - None => break + } + None => break, } } if i < len { match input[i] { b'.' => { if i == start { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } i = start; if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } is_ip_v4 = true; - }, + } b':' => { i += 1; if i == len { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } - }, - _ => return Err(ParseError::InvalidIpv6Address) + } + _ => return Err(ParseError::InvalidIpv6Address), } } if is_ip_v4 { - break + break; } pieces[piece_pointer] = value; piece_pointer += 1; @@ -485,7 +446,7 @@ fn parse_ipv6addr(input: &str) -> ParseResult { if is_ip_v4 { if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } let mut numbers_seen = 0; while i < len { @@ -493,23 +454,23 @@ fn parse_ipv6addr(input: &str) -> ParseResult { if numbers_seen < 4 && (i < len && input[i] == b'.') { i += 1 } else { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } let mut ipv4_piece = None; while i < len { let digit = match input[i] { - c @ b'0' ... b'9' => c - b'0', - _ => break + c @ b'0'..=b'9' => c - b'0', + _ => break, }; match ipv4_piece { None => ipv4_piece = Some(digit as u16), - Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero + Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero Some(ref mut v) => { *v = *v * 10 + digit as u16; if *v > 255 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } } @@ -519,7 +480,7 @@ fn parse_ipv6addr(input: &str) -> ParseResult { pieces[piece_pointer] = if let Some(v) = ipv4_piece { pieces[piece_pointer] * 0x100 + v } else { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); }; numbers_seen += 1; @@ -529,12 +490,12 @@ fn parse_ipv6addr(input: &str) -> ParseResult { } if numbers_seen != 4 { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } } if i < len { - return Err(ParseError::InvalidIpv6Address) + return Err(ParseError::InvalidIpv6Address); } match compress_pointer { @@ -547,10 +508,13 @@ fn parse_ipv6addr(input: &str) -> ParseResult { piece_pointer -= 1; } } - _ => if piece_pointer != 8 { - return Err(ParseError::InvalidIpv6Address) + _ => { + if piece_pointer != 8 { + return Err(ParseError::InvalidIpv6Address); + } } } - Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3], - pieces[4], pieces[5], pieces[6], pieces[7])) + Ok(Ipv6Addr::new( + pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], + )) } diff --git a/src/lib.rs b/src/lib.rs index f1e630642..4cbac60ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,50 +105,51 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); # run().unwrap(); */ -#![doc(html_root_url = "https://docs.rs/url/1.7.0")] +#![doc(html_root_url = "https://docs.rs/url/2.0.0")] -#[cfg(feature="rustc-serialize")] extern crate rustc_serialize; -#[macro_use] extern crate matches; -#[cfg(feature="serde")] extern crate serde; -#[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; - -pub extern crate idna; #[macro_use] -pub extern crate percent_encoding; +extern crate matches; +extern crate idna; +#[cfg(feature = "serde")] +extern crate serde; +#[macro_use] +extern crate percent_encoding; -use encoding::EncodingOverride; -#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; use host::HostInternal; -use parser::{Parser, Context, SchemeType, to_u32, ViolationFn}; -use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET, - percent_encode, percent_decode, utf8_percent_encode}; +use parser::{to_u32, Context, Parser, SchemeType}; +use percent_encoding::{ + percent_decode, percent_encode, utf8_percent_encode, PATH_SEGMENT_ENCODE_SET, + USERINFO_ENCODE_SET, +}; use std::borrow::Borrow; use std::cmp; -#[cfg(feature = "serde")] use std::error::Error; -use std::fmt::{self, Write, Debug, Formatter}; +#[cfg(feature = "serde")] +use std::error::Error; +use std::fmt::{self, Write}; use std::hash; -use std::io; use std::mem; -use std::net::{ToSocketAddrs, IpAddr}; +use std::net::IpAddr; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; -pub use origin::{Origin, OpaqueOrigin}; -pub use host::{Host, HostAndPort, SocketAddrs}; -pub use path_segments::PathSegmentsMut; +pub use host::Host; +pub use origin::{OpaqueOrigin, Origin}; pub use parser::{ParseError, SyntaxViolation}; +pub use path_segments::PathSegmentsMut; +pub use query_encoding::EncodingOverride; pub use slicing::Position; -mod encoding; mod host; mod origin; -mod path_segments; mod parser; +mod path_segments; +mod query_encoding; mod slicing; pub mod form_urlencoded; -#[doc(hidden)] pub mod quirks; +#[doc(hidden)] +pub mod quirks; /// A parsed URL record. #[derive(Clone)] @@ -165,30 +166,23 @@ pub struct Url { serialization: String, // Components - scheme_end: u32, // Before ':' - username_end: u32, // Before ':' (if a password is given) or '@' (if not) + scheme_end: u32, // Before ':' + username_end: u32, // Before ':' (if a password is given) or '@' (if not) host_start: u32, host_end: u32, host: HostInternal, port: Option, - path_start: u32, // Before initial '/', if any - query_start: Option, // Before '?', unlike Position::QueryStart - fragment_start: Option, // Before '#', unlike Position::FragmentStart -} - -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Url { - fn heap_size_of_children(&self) -> usize { - self.serialization.heap_size_of_children() - } + path_start: u32, // Before initial '/', if any + query_start: Option, // Before '?', unlike Position::QueryStart + fragment_start: Option, // Before '#', unlike Position::FragmentStart } /// Full configuration for the URL parser. #[derive(Copy, Clone)] pub struct ParseOptions<'a> { base_url: Option<&'a Url>, - encoding_override: encoding::EncodingOverride, - violation_fn: ViolationFn<'a>, + encoding_override: EncodingOverride<'a>, + violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, } impl<'a> ParseOptions<'a> { @@ -200,28 +194,8 @@ impl<'a> ParseOptions<'a> { /// Override the character encoding of query strings. /// This is a legacy concept only relevant for HTML. - /// - /// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). - /// - /// This method is only available if the `query_encoding` - /// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled. - #[cfg(feature = "query_encoding")] - pub fn encoding_override(mut self, new: Option) -> Self { - self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding(); - self - } - - /// Call the provided function or closure on non-fatal parse errors, passing - /// a static string description. This method is deprecated in favor of - /// `syntax_violation_callback` and is implemented as an adaptor for the - /// latter, passing the `SyntaxViolation` description. Only the last value - /// passed to either method will be used by a parser. - #[deprecated] - pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self { - self.violation_fn = match new { - Some(f) => ViolationFn::OldFn(f), - None => ViolationFn::NoOp - }; + pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { + self.encoding_override = new; self } @@ -247,11 +221,8 @@ impl<'a> ParseOptions<'a> { /// # } /// # run().unwrap(); /// ``` - pub fn syntax_violation_callback(mut self, new: Option<&'a Fn(SyntaxViolation)>) -> Self { - self.violation_fn = match new { - Some(f) => ViolationFn::NewFn(f), - None => ViolationFn::NoOp - }; + pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { + self.violation_fn = new; self } @@ -263,18 +234,8 @@ impl<'a> ParseOptions<'a> { query_encoding_override: self.encoding_override, violation_fn: self.violation_fn, context: Context::UrlParser, - }.parse_url(input) - } -} - -impl<'a> Debug for ParseOptions<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, - "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, \ - violation_fn: {:?} }}", - self.base_url, - self.encoding_override, - self.violation_fn) + } + .parse_url(input) } } @@ -331,10 +292,11 @@ impl Url { /// [`ParseError`]: enum.ParseError.html #[inline] pub fn parse_with_params(input: &str, iter: I) -> Result - where I: IntoIterator, - I::Item: Borrow<(K, V)>, - K: AsRef, - V: AsRef + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, { let mut url = Url::options().parse(input); @@ -403,8 +365,8 @@ impl Url { pub fn options<'a>() -> ParseOptions<'a> { ParseOptions { base_url: None, - encoding_override: EncodingOverride::utf8(), - violation_fn: ViolationFn::NoOp, + encoding_override: None, + violation_fn: None, } } @@ -464,10 +426,13 @@ impl Url { macro_rules! assert { ($x: expr) => { if !$x { - return Err(format!("!( {} ) for URL {:?}", - stringify!($x), self.serialization)) + return Err(format!( + "!( {} ) for URL {:?}", + stringify!($x), + self.serialization + )); } - } + }; } macro_rules! assert_eq { @@ -485,12 +450,14 @@ impl Url { } assert!(self.scheme_end >= 1); - assert!(matches!(self.byte_at(0), b'a'...b'z' | b'A'...b'Z')); - assert!(self.slice(1..self.scheme_end).chars() - .all(|c| matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.'))); + assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z')); + assert!(self + .slice(1..self.scheme_end) + .chars() + .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); assert_eq!(self.byte_at(self.scheme_end), b':'); - if self.slice(self.scheme_end + 1 ..).starts_with("//") { + if self.slice(self.scheme_end + 1..).starts_with("//") { // URL with authority match self.byte_at(self.username_end) { b':' => { @@ -521,7 +488,10 @@ impl Url { } else { assert_eq!(self.byte_at(self.host_end), b':'); let port_str = self.slice(self.host_end + 1..self.path_start); - assert_eq!(self.port, Some(port_str.parse::().expect("Couldn't parse port?"))); + assert_eq!( + self.port, + Some(port_str.parse::().expect("Couldn't parse port?")) + ); } assert_eq!(self.byte_at(self.path_start), b'/'); } else { @@ -551,10 +521,12 @@ impl Url { assert_eq!(self.username_end, other.username_end); assert_eq!(self.host_start, other.host_start); assert_eq!(self.host_end, other.host_end); - assert!(self.host == other.host || + assert!( + self.host == other.host || // XXX No host round-trips to empty host. // See https://github.com/whatwg/url/issues/79 - (self.host_str(), other.host_str()) == (None, Some(""))); + (self.host_str(), other.host_str()) == (None, Some("")) + ); assert_eq!(self.port, other.port); assert_eq!(self.path_start, other.path_start); assert_eq!(self.query_start, other.query_start); @@ -977,47 +949,6 @@ impl Url { self.port.or_else(|| parser::default_port(self.scheme())) } - /// If the URL has a host, return something that implements `ToSocketAddrs`. - /// - /// If the URL has no port number and the scheme’s default port number is not known - /// (see `Url::port_or_known_default`), - /// the closure is called to obtain a port number. - /// Typically, this closure can match on the result `Url::scheme` - /// to have per-scheme default port numbers, - /// and panic for schemes it’s not prepared to handle. - /// For example: - /// - /// ```rust - /// # use url::Url; - /// # use std::net::TcpStream; - /// # use std::io; - /// fn connect(url: &Url) -> io::Result { - /// TcpStream::connect(url.with_default_port(default_port)?) - /// } - /// - /// fn default_port(url: &Url) -> Result { - /// match url.scheme() { - /// "git" => Ok(9418), - /// "git+ssh" => Ok(22), - /// "git+https" => Ok(443), - /// "git+http" => Ok(80), - /// _ => Err(()), - /// } - /// } - /// ``` - pub fn with_default_port(&self, f: F) -> io::Result> - where F: FnOnce(&Url) -> Result { - Ok(HostAndPort { - host: self.host() - .ok_or(()) - .or_else(|()| io_error("URL has no host"))?, - port: self.port_or_known_default() - .ok_or(()) - .or_else(|()| f(self)) - .or_else(|()| io_error("URL has no port number"))? - }) - } - /// Return the path for this URL, as a percent-encoded ASCII string. /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. /// For other URLs, this starts with a '/' slash @@ -1044,8 +975,7 @@ impl Url { pub fn path(&self) -> &str { match (self.query_start, self.fragment_start) { (None, None) => self.slice(self.path_start..), - (Some(next_component_start), _) | - (None, Some(next_component_start)) => { + (Some(next_component_start), _) | (None, Some(next_component_start)) => { self.slice(self.path_start..next_component_start) } } @@ -1351,7 +1281,10 @@ impl Url { self.serialization.push('?'); } - let query = UrlQuery { url: Some(self), fragment: fragment }; + let query = UrlQuery { + url: Some(self), + fragment: fragment, + }; form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) } @@ -1361,7 +1294,7 @@ impl Url { let after_path = self.slice(i..).to_owned(); self.serialization.truncate(i as usize); after_path - }, + } (None, None) => String::new(), } } @@ -1402,7 +1335,7 @@ impl Url { } parser.parse_cannot_be_a_base_path(parser::Input::new(path)); } else { - let mut has_host = true; // FIXME + let mut has_host = true; // FIXME parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); } }); @@ -1426,8 +1359,12 @@ impl Url { *index -= old_after_path_position; *index += new_after_path_position; }; - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(after_path) } @@ -1494,7 +1431,7 @@ impl Url { pub fn set_port(&mut self, mut port: Option) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } if port.is_some() && port == parser::default_port(self.scheme()) { port = None @@ -1507,11 +1444,16 @@ impl Url { match (self.port, port) { (None, None) => {} (Some(_), None) => { - self.serialization.drain(self.host_end as usize .. self.path_start as usize); + self.serialization + .drain(self.host_end as usize..self.path_start as usize); let offset = self.path_start - self.host_end; self.path_start = self.host_end; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } (Some(old), Some(new)) if old == new => {} (_, Some(new)) => { @@ -1525,8 +1467,12 @@ impl Url { *index -= old_path_start; *index += new_path_start; }; - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(&path_and_after); } } @@ -1617,7 +1563,7 @@ impl Url { /// [`ParseError`]: enum.ParseError.html pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { if self.cannot_be_a_base() { - return Err(ParseError::SetHostOnCannotBeABaseUrl) + return Err(ParseError::SetHostOnCannotBeABaseUrl); } if let Some(host) = host { @@ -1631,27 +1577,36 @@ impl Url { } } else if self.has_host() { if SchemeType::from(self.scheme()).is_special() { - return Err(ParseError::EmptyHost) + return Err(ParseError::EmptyHost); } debug_assert!(self.byte_at(self.scheme_end) == b':'); debug_assert!(self.byte_at(self.path_start) == b'/'); let new_path_start = self.scheme_end + 1; - self.serialization.drain(new_path_start as usize..self.path_start as usize); + self.serialization + .drain(new_path_start as usize..self.path_start as usize); let offset = self.path_start - new_path_start; self.path_start = new_path_start; self.username_end = new_path_start; self.host_start = new_path_start; self.host_end = new_path_start; self.port = None; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } Ok(()) } /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { - let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end }; + let old_suffix_pos = if opt_new_port.is_some() { + self.path_start + } else { + self.host_end + }; let suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); if !self.has_authority() { @@ -1680,8 +1635,12 @@ impl Url { *index += new_suffix_pos; }; adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } } /// Change this URL’s host to the given IP address. @@ -1723,7 +1682,7 @@ impl Url { /// pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { if self.cannot_be_a_base() { - return Err(()) + return Err(()); } let address = match address { @@ -1763,13 +1722,14 @@ impl Url { pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } if let Some(password) = password { let host_and_after = self.slice(self.host_start..).to_owned(); self.serialization.truncate(self.username_end as usize); self.serialization.push(':'); - self.serialization.extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); self.serialization.push('@'); let old_host_start = self.host_start; @@ -1781,28 +1741,37 @@ impl Url { self.host_start = new_host_start; adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } self.serialization.push_str(&host_and_after); - } else if self.byte_at(self.username_end) == b':' { // If there is a password to remove + } else if self.byte_at(self.username_end) == b':' { + // If there is a password to remove let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; debug_assert!(has_username_or_password); let username_start = self.scheme_end + 3; let empty_username = username_start == self.username_end; - let start = self.username_end; // Remove the ':' + let start = self.username_end; // Remove the ':' let end = if empty_username { self.host_start // Remove the '@' as well } else { - self.host_start - 1 // Keep the '@' to separate the username from the host + self.host_start - 1 // Keep the '@' to separate the username from the host }; - self.serialization.drain(start as usize .. end as usize); + self.serialization.drain(start as usize..end as usize); let offset = end - start; self.host_start -= offset; self.host_end -= offset; self.path_start -= offset; - if let Some(ref mut index) = self.query_start { *index -= offset } - if let Some(ref mut index) = self.fragment_start { *index -= offset } + if let Some(ref mut index) = self.query_start { + *index -= offset + } + if let Some(ref mut index) = self.fragment_start { + *index -= offset + } } Ok(()) } @@ -1845,16 +1814,17 @@ impl Url { pub fn set_username(&mut self, username: &str) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { - return Err(()) + return Err(()); } let username_start = self.scheme_end + 3; debug_assert!(self.slice(self.scheme_end..username_start) == "://"); if self.slice(username_start..self.username_end) == username { - return Ok(()) + return Ok(()); } let after_username = self.slice(self.username_end..).to_owned(); self.serialization.truncate(username_start as usize); - self.serialization.extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); let mut removed_bytes = self.username_end; self.username_end = to_u32(self.serialization.len()).unwrap(); @@ -1883,8 +1853,12 @@ impl Url { adjust(&mut self.host_start); adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } Ok(()) } @@ -1949,9 +1923,10 @@ impl Url { pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); let remaining = parser.parse_scheme(parser::Input::new(scheme))?; - if !remaining.is_empty() || - (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) { - return Err(()) + if !remaining.is_empty() + || (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) + { + return Err(()); } let old_scheme_end = self.scheme_end; let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); @@ -1965,8 +1940,12 @@ impl Url { adjust(&mut self.host_start); adjust(&mut self.host_end); adjust(&mut self.path_start); - if let Some(ref mut index) = self.query_start { adjust(index) } - if let Some(ref mut index) = self.fragment_start { adjust(index) } + if let Some(ref mut index) = self.query_start { + adjust(index) + } + if let Some(ref mut index) = self.fragment_start { + adjust(index) + } parser.serialization.push_str(self.slice(old_scheme_end..)); self.serialization = parser.serialization; @@ -2000,7 +1979,7 @@ impl Url { /// # run().unwrap(); /// # } /// ``` - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn from_file_path>(path: P) -> Result { let mut serialization = "file://".to_owned(); let host_start = serialization.len() as u32; @@ -2036,7 +2015,7 @@ impl Url { /// /// Note that `std::path` does not consider trailing slashes significant /// and usually does not include them (e.g. in `Path::parent()`). - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn from_directory_path>(path: P) -> Result { let mut url = Url::from_file_path(path)?; if !url.serialization.ends_with('/') { @@ -2053,18 +2032,38 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. #[cfg(feature = "serde")] #[deny(unused)] - pub fn serialize_internal(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer { + pub fn serialize_internal(&self, serializer: S) -> Result + where + S: serde::Serializer, + { use serde::Serialize; // Destructuring first lets us ensure that adding or removing fields forces this method // to be updated - let Url { ref serialization, ref scheme_end, - ref username_end, ref host_start, - ref host_end, ref host, ref port, - ref path_start, ref query_start, - ref fragment_start} = *self; - (serialization, scheme_end, username_end, - host_start, host_end, host, port, path_start, - query_start, fragment_start).serialize(serializer) + let Url { + ref serialization, + ref scheme_end, + ref username_end, + ref host_start, + ref host_end, + ref host, + ref port, + ref path_start, + ref query_start, + ref fragment_start, + } = *self; + ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) + .serialize(serializer) } /// Serialize with Serde using the internal representation of the `Url` struct. @@ -2075,11 +2074,23 @@ impl Url { /// This method is only available if the `serde` Cargo feature is enabled. #[cfg(feature = "serde")] #[deny(unused)] - pub fn deserialize_internal(deserializer: &mut D) -> Result where D: serde::Deserializer { - use serde::{Deserialize, Error}; - let (serialization, scheme_end, username_end, - host_start, host_end, host, port, path_start, - query_start, fragment_start) = Deserialize::deserialize(deserializer)?; + pub fn deserialize_internal<'de, D>(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Deserialize, Error, Unexpected}; + let ( + serialization, + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + query_start, + fragment_start, + ) = Deserialize::deserialize(deserializer)?; let url = Url { serialization: serialization, scheme_end: scheme_end, @@ -2090,15 +2101,17 @@ impl Url { port: port, path_start: path_start, query_start: query_start, - fragment_start: fragment_start + fragment_start: fragment_start, }; if cfg!(debug_assertions) { - url.check_invariants().map_err(|ref reason| Error::invalid_value(&reason))? + url.check_invariants().map_err(|reason| { + let reason: &str = &reason; + Error::invalid_value(Unexpected::Other("value"), &reason) + })? } Ok(url) } - /// Assuming the URL is in the `file` scheme or similar, /// convert its path to an absolute `std::path::Path`. /// @@ -2118,15 +2131,15 @@ impl Url { /// (That is, if the percent-decoded path contains a NUL byte or, /// for a Windows path, is not UTF-8.) #[inline] - #[cfg(any(unix, windows, target_os="redox"))] + #[cfg(any(unix, windows, target_os = "redox"))] pub fn to_file_path(&self) -> Result { if let Some(segments) = self.path_segments() { let host = match self.host() { None | Some(Host::Domain("localhost")) => None, Some(_) if cfg!(windows) && self.scheme() == "file" => { - Some(&self.serialization[self.host_start as usize .. self.host_end as usize]) - }, - _ => return Err(()) + Some(&self.serialization[self.host_start as usize..self.host_end as usize]) + } + _ => return Err(()), }; return file_url_segments_to_pathbuf(host, segments); @@ -2137,7 +2150,10 @@ impl Url { // Private helper methods: #[inline] - fn slice(&self, range: R) -> &str where R: RangeArg { + fn slice(&self, range: R) -> &str + where + R: RangeArg, + { range.slice_of(&self.serialization) } @@ -2147,15 +2163,6 @@ impl Url { } } -/// Return an error if `Url::host` or `Url::port_or_known_default` return `None`. -impl ToSocketAddrs for Url { - type Iter = SocketAddrs; - - fn to_socket_addrs(&self) -> io::Result { - self.with_default_port(|_| Err(()))?.to_socket_addrs() - } -} - /// Parse a string as an URL, without a base URL or encoding override. impl str::FromStr for Url { type Err = ParseError; @@ -2212,7 +2219,10 @@ impl PartialOrd for Url { /// URLs hash like their serialization. impl hash::Hash for Url { #[inline] - fn hash(&self, state: &mut H) where H: hash::Hasher { + fn hash(&self, state: &mut H) + where + H: hash::Hasher, + { hash::Hash::hash(&self.serialization, state) } } @@ -2232,47 +2242,33 @@ trait RangeArg { impl RangeArg for Range { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[self.start as usize .. self.end as usize] + &s[self.start as usize..self.end as usize] } } impl RangeArg for RangeFrom { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[self.start as usize ..] + &s[self.start as usize..] } } impl RangeArg for RangeTo { #[inline] fn slice_of<'a>(&self, s: &'a str) -> &'a str { - &s[.. self.end as usize] - } -} - -#[cfg(feature="rustc-serialize")] -impl rustc_serialize::Encodable for Url { - fn encode(&self, encoder: &mut S) -> Result<(), S::Error> { - encoder.emit_str(self.as_str()) - } -} - - -#[cfg(feature="rustc-serialize")] -impl rustc_serialize::Decodable for Url { - fn decode(decoder: &mut D) -> Result { - Url::parse(&*decoder.read_str()?).map_err(|error| { - decoder.error(&format!("URL parsing error: {}", error)) - }) + &s[..self.end as usize] } } /// Serializes this URL into a `serde` stream. /// /// This implementation is only available if the `serde` Cargo feature is enabled. -#[cfg(feature="serde")] +#[cfg(feature = "serde")] impl serde::Serialize for Url { - fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { serializer.serialize_str(self.as_str()) } } @@ -2280,22 +2276,28 @@ impl serde::Serialize for Url { /// Deserializes this URL from a `serde` stream. /// /// This implementation is only available if the `serde` Cargo feature is enabled. -#[cfg(feature="serde")] -impl serde::Deserialize for Url { - fn deserialize(deserializer: &mut D) -> Result where D: serde::Deserializer { +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for Url { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Error, Unexpected}; let string_representation: String = serde::Deserialize::deserialize(deserializer)?; Url::parse(&string_representation).map_err(|err| { - serde::Error::invalid_value(err.description()) + Error::invalid_value(Unexpected::Str(&string_representation), &err.description()) }) } } #[cfg(any(unix, target_os = "redox"))] -fn path_to_file_url_segments(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { use std::os::unix::prelude::OsStrExt; if !path.is_absolute() { - return Err(()) + return Err(()); } let host_end = to_u32(serialization.len()).unwrap(); let mut empty = true; @@ -2304,7 +2306,9 @@ fn path_to_file_url_segments(path: &Path, serialization: &mut String) empty = false; serialization.push('/'); serialization.extend(percent_encode( - component.as_os_str().as_bytes(), PATH_SEGMENT_ENCODE_SET)); + component.as_os_str().as_bytes(), + PATH_SEGMENT_ENCODE_SET, + )); } if empty { // An URL’s path must not be empty. @@ -2314,18 +2318,22 @@ fn path_to_file_url_segments(path: &Path, serialization: &mut String) } #[cfg(windows)] -fn path_to_file_url_segments(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { +fn path_to_file_url_segments( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { path_to_file_url_segments_windows(path, serialization) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] -fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) - -> Result<(u32, HostInternal), ()> { - use std::path::{Prefix, Component}; +fn path_to_file_url_segments_windows( + path: &Path, + serialization: &mut String, +) -> Result<(u32, HostInternal), ()> { + use std::path::{Component, Prefix}; if !path.is_absolute() { - return Err(()) + return Err(()); } let mut components = path.components(); @@ -2339,7 +2347,7 @@ fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) serialization.push('/'); serialization.push(letter as char); serialization.push(':'); - }, + } Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; write!(serialization, "{}", host).unwrap(); @@ -2348,29 +2356,35 @@ fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) serialization.push('/'); let share = share.to_str().ok_or(())?; serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT_ENCODE_SET)); - }, - _ => return Err(()) + } + _ => return Err(()), }, - _ => return Err(()) + _ => return Err(()), } for component in components { - if component == Component::RootDir { continue } + if component == Component::RootDir { + continue; + } // FIXME: somehow work with non-unicode? let component = component.as_os_str().to_str().ok_or(())?; serialization.push('/'); - serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT_ENCODE_SET)); + serialization.extend(percent_encode( + component.as_bytes(), + PATH_SEGMENT_ENCODE_SET, + )); } Ok((host_end, host_internal)) } - #[cfg(any(unix, target_os = "redox"))] -fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) -> Result { +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split, +) -> Result { use std::ffi::OsStr; use std::os::unix::prelude::OsStrExt; - use std::path::PathBuf; if host.is_some() { return Err(()); @@ -2387,20 +2401,27 @@ fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) } let os_str = OsStr::from_bytes(&bytes); let path = PathBuf::from(os_str); - debug_assert!(path.is_absolute(), - "to_file_path() failed to produce an absolute Path"); + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); Ok(path) } #[cfg(windows)] -fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split) -> Result { +fn file_url_segments_to_pathbuf( + host: Option<&str>, + segments: str::Split, +) -> Result { file_url_segments_to_pathbuf_windows(host, segments) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] -fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::Split) -> Result { - +fn file_url_segments_to_pathbuf_windows( + host: Option<&str>, + mut segments: str::Split, +) -> Result { let mut string = if let Some(host) = host { r"\\".to_owned() + host } else { @@ -2409,23 +2430,23 @@ fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::S match first.len() { 2 => { if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { - return Err(()) + return Err(()); } first.to_owned() - }, + } 4 => { if !first.starts_with(parser::ascii_alpha) { - return Err(()) + return Err(()); } let bytes = first.as_bytes(); if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { - return Err(()) + return Err(()); } first[0..1].to_owned() + ":" - }, + } _ => return Err(()), } @@ -2441,15 +2462,13 @@ fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::S } } let path = PathBuf::from(string); - debug_assert!(path.is_absolute(), - "to_file_path() failed to produce an absolute Path"); + debug_assert!( + path.is_absolute(), + "to_file_path() failed to produce an absolute Path" + ); Ok(path) } -fn io_error(reason: &str) -> io::Result { - Err(io::Error::new(io::ErrorKind::InvalidData, reason)) -} - /// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. #[derive(Debug)] pub struct UrlQuery<'a> { @@ -2464,48 +2483,3 @@ impl<'a> Drop for UrlQuery<'a> { } } } - - -/// Define a new struct -/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait, -/// for use in [`percent_decode()`](percent_encoding/fn.percent_encode.html) -/// and related functions. -/// -/// Parameters are characters to include in the set in addition to those of the base set. -/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set). -/// -/// Example -/// ======= -/// -/// ```rust -/// #[macro_use] extern crate url; -/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; -/// define_encode_set! { -/// /// This encode set is used in the URL parser for query strings. -/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} -/// } -/// # fn main() { -/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); -/// # } -/// ``` -#[macro_export] -macro_rules! define_encode_set { - ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => { - $(#[$attr])* - #[derive(Copy, Clone)] - #[allow(non_camel_case_types)] - pub struct $name; - - impl $crate::percent_encoding::EncodeSet for $name { - #[inline] - fn contains(&self, byte: u8) -> bool { - match byte as char { - $( - $ch => true, - )* - _ => $base_set.contains(byte) - } - } - } - } -} diff --git a/src/origin.rs b/src/origin.rs index ee0b83e50..3223709dd 100644 --- a/src/origin.rs +++ b/src/origin.rs @@ -6,11 +6,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; use host::Host; use idna::domain_to_unicode; use parser::default_port; -use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering}; +use std::sync::atomic::{AtomicUsize, Ordering}; use Url; pub fn url_origin(url: &Url) -> Origin { @@ -20,16 +19,17 @@ pub fn url_origin(url: &Url) -> Origin { let result = Url::parse(url.path()); match result { Ok(ref url) => url_origin(url), - Err(_) => Origin::new_opaque() + Err(_) => Origin::new_opaque(), } - }, - "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => { - Origin::Tuple(scheme.to_owned(), url.host().unwrap().to_owned(), - url.port_or_known_default().unwrap()) - }, + } + "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => Origin::Tuple( + scheme.to_owned(), + url.host().unwrap().to_owned(), + url.port_or_known_default().unwrap(), + ), // TODO: Figure out what to do if the scheme is a file "file" => Origin::new_opaque(), - _ => Origin::new_opaque() + _ => Origin::new_opaque(), } } @@ -56,27 +56,13 @@ pub enum Origin { Opaque(OpaqueOrigin), /// Consists of the URL's scheme, host and port - Tuple(String, Host, u16) -} - -#[cfg(feature = "heapsize")] -impl HeapSizeOf for Origin { - fn heap_size_of_children(&self) -> usize { - match *self { - Origin::Tuple(ref scheme, ref host, _) => { - scheme.heap_size_of_children() + - host.heap_size_of_children() - }, - _ => 0, - } - } + Tuple(String, Host, u16), } - impl Origin { /// Creates a new opaque origin that is only equal to itself. pub fn new_opaque() -> Origin { - static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; + static COUNTER: AtomicUsize = AtomicUsize::new(0); Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) } @@ -110,7 +96,7 @@ impl Origin { let (domain, _errors) = domain_to_unicode(domain); Host::Domain(domain) } - _ => host.clone() + _ => host.clone(), }; if default_port(scheme) == Some(port) { format!("{}://{}", scheme, host) @@ -125,6 +111,3 @@ impl Origin { /// Opaque identifier for URLs that have file or other schemes #[derive(Eq, PartialEq, Hash, Clone, Debug)] pub struct OpaqueOrigin(usize); - -#[cfg(feature = "heapsize")] -known_heap_size!(0, OpaqueOrigin); diff --git a/src/parser.rs b/src/parser.rs index 4f9cc524b..7a6eaad4f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,21 +6,17 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[allow(unused_imports, deprecated)] -use std::ascii::AsciiExt; - use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; -use Url; -use encoding::EncodingOverride; use host::{Host, HostInternal}; use percent_encoding::{ - utf8_percent_encode, percent_encode, - SIMPLE_ENCODE_SET, DEFAULT_ENCODE_SET, USERINFO_ENCODE_SET, QUERY_ENCODE_SET, - PATH_SEGMENT_ENCODE_SET + percent_encode, utf8_percent_encode, DEFAULT_ENCODE_SET, PATH_SEGMENT_ENCODE_SET, + QUERY_ENCODE_SET, SIMPLE_ENCODE_SET, USERINFO_ENCODE_SET, }; +use query_encoding::EncodingOverride; +use Url; define_encode_set! { // The backslash (\) character is treated as a path separator in special URLs @@ -65,17 +61,16 @@ simple_enum_error! { Overflow => "URLs more than 4 GB are not supported", } -#[cfg(feature = "heapsize")] -known_heap_size!(0, ParseError); - impl fmt::Display for ParseError { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { self.description().fmt(fmt) } } -impl From<::idna::uts46::Errors> for ParseError { - fn from(_: ::idna::uts46::Errors) -> ParseError { ParseError::IdnaError } +impl From<::idna::Errors> for ParseError { + fn from(_: ::idna::Errors) -> ParseError { + ParseError::IdnaError + } } macro_rules! syntax_violation_enum { @@ -117,9 +112,6 @@ syntax_violation_enum! { UnencodedAtSign => "unencoded @ sign in username or password", } -#[cfg(feature = "heapsize")] -known_heap_size!(0, SyntaxViolation); - impl fmt::Display for SyntaxViolation { fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { self.description().fmt(fmt) @@ -168,20 +160,22 @@ pub struct Input<'i> { impl<'i> Input<'i> { pub fn new(input: &'i str) -> Self { - Input::with_log(input, ViolationFn::NoOp) + Input::with_log(input, None) } - pub fn with_log(original_input: &'i str, vfn: ViolationFn) -> Self { + pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self { let input = original_input.trim_matches(c0_control_or_space); - if vfn.is_set() { + if let Some(vfn) = vfn { if input.len() < original_input.len() { - vfn.call(SyntaxViolation::C0SpaceIgnored) + vfn(SyntaxViolation::C0SpaceIgnored) } if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { - vfn.call(SyntaxViolation::TabOrNewlineIgnored) + vfn(SyntaxViolation::TabOrNewlineIgnored) } } - Input { chars: input.chars() } + Input { + chars: input.chars(), + } } #[inline] @@ -220,7 +214,7 @@ impl<'i> Input<'i> { remaining = input; count += 1; } else { - return (count, remaining) + return (count, remaining); } } } @@ -232,10 +226,10 @@ impl<'i> Input<'i> { match self.chars.next() { Some(c) => { if !matches!(c, '\t' | '\n' | '\r') { - return Some((c, &utf8[..c.len_utf8()])) + return Some((c, &utf8[..c.len_utf8()])); } } - None => return None + None => return None, } } } @@ -246,14 +240,16 @@ pub trait Pattern { } impl Pattern for char { - fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next() == Some(self) } + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { + input.next() == Some(self) + } } impl<'a> Pattern for &'a str { fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { for c in self.chars() { if input.next() != Some(c) { - return false + return false; } } true @@ -261,70 +257,25 @@ impl<'a> Pattern for &'a str { } impl bool> Pattern for F { - fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next().map_or(false, self) } + fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { + input.next().map_or(false, self) + } } impl<'i> Iterator for Input<'i> { type Item = char; fn next(&mut self) -> Option { - self.chars.by_ref().find(|&c| !matches!(c, '\t' | '\n' | '\r')) - } -} - -/// Wrapper for syntax violation callback functions. -#[derive(Copy, Clone)] -pub enum ViolationFn<'a> { - NewFn(&'a (Fn(SyntaxViolation) + 'a)), - OldFn(&'a (Fn(&'static str) + 'a)), - NoOp -} - -impl<'a> ViolationFn<'a> { - /// Call with a violation. - pub fn call(self, v: SyntaxViolation) { - match self { - ViolationFn::NewFn(f) => f(v), - ViolationFn::OldFn(f) => f(v.description()), - ViolationFn::NoOp => {} - } - } - - /// Call with a violation, if provided test returns true. Avoids - /// the test entirely if `NoOp`. - pub fn call_if(self, v: SyntaxViolation, test: F) - where F: Fn() -> bool - { - match self { - ViolationFn::NewFn(f) => if test() { f(v) }, - ViolationFn::OldFn(f) => if test() { f(v.description()) }, - ViolationFn::NoOp => {} // avoid test - } - } - - /// True if not `NoOp` - pub fn is_set(self) -> bool { - match self { - ViolationFn::NoOp => false, - _ => true - } - } -} - -impl<'a> fmt::Debug for ViolationFn<'a> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match *self { - ViolationFn::NewFn(_) => write!(f, "NewFn(Fn(SyntaxViolation))"), - ViolationFn::OldFn(_) => write!(f, "OldFn(Fn(&'static str))"), - ViolationFn::NoOp => write!(f, "NoOp") - } + self.chars + .by_ref() + .find(|&c| !matches!(c, '\t' | '\n' | '\r')) } } pub struct Parser<'a> { pub serialization: String, pub base_url: Option<&'a Url>, - pub query_encoding_override: EncodingOverride, - pub violation_fn: ViolationFn<'a>, + pub query_encoding_override: EncodingOverride<'a>, + pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, pub context: Context, } @@ -336,12 +287,26 @@ pub enum Context { } impl<'a> Parser<'a> { + fn log_violation(&self, v: SyntaxViolation) { + if let Some(f) = self.violation_fn { + f(v) + } + } + + fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) { + if let Some(f) = self.violation_fn { + if test() { + f(v) + } + } + } + pub fn for_setter(serialization: String) -> Parser<'a> { Parser { serialization: serialization, base_url: None, - query_encoding_override: EncodingOverride::utf8(), - violation_fn: ViolationFn::NoOp, + query_encoding_override: None, + violation_fn: None, context: Context::Setter, } } @@ -350,7 +315,7 @@ impl<'a> Parser<'a> { pub fn parse_url(mut self, input: &str) -> ParseResult { let input = Input::with_log(input, self.violation_fn); if let Ok(remaining) = self.parse_scheme(input.clone()) { - return self.parse_with_scheme(remaining) + return self.parse_with_scheme(remaining); } // No-scheme state @@ -374,18 +339,18 @@ impl<'a> Parser<'a> { pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result, ()> { if input.is_empty() || !input.starts_with(ascii_alpha) { - return Err(()) + return Err(()); } debug_assert!(self.serialization.is_empty()); while let Some(c) = input.next() { match c { - 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.' => { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => { self.serialization.push(c.to_ascii_lowercase()) } ':' => return Ok(input), _ => { self.serialization.clear(); - return Err(()) + return Err(()); } } } @@ -399,15 +364,19 @@ impl<'a> Parser<'a> { } fn parse_with_scheme(mut self, input: Input) -> ParseResult { - use SyntaxViolation::{ExpectedFileDoubleSlash, ExpectedDoubleSlash}; + use SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash}; let scheme_end = to_u32(self.serialization.len())?; let scheme_type = SchemeType::from(&self.serialization); self.serialization.push(':'); match scheme_type { SchemeType::File => { - self.violation_fn.call_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); + self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); let base_file_url = self.base_url.and_then(|base| { - if base.scheme() == "file" { Some(base) } else { None } + if base.scheme() == "file" { + Some(base) + } else { + None + } }); self.serialization.clear(); self.parse_file(input, base_file_url) @@ -416,31 +385,39 @@ impl<'a> Parser<'a> { // special relative or authority state let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if let Some(base_url) = self.base_url { - if slashes_count < 2 && - base_url.scheme() == &self.serialization[..scheme_end as usize] { + if slashes_count < 2 + && base_url.scheme() == &self.serialization[..scheme_end as usize] + { // "Cannot-be-a-base" URLs only happen with "not special" schemes. debug_assert!(!base_url.cannot_be_a_base()); self.serialization.clear(); - return self.parse_relative(input, scheme_type, base_url) + return self.parse_relative(input, scheme_type, base_url); } } // special authority slashes state - self.violation_fn.call_if(ExpectedDoubleSlash, || { - input.clone().take_while(|&c| matches!(c, '/' | '\\')) - .collect::() != "//" + self.log_violation_if(ExpectedDoubleSlash, || { + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" }); self.after_double_slash(remaining, scheme_type, scheme_end) } - SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end) + SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end), } } /// Scheme other than file, http, https, ws, ws, ftp, gopher. - fn parse_non_special(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) - -> ParseResult { + fn parse_non_special( + mut self, + input: Input, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { // path or authority state ( if let Some(input) = input.split_prefix("//") { - return self.after_double_slash(input, scheme_type, scheme_end) + return self.after_double_slash(input, scheme_type, scheme_end); } // Anarchist URL (no authority) let path_start = to_u32(self.serialization.len())?; @@ -456,8 +433,16 @@ impl<'a> Parser<'a> { } else { self.parse_cannot_be_a_base_path(input) }; - self.with_query_and_fragment(scheme_end, username_end, host_start, - host_end, host, port, path_start, remaining) + self.with_query_and_fragment( + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) } fn parse_file(mut self, input: Input, mut base_file_url: Option<&Url>) -> ParseResult { @@ -496,14 +481,13 @@ impl<'a> Parser<'a> { fragment_start: None, }) } - }, + } Some('?') => { if let Some(base_url) = base_file_url { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = @@ -533,7 +517,7 @@ impl<'a> Parser<'a> { fragment_start: fragment_start, }) } - }, + } Some('#') => { if let Some(base_url) = base_file_url { self.fragment_only(base_url, input) @@ -559,10 +543,10 @@ impl<'a> Parser<'a> { } } Some('/') | Some('\\') => { - self.violation_fn.call_if(Backslash, || first_char == Some('\\')); + self.log_violation_if(Backslash, || first_char == Some('\\')); // file slash state let (next_char, input_after_next_char) = input_after_first_char.split_first(); - self.violation_fn.call_if(Backslash, || next_char == Some('\\')); + self.log_violation_if(Backslash, || next_char == Some('\\')); if matches!(next_char, Some('/') | Some('\\')) { // file host state self.serialization.push_str("file://"); @@ -582,7 +566,8 @@ impl<'a> Parser<'a> { // For file URLs that have a host and whose path starts // with the windows drive letter we just remove the host. if !has_host { - self.serialization.drain(host_start as usize..host_end as usize); + self.serialization + .drain(host_start as usize..host_end as usize); host_end = host_start; host = HostInternal::None; } @@ -613,7 +598,11 @@ impl<'a> Parser<'a> { } } let remaining = self.parse_path( - SchemeType::File, &mut false, path_start, input_after_first_char); + SchemeType::File, + &mut false, + path_start, + input_after_first_char, + ); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; let path_start = path_start as u32; @@ -638,22 +627,32 @@ impl<'a> Parser<'a> { if let Some(base_url) = base_file_url { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); self.pop_path(SchemeType::File, base_url.path_start as usize); let remaining = self.parse_path( - SchemeType::File, &mut true, base_url.path_start as usize, input); + SchemeType::File, + &mut true, + base_url.path_start as usize, + input, + ); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; let path_start = "file://".len(); - let remaining = self.parse_path( - SchemeType::File, &mut false, path_start, input); + let remaining = + self.parse_path(SchemeType::File, &mut false, path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; let path_start = path_start as u32; @@ -674,8 +673,12 @@ impl<'a> Parser<'a> { } } - fn parse_relative(mut self, input: Input, scheme_type: SchemeType, base_url: &Url) - -> ParseResult { + fn parse_relative( + mut self, + input: Input, + scheme_type: SchemeType, + base_url: &Url, + ) -> ParseResult { // relative state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); @@ -692,13 +695,12 @@ impl<'a> Parser<'a> { fragment_start: None, ..*base_url }) - }, + } Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = @@ -709,49 +711,75 @@ impl<'a> Parser<'a> { fragment_start: fragment_start, ..*base_url }) - }, + } Some('#') => self.fragment_only(base_url, input), Some('/') | Some('\\') => { let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if slashes_count >= 2 { - self.violation_fn.call_if(SyntaxViolation::ExpectedDoubleSlash, || { - input.clone().take_while(|&c| matches!(c, '/' | '\\')) - .collect::() != "//" + self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || { + input + .clone() + .take_while(|&c| matches!(c, '/' | '\\')) + .collect::() + != "//" }); let scheme_end = base_url.scheme_end; debug_assert!(base_url.byte_at(scheme_end) == b':'); - self.serialization.push_str(base_url.slice(..scheme_end + 1)); - return self.after_double_slash(remaining, scheme_type, scheme_end) + self.serialization + .push_str(base_url.slice(..scheme_end + 1)); + return self.after_double_slash(remaining, scheme_type, scheme_end); } let path_start = base_url.path_start; debug_assert!(base_url.byte_at(path_start) == b'/'); - self.serialization.push_str(base_url.slice(..path_start + 1)); + self.serialization + .push_str(base_url.slice(..path_start + 1)); let remaining = self.parse_path( - scheme_type, &mut true, path_start as usize, input_after_first_char); + scheme_type, + &mut true, + path_start as usize, + input_after_first_char, + ); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } _ => { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, - (Some(i), _) | - (None, Some(i)) => base_url.slice(..i) + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); // FIXME spec says just "remove last entry", not the "pop" algorithm self.pop_path(scheme_type, base_url.path_start as usize); - let remaining = self.parse_path( - scheme_type, &mut true, base_url.path_start as usize, input); + let remaining = + self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input); self.with_query_and_fragment( - base_url.scheme_end, base_url.username_end, base_url.host_start, - base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining) + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } } } - fn after_double_slash(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) - -> ParseResult { + fn after_double_slash( + mut self, + input: Input, + scheme_type: SchemeType, + scheme_end: u32, + ) -> ParseResult { self.serialization.push('/'); self.serialization.push('/'); // authority state @@ -762,15 +790,25 @@ impl<'a> Parser<'a> { self.parse_host_and_port(remaining, scheme_end, scheme_type)?; // path state let path_start = to_u32(self.serialization.len())?; - let remaining = self.parse_path_start( - scheme_type, &mut true, remaining); - self.with_query_and_fragment(scheme_end, username_end, host_start, - host_end, host, port, path_start, remaining) + let remaining = self.parse_path_start(scheme_type, &mut true, remaining); + self.with_query_and_fragment( + scheme_end, + username_end, + host_start, + host_end, + host, + port, + path_start, + remaining, + ) } /// Return (username_end, remaining) - fn parse_userinfo<'i>(&mut self, mut input: Input<'i>, scheme_type: SchemeType) - -> ParseResult<(u32, Input<'i>)> { + fn parse_userinfo<'i>( + &mut self, + mut input: Input<'i>, + scheme_type: SchemeType, + ) -> ParseResult<(u32, Input<'i>)> { let mut last_at = None; let mut remaining = input.clone(); let mut char_count = 0; @@ -778,12 +816,12 @@ impl<'a> Parser<'a> { match c { '@' => { if last_at.is_some() { - self.violation_fn.call(SyntaxViolation::UnencodedAtSign) + self.log_violation(SyntaxViolation::UnencodedAtSign) } else { - self.violation_fn.call(SyntaxViolation::EmbeddedCredentials) + self.log_violation(SyntaxViolation::EmbeddedCredentials) } last_at = Some((char_count, remaining.clone())) - }, + } '/' | '?' | '#' => break, '\\' if scheme_type.is_special() => break, _ => (), @@ -793,7 +831,7 @@ impl<'a> Parser<'a> { let (mut userinfo_char_count, remaining) = match last_at { None => return Ok((to_u32(self.serialization.len())?, input)), Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)), - Some(x) => x + Some(x) => x, }; let mut username_end = None; @@ -815,7 +853,8 @@ impl<'a> Parser<'a> { has_username = true; } self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET)); } } let username_end = match username_end { @@ -828,9 +867,12 @@ impl<'a> Parser<'a> { Ok((username_end, remaining)) } - fn parse_host_and_port<'i>(&mut self, input: Input<'i>, - scheme_end: u32, scheme_type: SchemeType) - -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { + fn parse_host_and_port<'i>( + &mut self, + input: Input<'i>, + scheme_end: u32, + scheme_type: SchemeType, + ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { let (host, remaining) = Parser::parse_host(input, scheme_type)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; @@ -846,8 +888,10 @@ impl<'a> Parser<'a> { Ok((host_end, host.into(), port, remaining)) } - pub fn parse_host(mut input: Input, scheme_type: SchemeType) - -> ParseResult<(Host, Input)> { + pub fn parse_host( + mut input: Input, + scheme_type: SchemeType, + ) -> ParseResult<(Host, Input)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -871,7 +915,7 @@ impl<'a> Parser<'a> { inside_square_brackets = false; non_ignored_chars += 1 } - _ => non_ignored_chars += 1 + _ => non_ignored_chars += 1, } bytes += c.len_utf8(); } @@ -888,7 +932,7 @@ impl<'a> Parser<'a> { } } if scheme_type.is_special() && host_str.is_empty() { - return Err(ParseError::EmptyHost) + return Err(ParseError::EmptyHost); } if !scheme_type.is_special() { let host = Host::parse_opaque(host_str)?; @@ -898,8 +942,10 @@ impl<'a> Parser<'a> { Ok((host, input)) } - pub fn parse_file_host<'i>(&mut self, input: Input<'i>) - -> ParseResult<(bool, HostInternal, Input<'i>)> { + pub fn parse_file_host<'i>( + &mut self, + input: Input<'i>, + ) -> ParseResult<(bool, HostInternal, Input<'i>)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -928,7 +974,7 @@ impl<'a> Parser<'a> { } } if is_windows_drive_letter(host_str) { - return Ok((false, HostInternal::None, input)) + return Ok((false, HostInternal::None, input)); } let host = if host_str.is_empty() { HostInternal::None @@ -944,23 +990,27 @@ impl<'a> Parser<'a> { Ok((true, host, remaining)) } - pub fn parse_port

(mut input: Input, default_port: P, - context: Context) - -> ParseResult<(Option, Input)> - where P: Fn() -> Option { + pub fn parse_port

( + mut input: Input, + default_port: P, + context: Context, + ) -> ParseResult<(Option, Input)> + where + P: Fn() -> Option, + { let mut port: u32 = 0; let mut has_any_digit = false; while let (Some(c), remaining) = input.split_first() { if let Some(digit) = c.to_digit(10) { port = port * 10 + digit; if port > ::std::u16::MAX as u32 { - return Err(ParseError::InvalidPort) + return Err(ParseError::InvalidPort); } has_any_digit = true; } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') { - return Err(ParseError::InvalidPort) + return Err(ParseError::InvalidPort); } else { - break + break; } input = remaining; } @@ -971,16 +1021,21 @@ impl<'a> Parser<'a> { Ok((opt_port, input)) } - pub fn parse_path_start<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, - mut input: Input<'i>) - -> Input<'i> { + pub fn parse_path_start<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + mut input: Input<'i>, + ) -> Input<'i> { // Path start state match input.split_first() { (Some('/'), remaining) => input = remaining, - (Some('\\'), remaining) => if scheme_type.is_special() { - self.violation_fn.call(SyntaxViolation::Backslash); - input = remaining - }, + (Some('\\'), remaining) => { + if scheme_type.is_special() { + self.log_violation(SyntaxViolation::Backslash); + input = remaining + } + } _ => {} } let path_start = self.serialization.len(); @@ -988,9 +1043,13 @@ impl<'a> Parser<'a> { self.parse_path(scheme_type, has_host, path_start, input) } - pub fn parse_path<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, - path_start: usize, mut input: Input<'i>) - -> Input<'i> { + pub fn parse_path<'i>( + &mut self, + scheme_type: SchemeType, + has_host: &mut bool, + path_start: usize, + mut input: Input<'i>, + ) -> Input<'i> { // Relative path state debug_assert!(self.serialization.ends_with('/')); loop { @@ -998,62 +1057,70 @@ impl<'a> Parser<'a> { let mut ends_with_slash = false; loop { let input_before_c = input.clone(); - let (c, utf8_c) = if let Some(x) = input.next_utf8() { x } else { break }; + let (c, utf8_c) = if let Some(x) = input.next_utf8() { + x + } else { + break; + }; match c { '/' if self.context != Context::PathSegmentSetter => { ends_with_slash = true; - break - }, - '\\' if self.context != Context::PathSegmentSetter && - scheme_type.is_special() => { - self.violation_fn.call(SyntaxViolation::Backslash); + break; + } + '\\' if self.context != Context::PathSegmentSetter + && scheme_type.is_special() => + { + self.log_violation(SyntaxViolation::Backslash); ends_with_slash = true; - break - }, + break; + } '?' | '#' if self.context == Context::UrlParser => { input = input_before_c; - break - }, + break; + } _ => { self.check_url_code_point(c, &input); if self.context == Context::PathSegmentSetter { if scheme_type.is_special() { self.serialization.extend(utf8_percent_encode( - utf8_c, SPECIAL_PATH_SEGMENT_ENCODE_SET)); + utf8_c, + SPECIAL_PATH_SEGMENT_ENCODE_SET, + )); } else { - self.serialization.extend(utf8_percent_encode( - utf8_c, PATH_SEGMENT_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT_ENCODE_SET)); } } else { - self.serialization.extend(utf8_percent_encode( - utf8_c, DEFAULT_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, DEFAULT_ENCODE_SET)); } } } } match &self.serialization[segment_start..] { - ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" | ".%2E" => { + ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" + | ".%2E" => { debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); - self.serialization.truncate(segment_start - 1); // Truncate "/.." + self.serialization.truncate(segment_start - 1); // Truncate "/.." self.pop_path(scheme_type, path_start); if !self.serialization[path_start..].ends_with('/') { self.serialization.push('/') } - }, + } "." | "%2e" | "%2E" => { self.serialization.truncate(segment_start); - }, + } _ => { - if scheme_type.is_file() && is_windows_drive_letter( - &self.serialization[path_start + 1..] - ) { + if scheme_type.is_file() + && is_windows_drive_letter(&self.serialization[path_start + 1..]) + { if self.serialization.ends_with('|') { self.serialization.pop(); self.serialization.push(':'); } if *has_host { - self.violation_fn.call(SyntaxViolation::FileWithHostAndWindowsDrive); - *has_host = false; // FIXME account for this in callers + self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); + *has_host = false; // FIXME account for this in callers } } if ends_with_slash { @@ -1062,7 +1129,7 @@ impl<'a> Parser<'a> { } } if !ends_with_slash { - break + break; } } input @@ -1076,14 +1143,12 @@ impl<'a> Parser<'a> { let segment_start = path_start + slash_position + 1; // Don’t pop a Windows drive letter // FIXME: *normalized* Windows drive letter - if !( - scheme_type.is_file() && - is_windows_drive_letter(&self.serialization[segment_start..]) - ) { + if !(scheme_type.is_file() + && is_windows_drive_letter(&self.serialization[segment_start..])) + { self.serialization.truncate(segment_start); } } - } pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> { @@ -1095,20 +1160,26 @@ impl<'a> Parser<'a> { } Some((c, utf8_c)) => { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode( - utf8_c, SIMPLE_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET)); } - None => return input + None => return input, } } } - fn with_query_and_fragment(mut self, scheme_end: u32, username_end: u32, - host_start: u32, host_end: u32, host: HostInternal, - port: Option, path_start: u32, remaining: Input) - -> ParseResult { - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_end, remaining)?; + fn with_query_and_fragment( + mut self, + scheme_end: u32, + username_end: u32, + host_start: u32, + host_end: u32, + host: HostInternal, + port: Option, + path_start: u32, + remaining: Input, + ) -> ParseResult { + let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_end, remaining)?; Ok(Url { serialization: self.serialization, scheme_end: scheme_end, @@ -1119,13 +1190,16 @@ impl<'a> Parser<'a> { port: port, path_start: path_start, query_start: query_start, - fragment_start: fragment_start + fragment_start: fragment_start, }) } /// Return (query_start, fragment_start) - fn parse_query_and_fragment(&mut self, scheme_end: u32, mut input: Input) - -> ParseResult<(Option, Option)> { + fn parse_query_and_fragment( + &mut self, + scheme_end: u32, + mut input: Input, + ) -> ParseResult<(Option, Option)> { let mut query_start = None; match input.next() { Some('#') => {} @@ -1136,11 +1210,11 @@ impl<'a> Parser<'a> { if let Some(remaining) = remaining { input = remaining } else { - return Ok((query_start, None)) + return Ok((query_start, None)); } } None => return Ok((None, None)), - _ => panic!("Programming error. parse_query_and_fragment() called without ? or #") + _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"), } let fragment_start = to_u32(self.serialization.len())?; @@ -1149,14 +1223,13 @@ impl<'a> Parser<'a> { Ok((query_start, Some(fragment_start))) } - pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) - -> Option> { - let mut query = String::new(); // FIXME: use a streaming decoder instead + pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) -> Option> { + let mut query = String::new(); // FIXME: use a streaming decoder instead let mut remaining = None; while let Some(c) = input.next() { if c == '#' && self.context == Context::UrlParser { remaining = Some(input); - break + break; } else { self.check_url_code_point(c, &input); query.push(c); @@ -1165,10 +1238,11 @@ impl<'a> Parser<'a> { let encoding = match &self.serialization[..scheme_end as usize] { "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override, - _ => EncodingOverride::utf8(), + _ => None, }; - let query_bytes = encoding.encode(query.into()); - self.serialization.extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); + let query_bytes = ::query_encoding::encode(encoding, &query); + self.serialization + .extend(percent_encode(&query_bytes, QUERY_ENCODE_SET)); remaining } @@ -1178,7 +1252,8 @@ impl<'a> Parser<'a> { None => &*base_url.serialization, }; debug_assert!(self.serialization.is_empty()); - self.serialization.reserve(before_fragment.len() + input.chars.as_str().len()); + self.serialization + .reserve(before_fragment.len() + input.chars.as_str().len()); self.serialization.push_str(before_fragment); self.serialization.push('#'); let next = input.next(); @@ -1193,27 +1268,27 @@ impl<'a> Parser<'a> { pub fn parse_fragment(&mut self, mut input: Input) { while let Some((c, utf8_c)) = input.next_utf8() { - if c == '\0' { - self.violation_fn.call(SyntaxViolation::NullInFragment) + if c == '\0' { + self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode(utf8_c, - SIMPLE_ENCODE_SET)); + self.serialization + .extend(utf8_percent_encode(utf8_c, SIMPLE_ENCODE_SET)); } } } fn check_url_code_point(&self, c: char, input: &Input) { - let vfn = self.violation_fn; - if vfn.is_set() { + if let Some(vfn) = self.violation_fn { if c == '%' { let mut input = input.clone(); if !matches!((input.next(), input.next()), (Some(a), Some(b)) - if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) { - vfn.call(SyntaxViolation::PercentDecode) + if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) + { + vfn(SyntaxViolation::PercentDecode) } } else if !is_url_code_point(c) { - vfn.call(SyntaxViolation::NonUrlCodePoint) + vfn(SyntaxViolation::NonUrlCodePoint) } } } @@ -1221,7 +1296,7 @@ impl<'a> Parser<'a> { #[inline] fn is_ascii_hex_digit(c: char) -> bool { - matches!(c, 'a'...'f' | 'A'...'F' | '0'...'9') + matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9') } // Non URL code points: @@ -1234,32 +1309,32 @@ fn is_ascii_hex_digit(c: char) -> bool { #[inline] fn is_url_code_point(c: char) -> bool { matches!(c, - 'a'...'z' | - 'A'...'Z' | - '0'...'9' | + 'a'..='z' | + 'A'..='Z' | + '0'..='9' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' | - '\u{A0}'...'\u{D7FF}' | '\u{E000}'...'\u{FDCF}' | '\u{FDF0}'...'\u{FFFD}' | - '\u{10000}'...'\u{1FFFD}' | '\u{20000}'...'\u{2FFFD}' | - '\u{30000}'...'\u{3FFFD}' | '\u{40000}'...'\u{4FFFD}' | - '\u{50000}'...'\u{5FFFD}' | '\u{60000}'...'\u{6FFFD}' | - '\u{70000}'...'\u{7FFFD}' | '\u{80000}'...'\u{8FFFD}' | - '\u{90000}'...'\u{9FFFD}' | '\u{A0000}'...'\u{AFFFD}' | - '\u{B0000}'...'\u{BFFFD}' | '\u{C0000}'...'\u{CFFFD}' | - '\u{D0000}'...'\u{DFFFD}' | '\u{E1000}'...'\u{EFFFD}' | - '\u{F0000}'...'\u{FFFFD}' | '\u{100000}'...'\u{10FFFD}') + '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | + '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' | + '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' | + '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' | + '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' | + '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' | + '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' | + '\u{D0000}'..='\u{DFFFD}' | '\u{E1000}'..='\u{EFFFD}' | + '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}') } /// https://url.spec.whatwg.org/#c0-controls-and-space #[inline] fn c0_control_or_space(ch: char) -> bool { - ch <= ' ' // U+0000 to U+0020 + ch <= ' ' // U+0000 to U+0020 } /// https://url.spec.whatwg.org/#ascii-alpha #[inline] pub fn ascii_alpha(ch: char) -> bool { - matches!(ch, 'a'...'z' | 'A'...'Z') + matches!(ch, 'a'..='z' | 'A'..='Z') } #[inline] @@ -1274,13 +1349,11 @@ pub fn to_u32(i: usize) -> ParseResult { /// Wether the scheme is file:, the path has a single segment, and that segment /// is a Windows drive letter fn is_windows_drive_letter(segment: &str) -> bool { - segment.len() == 2 - && starts_with_windows_drive_letter(segment) + segment.len() == 2 && starts_with_windows_drive_letter(segment) } fn starts_with_windows_drive_letter(s: &str) -> bool { - ascii_alpha(s.as_bytes()[0] as char) - && matches!(s.as_bytes()[1], b':' | b'|') + ascii_alpha(s.as_bytes()[0] as char) && matches!(s.as_bytes()[1], b':' | b'|') } fn starts_with_windows_drive_letter_segment(input: &Input) -> bool { diff --git a/src/path_segments.rs b/src/path_segments.rs index f5b7d51f9..459d088db 100644 --- a/src/path_segments.rs +++ b/src/path_segments.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use parser::{self, SchemeType, to_u32}; +use parser::{self, to_u32, SchemeType}; use std::str; use Url; @@ -56,7 +56,8 @@ pub fn new(url: &mut Url) -> PathSegmentsMut { impl<'a> Drop for PathSegmentsMut<'a> { fn drop(&mut self) { - self.url.restore_after_path(self.old_after_path_position, &self.after_path) + self.url + .restore_after_path(self.old_after_path_position, &self.after_path) } } @@ -126,8 +127,12 @@ impl<'a> PathSegmentsMut<'a> { /// /// Returns `&mut Self` so that method calls can be chained. pub fn pop(&mut self) -> &mut Self { - let last_slash = self.url.serialization[self.after_first_slash..].rfind('/').unwrap_or(0); - self.url.serialization.truncate(self.after_first_slash + last_slash); + let last_slash = self.url.serialization[self.after_first_slash..] + .rfind('/') + .unwrap_or(0); + self.url + .serialization + .truncate(self.after_first_slash + last_slash); self } @@ -194,7 +199,10 @@ impl<'a> PathSegmentsMut<'a> { /// # run().unwrap(); /// ``` pub fn extend(&mut self, segments: I) -> &mut Self - where I: IntoIterator, I::Item: AsRef { + where + I: IntoIterator, + I::Item: AsRef, + { let scheme_type = SchemeType::from(self.url.scheme()); let path_start = self.url.path_start as usize; self.url.mutate(|parser| { @@ -202,14 +210,18 @@ impl<'a> PathSegmentsMut<'a> { for segment in segments { let segment = segment.as_ref(); if matches!(segment, "." | "..") { - continue + continue; } if parser.serialization.len() > path_start + 1 { parser.serialization.push('/'); } - let mut has_host = true; // FIXME account for this? - parser.parse_path(scheme_type, &mut has_host, path_start, - parser::Input::new(segment)); + let mut has_host = true; // FIXME account for this? + parser.parse_path( + scheme_type, + &mut has_host, + path_start, + parser::Input::new(segment), + ); } }); self diff --git a/src/query_encoding.rs b/src/query_encoding.rs new file mode 100644 index 000000000..76aed15a7 --- /dev/null +++ b/src/query_encoding.rs @@ -0,0 +1,35 @@ +// Copyright 2019 The rust-url developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::borrow::Cow; + +pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<[u8]>>; + +pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) -> Cow<'a, [u8]> { + if let Some(o) = encoding_override { + return o(input); + } + input.as_bytes().into() +} + +pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { + match input { + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + Cow::Owned(bytes) => { + let raw_utf8: *const [u8]; + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), + Cow::Owned(s) => return s.into(), + } + // from_utf8_lossy returned a borrow of `bytes` unchanged. + debug_assert!(raw_utf8 == &*bytes as *const [u8]); + // Reuse the existing `Vec` allocation. + unsafe { String::from_utf8_unchecked(bytes) }.into() + } + } +} diff --git a/src/quirks.rs b/src/quirks.rs index 0c7aaa894..285ee21b6 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -11,8 +11,8 @@ //! Unless you need to be interoperable with web browsers, //! you probably want to use `Url` method instead. -use {Url, Position, Host, ParseError, idna}; -use parser::{Parser, SchemeType, default_port, Context, Input}; +use parser::{default_port, Context, Input, Parser, SchemeType}; +use {idna, Host, ParseError, Position, Url}; /// https://url.spec.whatwg.org/#dom-url-domaintoascii pub fn domain_to_ascii(domain: &str) -> String { @@ -84,7 +84,11 @@ pub fn password(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-password pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { - url.set_password(if new_password.is_empty() { None } else { Some(new_password) }) + url.set_password(if new_password.is_empty() { + None + } else { + Some(new_password) + }) } /// Getter for https://url.spec.whatwg.org/#dom-url-host @@ -96,7 +100,7 @@ pub fn host(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-host pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { if url.cannot_be_a_base() { - return Err(()) + return Err(()); } let host; let opt_port; @@ -108,12 +112,13 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { host = h; opt_port = if let Some(remaining) = remaining.split_prefix(':') { Parser::parse_port(remaining, || default_port(scheme), Context::Setter) - .ok().map(|(port, _remaining)| port) + .ok() + .map(|(port, _remaining)| port) } else { None }; } - Err(_) => return Err(()) + Err(_) => return Err(()), } } url.set_host_internal(host, opt_port); @@ -129,7 +134,7 @@ pub fn hostname(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-hostname pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { if url.cannot_be_a_base() { - return Err(()) + return Err(()); } let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme())); if let Ok((host, _remaining)) = result { @@ -153,9 +158,13 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { // has_host implies !cannot_be_a_base let scheme = url.scheme(); if !url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { - return Err(()) + return Err(()); } - result = Parser::parse_port(Input::new(new_port), || default_port(scheme), Context::Setter) + result = Parser::parse_port( + Input::new(new_port), + || default_port(scheme), + Context::Setter, + ) } if let Ok((new_port, _remaining)) = result { url.set_port_internal(new_port); @@ -168,7 +177,7 @@ pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { /// Getter for https://url.spec.whatwg.org/#dom-url-pathname #[inline] pub fn pathname(url: &Url) -> &str { - url.path() + url.path() } /// Setter for https://url.spec.whatwg.org/#dom-url-pathname diff --git a/src/slicing.rs b/src/slicing.rs index 926f3c796..2d7f78e6f 100644 --- a/src/slicing.rs +++ b/src/slicing.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index}; +use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; use Url; impl Index for Url { @@ -94,7 +94,7 @@ pub enum Position { BeforeQuery, AfterQuery, BeforeFragment, - AfterFragment + AfterFragment, } impl Url { @@ -105,43 +105,49 @@ impl Url { Position::AfterScheme => self.scheme_end as usize, - Position::BeforeUsername => if self.has_authority() { - self.scheme_end as usize + "://".len() - } else { - debug_assert!(self.byte_at(self.scheme_end) == b':'); - debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); - self.scheme_end as usize + ":".len() - }, + Position::BeforeUsername => { + if self.has_authority() { + self.scheme_end as usize + "://".len() + } else { + debug_assert!(self.byte_at(self.scheme_end) == b':'); + debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); + self.scheme_end as usize + ":".len() + } + } Position::AfterUsername => self.username_end as usize, - Position::BeforePassword => if self.has_authority() && - self.byte_at(self.username_end) == b':' { - self.username_end as usize + ":".len() - } else { - debug_assert!(self.username_end == self.host_start); - self.username_end as usize - }, - - Position::AfterPassword => if self.has_authority() && - self.byte_at(self.username_end) == b':' { - debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); - self.host_start as usize - "@".len() - } else { - debug_assert!(self.username_end == self.host_start); - self.host_start as usize - }, + Position::BeforePassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + self.username_end as usize + ":".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.username_end as usize + } + } + + Position::AfterPassword => { + if self.has_authority() && self.byte_at(self.username_end) == b':' { + debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); + self.host_start as usize - "@".len() + } else { + debug_assert!(self.username_end == self.host_start); + self.host_start as usize + } + } Position::BeforeHost => self.host_start as usize, Position::AfterHost => self.host_end as usize, - Position::BeforePort => if self.port.is_some() { - debug_assert!(self.byte_at(self.host_end) == b':'); - self.host_end as usize + ":".len() - } else { - self.host_end as usize - }, + Position::BeforePort => { + if self.port.is_some() { + debug_assert!(self.byte_at(self.host_end) == b':'); + self.host_end as usize + ":".len() + } else { + self.host_end as usize + } + } Position::AfterPort => self.path_start as usize, @@ -179,4 +185,3 @@ impl Url { } } } - diff --git a/tests/data.rs b/tests/data.rs index e9203b1b0..1981814e6 100644 --- a/tests/data.rs +++ b/tests/data.rs @@ -8,29 +8,29 @@ //! Data-driven tests -extern crate rustc_serialize; extern crate rustc_test as test; +extern crate serde_json; extern crate url; -use rustc_serialize::json::{self, Json}; -use url::{Url, quirks}; +use serde_json::Value; +use std::str::FromStr; +use url::{quirks, Url}; fn check_invariants(url: &Url) { url.check_invariants().unwrap(); - #[cfg(feature="serde")] { - extern crate serde_json; + #[cfg(feature = "serde")] + { let bytes = serde_json::to_vec(url).unwrap(); let new_url: Url = serde_json::from_slice(&bytes).unwrap(); assert_eq!(url, &new_url); } } - fn run_parsing(input: &str, base: &str, expected: Result) { let base = match Url::parse(&base) { Ok(base) => base, Err(_) if expected.is_err() => return, - Err(message) => panic!("Error parsing base {:?}: {}", base, message) + Err(message) => panic!("Error parsing base {:?}: {}", base, message), }; let (url, expected) = match (base.join(&input), expected) { (Ok(url), Ok(expected)) => (url, expected), @@ -42,14 +42,18 @@ fn run_parsing(input: &str, base: &str, expected: Result check_invariants(&url); macro_rules! assert_eq { - ($expected: expr, $got: expr) => { - { - let expected = $expected; - let got = $got; - assert!(expected == got, "{:?} != {} {:?} for URL {:?}", - got, stringify!($expected), expected, url); - } - } + ($expected: expr, $got: expr) => {{ + let expected = $expected; + let got = $got; + assert!( + expected == got, + "{:?} != {} {:?} for URL {:?}", + got, + stringify!($expected), + expected, + url + ); + }}; } macro_rules! assert_attributes { @@ -84,46 +88,45 @@ struct ExpectedAttributes { } trait JsonExt { - fn take(&mut self, key: &str) -> Option; - fn object(self) -> json::Object; + fn take_key(&mut self, key: &str) -> Option; fn string(self) -> String; fn take_string(&mut self, key: &str) -> String; } -impl JsonExt for Json { - fn take(&mut self, key: &str) -> Option { +impl JsonExt for Value { + fn take_key(&mut self, key: &str) -> Option { self.as_object_mut().unwrap().remove(key) } - fn object(self) -> json::Object { - if let Json::Object(o) = self { o } else { panic!("Not a Json::Object") } - } - fn string(self) -> String { - if let Json::String(s) = self { s } else { panic!("Not a Json::String") } + if let Value::String(s) = self { + s + } else { + panic!("Not a Value::String") + } } fn take_string(&mut self, key: &str) -> String { - self.take(key).unwrap().string() + self.take_key(key).unwrap().string() } } fn collect_parsing(add_test: &mut F) { // Copied form https://github.com/w3c/web-platform-tests/blob/master/url/ - let mut json = Json::from_str(include_str!("urltestdata.json")) + let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); for entry in json.as_array_mut().unwrap() { if entry.is_string() { - continue // ignore comments + continue; // ignore comments } let base = entry.take_string("base"); let input = entry.take_string("input"); - let expected = if entry.find("failure").is_some() { + let expected = if entry.take_key("failure").is_some() { Err(()) } else { Ok(ExpectedAttributes { href: entry.take_string("href"), - origin: entry.take("origin").map(Json::string), + origin: entry.take_key("origin").map(|s| s.string()), protocol: entry.take_string("protocol"), username: entry.take_string("username"), password: entry.take_string("password"), @@ -135,24 +138,31 @@ fn collect_parsing(add_test: &mut F) { hash: entry.take_string("hash"), }) }; - add_test(format!("{:?} @ base {:?}", input, base), - test::TestFn::dyn_test_fn(move || run_parsing(&input, &base, expected))); + add_test( + format!("{:?} @ base {:?}", input, base), + test::TestFn::dyn_test_fn(move || run_parsing(&input, &base, expected)), + ); } } -fn collect_setters(add_test: &mut F) where F: FnMut(String, test::TestFn) { - let mut json = Json::from_str(include_str!("setters_tests.json")) +fn collect_setters(add_test: &mut F) +where + F: FnMut(String, test::TestFn), +{ + let mut json = Value::from_str(include_str!("setters_tests.json")) .expect("JSON parse error in setters_tests.json"); macro_rules! setter { ($attr: expr, $setter: ident) => {{ - let mut tests = json.take($attr).unwrap(); + let mut tests = json.take_key($attr).unwrap(); for mut test in tests.as_array_mut().unwrap().drain(..) { - let comment = test.take("comment").map(Json::string).unwrap_or(String::new()); + let comment = test.take_key("comment") + .map(|s| s.string()) + .unwrap_or(String::new()); let href = test.take_string("href"); let new_value = test.take_string("new_value"); let name = format!("{:?}.{} = {:?} {}", href, $attr, new_value, comment); - let mut expected = test.take("expected").unwrap(); + let mut expected = test.take_key("expected").unwrap(); add_test(name, test::TestFn::dyn_test_fn(move || { let mut url = Url::parse(&href).unwrap(); check_invariants(&url); @@ -167,7 +177,7 @@ fn collect_setters(add_test: &mut F) where F: FnMut(String, test::TestFn) { macro_rules! assert_attributes { ($url: expr, $expected: expr, $($attr: ident)+) => { $( - if let Some(value) = $expected.take(stringify!($attr)) { + if let Some(value) = $expected.take_key(stringify!($attr)) { assert_eq!(quirks::$attr(&$url), value.string()) } )+ diff --git a/tests/unit.rs b/tests/unit.rs index 62401c943..9f3764911 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -8,15 +8,15 @@ //! Unit tests -#[macro_use] extern crate url; +#[macro_use] +extern crate percent_encoding; -use std::ascii::AsciiExt; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; use std::path::{Path, PathBuf}; -use url::{Host, HostAndPort, Url, form_urlencoded}; +use url::{form_urlencoded, Host, Url}; #[test] fn size() { @@ -25,7 +25,9 @@ fn size() { } macro_rules! assert_from_file_path { - ($path: expr) => { assert_from_file_path!($path, $path) }; + ($path: expr) => { + assert_from_file_path!($path, $path) + }; ($path: expr, $url_path: expr) => {{ let url = Url::from_file_path(Path::new($path)).unwrap(); assert_eq!(url.host(), None); @@ -34,8 +36,6 @@ macro_rules! assert_from_file_path { }}; } - - #[test] fn new_file_paths() { if cfg!(unix) { @@ -74,7 +74,10 @@ fn new_path_windows_fun() { assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); // Invalid UTF-8 - assert!(Url::parse("file:///C:/foo/ba%80r").unwrap().to_file_path().is_err()); + assert!(Url::parse("file:///C:/foo/ba%80r") + .unwrap() + .to_file_path() + .is_err()); // test windows canonicalized path let path = PathBuf::from(r"\\?\C:\foo\bar"); @@ -86,7 +89,6 @@ fn new_path_windows_fun() { } } - #[test] fn new_directory_paths() { if cfg!(unix) { @@ -100,7 +102,10 @@ fn new_directory_paths() { if cfg!(windows) { assert_eq!(Url::from_directory_path(Path::new("relative")), Err(())); assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(())); - assert_eq!(Url::from_directory_path(Path::new(r"\drive-relative")), Err(())); + assert_eq!( + Url::from_directory_path(Path::new(r"\drive-relative")), + Err(()) + ); assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(())); let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap(); @@ -127,10 +132,16 @@ fn from_str() { #[test] fn parse_with_params() { - let url = Url::parse_with_params("http://testing.com/this?dont=clobberme", - &[("lang", "rust")]).unwrap(); + let url = Url::parse_with_params( + "http://testing.com/this?dont=clobberme", + &[("lang", "rust")], + ) + .unwrap(); - assert_eq!(url.as_str(), "http://testing.com/this?dont=clobberme&lang=rust"); + assert_eq!( + url.as_str(), + "http://testing.com/this?dont=clobberme&lang=rust" + ); } #[test] @@ -145,8 +156,8 @@ fn issue_124() { #[test] fn test_equality() { - use std::hash::{Hash, Hasher}; use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; fn check_eq(a: &Url, b: &Url) { assert_eq!(a, b); @@ -196,13 +207,29 @@ fn host() { assert_eq!(Url::parse(input).unwrap().host(), Some(host)); } assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org")); - assert_host("http://1.35.33.49", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); - assert_host("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", Host::Ipv6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344))); + assert_host( + "http://1.35.33.49", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); + assert_host( + "http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + Host::Ipv6(Ipv6Addr::new( + 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344, + )), + ); assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49")); - assert_host("http://[::]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0))); - assert_host("http://[::1]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1))); - assert_host("http://0x1.0X23.0x21.061", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert_host( + "http://[::]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), + ); + assert_host( + "http://[::1]", + Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), + ); + assert_host( + "http://0x1.0X23.0x21.061", + Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), + ); assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111))); assert_host("http://2..2.3", Host::Domain("2..2.3")); @@ -217,15 +244,26 @@ fn host_serialization() { // but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to. // Not [::0.0.0.2] / [::ffff:0.0.0.2] - assert_eq!(Url::parse("http://[0::2]").unwrap().host_str(), Some("[::2]")); - assert_eq!(Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), Some("[::ffff:0:2]")); + assert_eq!( + Url::parse("http://[0::2]").unwrap().host_str(), + Some("[::2]") + ); + assert_eq!( + Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), + Some("[::ffff:0:2]") + ); } #[test] fn test_idna() { assert!("http://goșu.ro".parse::().is_ok()); - assert_eq!(Url::parse("http://☃.net/").unwrap().host(), Some(Host::Domain("xn--n3h.net"))); - assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml".parse::().is_ok()); + assert_eq!( + Url::parse("http://☃.net/").unwrap().host(), + Some(Host::Domain("xn--n3h.net")) + ); + assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml" + .parse::() + .is_ok()); } #[test] @@ -236,9 +274,18 @@ fn test_serialization() { ("http://@emptyuser.com/", "http://emptyuser.com/"), ("http://:@emptypass.com/", "http://emptypass.com/"), ("http://user@user.com/", "http://user@user.com/"), - ("http://user:pass@userpass.com/", "http://user:pass@userpass.com/"), - ("http://slashquery.com/path/?q=something", "http://slashquery.com/path/?q=something"), - ("http://noslashquery.com/path?q=something", "http://noslashquery.com/path?q=something") + ( + "http://user:pass@userpass.com/", + "http://user:pass@userpass.com/", + ), + ( + "http://slashquery.com/path/?q=something", + "http://slashquery.com/path/?q=something", + ), + ( + "http://noslashquery.com/path?q=something", + "http://noslashquery.com/path?q=something", + ), ]; for &(input, result) in &data { let url = Url::parse(input).unwrap(); @@ -251,11 +298,16 @@ fn test_form_urlencoded() { let pairs: &[(Cow, Cow)] = &[ ("foo".into(), "é&".into()), ("bar".into(), "".into()), - ("foo".into(), "#".into()) + ("foo".into(), "#".into()), ]; - let encoded = form_urlencoded::Serializer::new(String::new()).extend_pairs(pairs).finish(); + let encoded = form_urlencoded::Serializer::new(String::new()) + .extend_pairs(pairs) + .finish(); assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23"); - assert_eq!(form_urlencoded::parse(encoded.as_bytes()).collect::>(), pairs.to_vec()); + assert_eq!( + form_urlencoded::parse(encoded.as_bytes()).collect::>(), + pairs.to_vec() + ); } #[test] @@ -269,44 +321,14 @@ fn test_form_serialize() { } #[test] -fn form_urlencoded_custom_encoding_override() { +fn form_urlencoded_encoding_override() { let encoded = form_urlencoded::Serializer::new(String::new()) - .custom_encoding_override(|s| s.as_bytes().to_ascii_uppercase().into()) + .encoding_override(Some(&|s| s.as_bytes().to_ascii_uppercase().into())) .append_pair("foo", "bar") .finish(); assert_eq!(encoded, "FOO=BAR"); } -#[test] -fn host_and_port_display() { - assert_eq!( - format!( - "{}", - HostAndPort{ host: Host::Domain("www.mozilla.org"), port: 80} - ), - "www.mozilla.org:80" - ); - assert_eq!( - format!( - "{}", - HostAndPort::{ host: Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)), port: 65535 } - ), - "1.35.33.49:65535" - ); - assert_eq!( - format!( - "{}", - HostAndPort::{ - host: Host::Ipv6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344 - )), - port: 1337 - }) - , - "[2001:db8:85a3:8d3:1319:8a2e:370:7344]:1337" - ) -} - #[test] /// https://github.com/servo/rust-url/issues/61 fn issue_61() { @@ -323,8 +345,13 @@ fn issue_61() { fn issue_197() { let mut url = Url::from_file_path("/").expect("Failed to parse path"); url.check_invariants().unwrap(); - assert_eq!(url, Url::parse("file:///").expect("Failed to parse path + protocol")); - url.path_segments_mut().expect("path_segments_mut").pop_if_empty(); + assert_eq!( + url, + Url::parse("file:///").expect("Failed to parse path + protocol") + ); + url.path_segments_mut() + .expect("path_segments_mut") + .pop_if_empty(); } #[test] @@ -346,12 +373,19 @@ fn append_trailing_slash() { /// https://github.com/servo/rust-url/issues/227 fn extend_query_pairs_then_mutate() { let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap(); - url.query_pairs_mut().extend_pairs(vec![ ("auth", "my-token") ].into_iter()); + url.query_pairs_mut() + .extend_pairs(vec![("auth", "my-token")].into_iter()); url.check_invariants().unwrap(); - assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?auth=my-token"); + assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar?auth=my-token" + ); url.path_segments_mut().unwrap().push("some_other_path"); url.check_invariants().unwrap(); - assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/some_other_path?auth=my-token"); + assert_eq!( + url.to_string(), + "http://localhost:6767/foo/bar/some_other_path?auth=my-token" + ); } #[test] @@ -388,7 +422,10 @@ fn test_set_host() { #[test] // https://github.com/servo/rust-url/issues/166 fn test_leading_dots() { - assert_eq!(Host::parse(".org").unwrap(), Host::Domain(".org".to_owned())); + assert_eq!( + Host::parse(".org").unwrap(), + Host::Domain(".org".to_owned()) + ); assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some(".")); } @@ -396,17 +433,20 @@ fn test_leading_dots() { // inside both a module and a function #[test] fn define_encode_set_scopes() { - use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; + use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; define_encode_set! { /// This encode set is used in the URL parser for query strings. pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'} } - assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); + assert_eq!( + utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), + "foo%20bar" + ); mod m { - use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; + use percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET}; define_encode_set! { /// This encode set is used in the URL parser for query strings. @@ -414,7 +454,10 @@ fn define_encode_set_scopes() { } pub fn test() { - assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), "foo%20bar"); + assert_eq!( + utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::(), + "foo%20bar" + ); } } @@ -424,8 +467,8 @@ fn define_encode_set_scopes() { #[test] /// https://github.com/servo/rust-url/issues/302 fn test_origin_hash() { - use std::hash::{Hash,Hasher}; use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; fn hash(value: &T) -> u64 { let mut hasher = DefaultHasher::new(); @@ -444,7 +487,9 @@ fn test_origin_hash() { Url::parse("ftp://example.net").unwrap().origin(), Url::parse("file://example.net").unwrap().origin(), Url::parse("http://user@example.net/").unwrap().origin(), - Url::parse("http://user:pass@example.net/").unwrap().origin(), + Url::parse("http://user:pass@example.net/") + .unwrap() + .origin(), ]; for origin_to_compare in &origins_to_compare { @@ -466,7 +511,7 @@ fn test_origin_hash() { #[test] fn test_windows_unc_path() { if !cfg!(windows) { - return + return; } let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap(); @@ -490,28 +535,14 @@ fn test_windows_unc_path() { assert!(url.is_err()); } -// Test the now deprecated log_syntax_violation method for backward -// compatibility -#[test] -#[allow(deprecated)] -fn test_old_log_violation_option() { - let violation = Cell::new(None); - let url = Url::options() - .log_syntax_violation(Some(&|s| violation.set(Some(s.to_owned())))) - .parse("http:////mozilla.org:42").unwrap(); - assert_eq!(url.port(), Some(42)); - - let violation = violation.take(); - assert_eq!(violation, Some("expected //".to_string())); -} - #[test] fn test_syntax_violation_callback() { use url::SyntaxViolation::*; let violation = Cell::new(None); let url = Url::options() .syntax_violation_callback(Some(&|v| violation.set(Some(v)))) - .parse("http:////mozilla.org:42").unwrap(); + .parse("http:////mozilla.org:42") + .unwrap(); assert_eq!(url.port(), Some(42)); let v = violation.take().unwrap(); @@ -527,13 +558,15 @@ fn test_syntax_violation_callback_lifetimes() { let url = Url::options() .syntax_violation_callback(Some(&vfn)) - .parse("http:////mozilla.org:42").unwrap(); + .parse("http:////mozilla.org:42") + .unwrap(); assert_eq!(url.port(), Some(42)); assert_eq!(violation.take(), Some(ExpectedDoubleSlash)); let url = Url::options() .syntax_violation_callback(Some(&vfn)) - .parse("http://mozilla.org\\path").unwrap(); + .parse("http://mozilla.org\\path") + .unwrap(); assert_eq!(url.path(), "/path"); assert_eq!(violation.take(), Some(Backslash)); } @@ -544,13 +577,11 @@ fn test_options_reuse() { let violations = RefCell::new(Vec::new()); let vfn = |v| violations.borrow_mut().push(v); - let options = Url::options() - .syntax_violation_callback(Some(&vfn)); + let options = Url::options().syntax_violation_callback(Some(&vfn)); let url = options.parse("http:////mozilla.org").unwrap(); let options = options.base_url(Some(&url)); let url = options.parse("/sub\\path").unwrap(); assert_eq!(url.as_str(), "http://mozilla.org/sub/path"); - assert_eq!(*violations.borrow(), - vec!(ExpectedDoubleSlash, Backslash)); + assert_eq!(*violations.borrow(), vec!(ExpectedDoubleSlash, Backslash)); } diff --git a/url_serde/Cargo.toml b/url_serde/Cargo.toml deleted file mode 100644 index ad57e79b1..000000000 --- a/url_serde/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] - -name = "url_serde" -version = "0.2.0" -authors = ["The rust-url developers"] - -description = "Serde support for URL types" -documentation = "https://docs.rs/url_serde/" -repository = "https://github.com/servo/rust-url" -readme = "README.md" -keywords = ["url", "serde"] -license = "MIT/Apache-2.0" - -[dependencies] -serde = "1.0" -url = {version = "1.0.0", path = ".."} - -[dev-dependencies] -serde_json = "1.0" -serde_derive = "1.0" - -[lib] -doctest = false diff --git a/url_serde/LICENSE-APACHE b/url_serde/LICENSE-APACHE deleted file mode 120000 index 965b606f3..000000000 --- a/url_serde/LICENSE-APACHE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-APACHE \ No newline at end of file diff --git a/url_serde/LICENSE-MIT b/url_serde/LICENSE-MIT deleted file mode 120000 index 76219eb72..000000000 --- a/url_serde/LICENSE-MIT +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-MIT \ No newline at end of file diff --git a/url_serde/README.md b/url_serde/README.md deleted file mode 100644 index dea503946..000000000 --- a/url_serde/README.md +++ /dev/null @@ -1,11 +0,0 @@ -Serde support for rust-url types -================================ - -This crate provides wrappers and convenience functions to make `rust-url` and `serde` -work hand in hand. - -Version `0.2` or newer of this crate offer support for `serde 1.0`. -Version `0.1` of this crate offer support for `serde 0.9`. -Versions of `serde` older than `0.9` are natively supported by `rust-url` crate directly. - -For more details, see the crate [documentation](https://docs.rs/url_serde/). \ No newline at end of file diff --git a/url_serde/src/lib.rs b/url_serde/src/lib.rs deleted file mode 100644 index ce8c47466..000000000 --- a/url_serde/src/lib.rs +++ /dev/null @@ -1,421 +0,0 @@ -// Copyright 2017 The rust-url developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! - -This crate provides wrappers and convenience functions to make rust-url -and Serde work hand in hand. - -The supported types are: - -* `url::Url` - -# How do I use a data type with a `Url` member with Serde? - -Use the serde attributes `deserialize_with` and `serialize_with`. - -``` -#[derive(serde::Serialize, serde::Deserialize)] -struct MyStruct { - #[serde(serialize_with = "serialize")] - url: Url, -} -``` - -# How do I use a data type with an unnamed `Url` member with serde? - -Same problem, same solution. - -``` -#[derive(serde::Serialize, serde::Deserialize)] -enum MyEnum { - A(#[serde(with = "url_serde")] Url, OtherType), -} -``` - -# How do I encode a `Url` value with `serde_json::to_string`? - -Use the `Ser` wrapper. - -``` -serde_json::to_string(&Ser::new(&url)) -``` - -# How do I decode a `Url` value with `serde_json::parse`? - -Use the `De` wrapper. - -``` -serde_json::from_str(r"http:://www.rust-lang.org").map(De::into_inner) -``` - -# How do I send `Url` values as part of an IPC channel? - -Use the `Serde` wrapper. It implements `Deref` and `DerefMut` for convenience. - -``` -ipc::channel::>() -``` -*/ - -#![deny(missing_docs)] -#![deny(unsafe_code)] - -extern crate serde; -#[cfg(test)] #[macro_use] extern crate serde_derive; -#[cfg(test)] extern crate serde_json; -extern crate url; - -use serde::{Deserialize, Serialize, Serializer, Deserializer}; -use std::cmp::PartialEq; -use std::error::Error; -use std::fmt; -use std::io::Write; -use std::ops::{Deref, DerefMut}; -use std::str; -use url::{Url, Host}; - -/// Serialises `value` with a given serializer. -/// -/// This is useful to serialize `rust-url` types used in structure fields or -/// tuple members with `#[serde(serialize_with = "url_serde::serialize")]`. -pub fn serialize(value: &T, serializer: S) -> Result - where S: Serializer, for<'a> Ser<'a, T>: Serialize -{ - Ser::new(value).serialize(serializer) -} - -/// A wrapper to serialize `rust-url` types. -/// -/// This is useful with functions such as `serde_json::to_string`. -/// -/// Values of this type can only be passed to the `serde::Serialize` trait. -#[derive(Debug)] -pub struct Ser<'a, T: 'a>(&'a T); - -impl<'a, T> Ser<'a, T> where Ser<'a, T>: Serialize { - /// Returns a new `Ser` wrapper. - #[inline(always)] - pub fn new(value: &'a T) -> Self { - Ser(value) - } -} - -/// Serializes this URL into a `serde` stream. -impl<'a> Serialize for Ser<'a, Url> { - fn serialize(&self, serializer: S) -> Result where S: Serializer { - serializer.serialize_str(self.0.as_str()) - } -} - -/// Serializes this Option into a `serde` stream. -impl<'a> Serialize for Ser<'a, Option> { - fn serialize(&self, serializer: S) -> Result where S: Serializer { - if let Some(url) = self.0.as_ref() { - serializer.serialize_some(url.as_str()) - } else { - serializer.serialize_none() - } - } -} - -impl<'a, String> Serialize for Ser<'a, Host> where String: AsRef { - fn serialize(&self, serializer: S) -> Result where S: Serializer { - match *self.0 { - Host::Domain(ref s) => serializer.serialize_str(s.as_ref()), - Host::Ipv4(_) | Host::Ipv6(_) => { - // max("101.102.103.104".len(), - // "[1000:1002:1003:1004:1005:1006:101.102.103.104]".len()) - const MAX_LEN: usize = 47; - let mut buffer = [0; MAX_LEN]; - serializer.serialize_str(display_into_buffer(&self.0, &mut buffer)) - } - } - } -} - -/// Like .to_string(), but doesn’t allocate memory for a `String`. -/// -/// Panics if `buffer` is too small. -fn display_into_buffer<'a, T: fmt::Display>(value: &T, buffer: &'a mut [u8]) -> &'a str { - let remaining_len; - { - let mut remaining = &mut *buffer; - write!(remaining, "{}", value).unwrap(); - remaining_len = remaining.len() - } - let written_len = buffer.len() - remaining_len; - let written = &buffer[..written_len]; - - // write! only provides std::fmt::Formatter to Display implementations, - // which has methods write_str and write_char but no method to write arbitrary bytes. - // Therefore, `written` is well-formed in UTF-8. - #[allow(unsafe_code)] - unsafe { - str::from_utf8_unchecked(written) - } -} - -/// Deserialises a `T` value with a given deserializer. -/// -/// This is useful to deserialize Url types used in structure fields or -/// tuple members with `#[serde(deserialize_with = "url_serde::deserialize")]`. -pub fn deserialize<'de, T, D>(deserializer: D) -> Result - where D: Deserializer<'de>, De: Deserialize<'de> -{ - De::deserialize(deserializer).map(De::into_inner) -} - -/// A wrapper to deserialize `rust-url` types. -/// -/// This is useful with functions such as `serde_json::from_str`. -/// -/// Values of this type can only be obtained through -/// the `serde::Deserialize` trait. -#[derive(Debug)] -pub struct De(T); - -impl<'de, T> De where De: serde::Deserialize<'de> { - /// Consumes this wrapper, returning the deserialized value. - #[inline(always)] - pub fn into_inner(self) -> T { - self.0 - } -} - -/// Deserializes this URL from a `serde` stream. -impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { - let string_representation: String = Deserialize::deserialize(deserializer)?; - Url::parse(&string_representation).map(De).map_err(|err| { - serde::de::Error::custom(err.description()) - }) - } -} - -/// Deserializes this Option from a `serde` stream. -impl<'de> Deserialize<'de> for De> { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { - let option_representation: Option = Deserialize::deserialize(deserializer)?; - if let Some(s) = option_representation { - return Url::parse(&s) - .map(Some) - .map(De) - .map_err(|err| {serde::de::Error::custom(err.description())}); - } - Ok(De(None)) - - } -} - -impl<'de> Deserialize<'de> for De { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { - let string_representation: String = Deserialize::deserialize(deserializer)?; - Host::parse(&string_representation).map(De).map_err(|err| { - serde::de::Error::custom(err.description()) - }) - } -} - -/// A convenience wrapper to be used as a type parameter, for example when -/// a `Vec` or an `HashMap` need to be passed to serde. -#[derive(Clone, Eq, Hash, PartialEq)] -pub struct Serde(pub T); - -/// A convenience type alias for Serde. -pub type SerdeUrl = Serde; - -impl<'de, T> Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - /// Consumes this wrapper, returning the inner value. - #[inline(always)] - pub fn into_inner(self) -> T { - self.0 - } -} - -impl<'de, T> fmt::Debug for Serde -where T: fmt::Debug, De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { - self.0.fmt(formatter) - } -} - -impl<'de, T> Deref for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - type Target = T; - - fn deref(&self) -> &T { - &self.0 - } -} - -impl<'de, T> DerefMut for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - fn deref_mut(&mut self) -> &mut T { - &mut self.0 - } -} - -impl<'de, T: PartialEq> PartialEq for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - fn eq(&self, other: &T) -> bool { - self.0 == *other - } -} - -impl<'de, T> Deserialize<'de> for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - fn deserialize(deserializer: D) -> Result - where D: Deserializer<'de> - { - De::deserialize(deserializer).map(De::into_inner).map(Serde) - } -} - -impl<'de, T> Serialize for Serde -where De: Deserialize<'de>, for<'a> Ser<'a, T>: Serialize -{ - fn serialize(&self, serializer: S) -> Result - where S: Serializer - { - Ser(&self.0).serialize(serializer) - } -} - -#[test] -fn test_ser_de_url() { - let url = Url::parse("http://www.test.com/foo/bar?$param=bazz").unwrap(); - let s = serde_json::to_string(&Ser::new(&url)).unwrap(); - let new_url: Url = serde_json::from_str(&s).map(De::into_inner).unwrap(); - assert_eq!(url, new_url); -} - -#[test] -fn test_derive_deserialize_with_for_url() { - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Url - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = Test { - url: Url::parse(url_str).unwrap() - }; - let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); - -} - -#[test] -fn test_derive_deserialize_with_for_option_url() { - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(deserialize_with = "deserialize", rename = "_url_")] - url: Option - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = Test { - url: Some(Url::parse(url_str).unwrap()) - }; - let json_string = format!(r#"{{"_url_": "{}"}}"#, url_str); - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); - - let expected = Test { - url: None - }; - let json_string = r#"{"_url_": null}"#; - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_serialize_with_for_url() { - #[derive(Serialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(serialize_with = "serialize", rename = "_url_")] - url: Url - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test {url: Url::parse(url_str).unwrap()}; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_serialize_with_for_option_url() { - #[derive(Serialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(serialize_with = "serialize", rename = "_url_")] - url: Option - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - - let expected = format!(r#"{{"_url_":"{}"}}"#, url_str); - let input = Test {url: Some(Url::parse(url_str).unwrap())}; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); - - let expected = format!(r#"{{"_url_":null}}"#); - let input = Test {url: None}; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(expected, got); -} - -#[test] -fn test_derive_with_for_url() { - #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] - struct Test { - #[serde(with = "self", rename = "_url_")] - url: Url - } - - let url_str = "http://www.test.com/foo/bar?$param=bazz"; - let json_string = format!(r#"{{"_url_":"{}"}}"#, url_str); - - // test deserialization - let expected = Test { - url: Url::parse(url_str).unwrap() - }; - let got: Test = serde_json::from_str(&json_string).unwrap(); - assert_eq!(expected, got); - - // test serialization - let input = Test {url: Url::parse(url_str).unwrap()}; - let got = serde_json::to_string(&input).unwrap(); - assert_eq!(json_string, got); -} - -#[test] -fn test_host() { - for host in &[ - Host::Domain("foo.com".to_owned()), - Host::Ipv4("127.0.0.1".parse().unwrap()), - Host::Ipv6("::1".parse().unwrap()), - ] { - let json = serde_json::to_string(&Ser(host)).unwrap(); - let de: De = serde_json::from_str(&json).unwrap(); - assert_eq!(de.into_inner(), *host) - } -}