diff --git a/url/src/host.rs b/url/src/host.rs index 8439664a6..410f42723 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -111,6 +111,7 @@ impl Host { { return Err(ParseError::InvalidDomainCharacter); } + if let Some(address) = parse_ipv4addr(&domain)? { Ok(Host::Ipv4(address)) } else { diff --git a/url/src/lib.rs b/url/src/lib.rs index 804704a46..5c3104ab2 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -1693,7 +1693,11 @@ impl Url { let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { - self.host_end + if self.has_authority() { + self.host_end + } else { + self.path_start + } }; let suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); diff --git a/url/src/parser.rs b/url/src/parser.rs index 20ff9f6ef..43db1dc32 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -9,6 +9,7 @@ use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; +use std::convert::TryInto; use form_urlencoded::EncodingOverride; use host::{Host, HostInternal}; @@ -484,10 +485,11 @@ impl<'a> Parser<'a> { let host_end = path_start; let host = HostInternal::None; let port = None; + let mut path_start : usize = path_start.try_into().unwrap(); let remaining = if let Some(input) = input.split_prefix('/') { - let path_start = self.serialization.len(); + path_start = self.serialization.len().try_into().unwrap(); self.serialization.push('/'); - self.parse_path(scheme_type, &mut false, path_start, input) + self.parse_path(scheme_type, &mut false, &mut path_start, input) } else { self.parse_cannot_be_a_base_path(input) }; @@ -499,7 +501,7 @@ impl<'a> Parser<'a> { host_end, host, port, - path_start, + path_start as u32, remaining, ) } @@ -531,9 +533,9 @@ impl<'a> Parser<'a> { let remaining = if path_start { self.parse_path_start(SchemeType::File, &mut has_host, remaining) } else { - let path_start = self.serialization.len(); + let mut path_start = self.serialization.len(); self.serialization.push('/'); - self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) + self.parse_path(SchemeType::File, &mut has_host, &mut path_start, remaining) }; // For file URLs that have a host and whose path starts @@ -588,8 +590,9 @@ impl<'a> Parser<'a> { input_after_first_char }; + let mut path_start = host_end; let remaining = - self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input); + self.parse_path(SchemeType::File, &mut false, &mut path_start, parse_path_input); let host_start = host_start as u32; @@ -605,7 +608,7 @@ impl<'a> Parser<'a> { host_end, host, port: None, - path_start: host_end, + path_start: path_start as u32, query_start, fragment_start, }); @@ -651,10 +654,11 @@ impl<'a> Parser<'a> { }; self.serialization.push_str(before_query); self.shorten_path(SchemeType::File, base_url.path_start as usize); + let mut path_start : usize = base_url.path_start as usize; let remaining = self.parse_path( SchemeType::File, &mut true, - base_url.path_start as usize, + &mut path_start, input, ); self.with_query_and_fragment( @@ -665,15 +669,15 @@ impl<'a> Parser<'a> { base_url.host_end, base_url.host, base_url.port, - base_url.path_start, + path_start as u32, remaining, ) } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; - let path_start = "file://".len(); + let mut path_start = "file://".len(); let remaining = - self.parse_path(SchemeType::File, &mut false, path_start, input); + self.parse_path(SchemeType::File, &mut false, &mut path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; let path_start = path_start as u32; @@ -695,8 +699,8 @@ impl<'a> Parser<'a> { } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; - let path_start = "file://".len(); - let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); + let mut path_start = "file://".len(); + let remaining = self.parse_path(SchemeType::File, &mut false, &mut path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; let path_start = path_start as u32; @@ -774,13 +778,13 @@ impl<'a> Parser<'a> { } return self.after_double_slash(remaining, scheme_type, scheme_end); } - let path_start = base_url.path_start; - self.serialization.push_str(base_url.slice(..path_start)); + let mut path_start : usize = base_url.path_start as usize; + self.serialization.push_str(base_url.slice(..path_start as u32)); self.serialization.push_str("/"); let remaining = self.parse_path( scheme_type, &mut true, - path_start as usize, + &mut path_start, input_after_first_char, ); self.with_query_and_fragment( @@ -810,15 +814,16 @@ impl<'a> Parser<'a> { { self.serialization.push('/'); } + let mut path_start = base_url.path_start as usize; let remaining = match input.split_first() { (Some('/'), remaining) => self.parse_path( scheme_type, &mut true, - base_url.path_start as usize, + &mut path_start, remaining, ), _ => { - self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input) + self.parse_path(scheme_type, &mut true, &mut path_start, input) } }; self.with_query_and_fragment( @@ -1141,7 +1146,7 @@ impl<'a> Parser<'a> { has_host: &mut bool, input: Input<'i>, ) -> Input<'i> { - let path_start = self.serialization.len(); + let mut path_start = self.serialization.len(); let (maybe_c, remaining) = input.split_first(); // If url is special, then: if scheme_type.is_special() { @@ -1154,10 +1159,10 @@ impl<'a> Parser<'a> { self.serialization.push('/'); // We have already made sure the forward slash is present. if maybe_c == Some('/') || maybe_c == Some('\\') { - return self.parse_path(scheme_type, has_host, path_start, remaining); + return self.parse_path(scheme_type, has_host, &mut path_start, remaining); } } - return self.parse_path(scheme_type, has_host, path_start, input); + return self.parse_path(scheme_type, has_host, &mut path_start, input); } else if maybe_c == Some('?') || maybe_c == Some('#') { // Otherwise, if state override is not given and c is U+003F (?), // set url’s query to the empty string and state to query state. @@ -1171,14 +1176,14 @@ impl<'a> Parser<'a> { self.serialization.push('/'); } // Otherwise, if c is not the EOF code point: - self.parse_path(scheme_type, has_host, path_start, input) + self.parse_path(scheme_type, has_host, &mut path_start, input) } pub fn parse_path<'i>( &mut self, scheme_type: SchemeType, has_host: &mut bool, - path_start: usize, + path_start: &mut usize, mut input: Input<'i>, ) -> Input<'i> { // Relative path state @@ -1240,11 +1245,11 @@ impl<'a> Parser<'a> { debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); self.serialization.truncate(segment_start); if self.serialization.ends_with("/") - && Parser::last_slash_can_be_removed(&self.serialization, path_start) + && Parser::last_slash_can_be_removed(&self.serialization, *path_start) { self.serialization.pop(); } - self.shorten_path(scheme_type, path_start); + self.shorten_path(scheme_type, *path_start); // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path. if ends_with_slash && !self.serialization.ends_with("/") { @@ -1283,13 +1288,17 @@ impl<'a> Parser<'a> { if !ends_with_slash { break; } + if !scheme_type.is_special() && ends_with_slash && &self.serialization[*path_start..self.serialization.len()] == "/" && !input.is_empty() && !*has_host { + *path_start += 2; + self.serialization.push_str("./"); + } } if scheme_type.is_file() { // while url’s path’s size is greater than 1 // and url’s path[0] is the empty string, // validation error, remove the first item from url’s path. //FIXME: log violation - let path = self.serialization.split_off(path_start); + let path = self.serialization.split_off(*path_start); self.serialization.push('/'); self.serialization.push_str(&path.trim_start_matches("/")); } diff --git a/url/src/path_segments.rs b/url/src/path_segments.rs index 6f5679887..523442502 100644 --- a/url/src/path_segments.rs +++ b/url/src/path_segments.rs @@ -212,7 +212,7 @@ impl<'a> PathSegmentsMut<'a> { I::Item: AsRef, { let scheme_type = SchemeType::from(self.url.scheme()); - let path_start = self.url.path_start as usize; + let mut path_start = self.url.path_start as usize; self.url.mutate(|parser| { parser.context = parser::Context::PathSegmentSetter; for segment in segments { @@ -230,7 +230,7 @@ impl<'a> PathSegmentsMut<'a> { parser.parse_path( scheme_type, &mut has_host, - path_start, + &mut path_start, parser::Input::new(segment), ); } diff --git a/url/tests/setters_tests.json b/url/tests/setters_tests.json index 4280032a2..f2a30412e 100644 --- a/url/tests/setters_tests.json +++ b/url/tests/setters_tests.json @@ -1,7 +1,7 @@ { "comment": [ - "AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", + "AS OF https://github.com/web-platform-tests/wpt/commit/551c9d604fb8b97d3f8c65793bb047d15baddbc2", "", "This file contains a JSON object.", "Other than 'comment', each key is an attribute of the `URL` interface", @@ -120,11 +120,11 @@ } }, { - "href": "gopher://example.net:1234", + "href": "https://example.net:1234", "new_value": "file", "expected": { - "href": "gopher://example.net:1234", - "protocol": "gopher:" + "href": "https://example.net:1234/", + "protocol": "https:" } }, { @@ -146,7 +146,7 @@ }, { "href": "file:///test", - "new_value": "gopher", + "new_value": "https", "expected": { "href": "file:///test", "protocol": "file:" @@ -962,6 +962,16 @@ "port": "" } }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, { "href": "sc://test@test/", "new_value": "", @@ -1286,6 +1296,16 @@ "port": "" } }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, { "href": "sc://test@test/", "new_value": "", @@ -1305,6 +1325,27 @@ "hostname": "test", "port": "12" } + }, + { + "comment": "Drop /. from path", + "href": "non-spec:/.//p", + "new_value": "h", + "expected": { + "href": "non-spec://h//p", + "host": "h", + "hostname": "h", + "pathname": "//p" + } + }, + { + "href": "non-spec:/.//p", + "new_value": "", + "expected": { + "href": "non-spec:////p", + "host": "", + "hostname": "", + "pathname": "//p" + } } ], "port": [ @@ -1653,6 +1694,40 @@ "href": "file:///", "pathname": "/" } + }, + { + "comment": "Serialize /. in path", + "href": "non-spec:/", + "new_value": "/.//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "/..//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "href": "non-spec:/", + "new_value": "//p", + "expected": { + "href": "non-spec:/.//p", + "pathname": "//p" + } + }, + { + "comment": "Drop /. from path", + "href": "non-spec:/.//", + "new_value": "p", + "expected": { + "href": "non-spec:/p", + "pathname": "/p" + } } ], "search": [ diff --git a/url/tests/unit.rs b/url/tests/unit.rs index beb4a39e9..0dbbe8cf5 100644 --- a/url/tests/unit.rs +++ b/url/tests/unit.rs @@ -655,3 +655,25 @@ fn test_non_special_path3() { assert_eq!(db_url.as_str(), "postgres://postgres@localhost/foo"); assert_eq!(db_url.path(), "/foo"); } + +#[test] +fn test_hostnew() { + let mut url = url::Url::parse("a:/a/..//a").unwrap(); + assert_eq!(url.as_str(), "a:/.//a"); + assert_eq!(url.path(), "//a"); + url.set_host(Some("servo")).unwrap(); + assert_eq!(url.as_str(), "a://servo//a"); +} + +#[test] +fn test_hostnew2() { + let url = url::Url::parse("a://a/..//a").unwrap(); + assert_eq!(url.as_str(), "a://a//a"); + assert_eq!(url.path(), "//a"); +} + +#[test] +fn test_hostnew3() { + let url = url::Url::parse("a:/a/../\r").unwrap(); + assert_eq!(url.as_str(), "a:/"); +} diff --git a/url/tests/urltestdata.json b/url/tests/urltestdata.json index 554e61914..4d1727dd6 100644 --- a/url/tests/urltestdata.json +++ b/url/tests/urltestdata.json @@ -1,6 +1,7 @@ [ + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/segments.js", - "# AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2", + "# AS OF https://github.com/web-platform-tests/wpt/commit/551c9d604fb8b97d3f8c65793bb047d15baddbc2", { "input": "http://example\t.\norg", "base": "http://example.org/foo/bar", @@ -6446,6 +6447,190 @@ "search": "", "hash": "" }, + "Serialize /. in path", + { + "input": "non-spec:/.//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//", + "base": "about:blank", + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/.//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//path", + "base": "about:blank", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/.//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "a/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "non-spec:/..//p", + "href": "non-spec:/.//p", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + { + "input": "path", + "base": "non-spec:/..//p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + "Do not serialize /. in path", + { + "input": "../path", + "base": "non-spec:/.//p", + "href": "non-spec:/path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, "# percent encoded hosts in non-special-URLs", { "input": "non-special://%E2%80%A0/", @@ -6987,5 +7172,51 @@ "pathname": "/", "search": "", "hash": "#link" + }, + "UTF-8 percent-encode of C0 control percent-encode set and supersets", + { + "input": "non-special:cannot-be-a-base-url-\u0000\u0001\u001F\u001E\u007E\u007F\u0080", + "base": "about:blank", + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "origin": "null", + "password": "", + "pathname": "cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "https://www.example.com/path{\u007Fpath.html?query'\u007F=query#fragment<\u007Ffragment", + "base": "about:blank", + "hash": "#fragment%3C%7Ffragment", + "host": "www.example.com", + "hostname": "www.example.com", + "href": "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment", + "origin": "https://www.example.com", + "password": "", + "pathname": "/path%7B%7Fpath.html", + "port": "", + "protocol": "https:", + "search": "?query%27%7F=query", + "username": "" + }, + { + "input": "https://user:pass[\u007F@foo/bar", + "base": "http://example.org", + "hash": "", + "host": "foo", + "hostname": "foo", + "href": "https://user:pass%5B%7F@foo/bar", + "origin": "https://foo", + "password": "pass%5B%7F", + "pathname": "/bar", + "port": "", + "protocol": "https:", + "search": "", + "username": "user" } ]