Skip to content

Fix non-idempotent serialization for ./ and ../ paths (fixes #601) #629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions url/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ impl Host<String> {
{
return Err(ParseError::InvalidDomainCharacter);
}

if let Some(address) = parse_ipv4addr(&domain)? {
Ok(Host::Ipv4(address))
} else {
Expand Down
6 changes: 5 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1693,7 +1693,11 @@ impl Url {
let old_suffix_pos = if opt_new_port.is_some() {
self.path_start
} else {
self.host_end
if self.has_authority() {
self.host_end
} else {
self.path_start
}
};
let suffix = self.slice(old_suffix_pos..).to_owned();
self.serialization.truncate(self.host_start as usize);
Expand Down
61 changes: 35 additions & 26 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use std::error::Error;
use std::fmt::{self, Formatter, Write};
use std::str;
use std::convert::TryInto;

use form_urlencoded::EncodingOverride;
use host::{Host, HostInternal};
Expand Down Expand Up @@ -484,10 +485,11 @@ impl<'a> Parser<'a> {
let host_end = path_start;
let host = HostInternal::None;
let port = None;
let mut path_start : usize = path_start.try_into().unwrap();
let remaining = if let Some(input) = input.split_prefix('/') {
let path_start = self.serialization.len();
path_start = self.serialization.len().try_into().unwrap();
self.serialization.push('/');
self.parse_path(scheme_type, &mut false, path_start, input)
self.parse_path(scheme_type, &mut false, &mut path_start, input)
} else {
self.parse_cannot_be_a_base_path(input)
};
Expand All @@ -499,7 +501,7 @@ impl<'a> Parser<'a> {
host_end,
host,
port,
path_start,
path_start as u32,
remaining,
)
}
Expand Down Expand Up @@ -531,9 +533,9 @@ impl<'a> Parser<'a> {
let remaining = if path_start {
self.parse_path_start(SchemeType::File, &mut has_host, remaining)
} else {
let path_start = self.serialization.len();
let mut path_start = self.serialization.len();
self.serialization.push('/');
self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
self.parse_path(SchemeType::File, &mut has_host, &mut path_start, remaining)
};

// For file URLs that have a host and whose path starts
Expand Down Expand Up @@ -588,8 +590,9 @@ impl<'a> Parser<'a> {
input_after_first_char
};

let mut path_start = host_end;
let remaining =
self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);
self.parse_path(SchemeType::File, &mut false, &mut path_start, parse_path_input);

let host_start = host_start as u32;

Expand All @@ -605,7 +608,7 @@ impl<'a> Parser<'a> {
host_end,
host,
port: None,
path_start: host_end,
path_start: path_start as u32,
query_start,
fragment_start,
});
Expand Down Expand Up @@ -651,10 +654,11 @@ impl<'a> Parser<'a> {
};
self.serialization.push_str(before_query);
self.shorten_path(SchemeType::File, base_url.path_start as usize);
let mut path_start : usize = base_url.path_start as usize;
let remaining = self.parse_path(
SchemeType::File,
&mut true,
base_url.path_start as usize,
&mut path_start,
input,
);
self.with_query_and_fragment(
Expand All @@ -665,15 +669,15 @@ impl<'a> Parser<'a> {
base_url.host_end,
base_url.host,
base_url.port,
base_url.path_start,
path_start as u32,
remaining,
)
} else {
self.serialization.push_str("file:///");
let scheme_end = "file".len() as u32;
let path_start = "file://".len();
let mut path_start = "file://".len();
let remaining =
self.parse_path(SchemeType::File, &mut false, path_start, input);
self.parse_path(SchemeType::File, &mut false, &mut path_start, input);
let (query_start, fragment_start) =
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
let path_start = path_start as u32;
Expand All @@ -695,8 +699,8 @@ impl<'a> Parser<'a> {
} else {
self.serialization.push_str("file:///");
let scheme_end = "file".len() as u32;
let path_start = "file://".len();
let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
let mut path_start = "file://".len();
let remaining = self.parse_path(SchemeType::File, &mut false, &mut path_start, input);
let (query_start, fragment_start) =
self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
let path_start = path_start as u32;
Expand Down Expand Up @@ -774,13 +778,13 @@ impl<'a> Parser<'a> {
}
return self.after_double_slash(remaining, scheme_type, scheme_end);
}
let path_start = base_url.path_start;
self.serialization.push_str(base_url.slice(..path_start));
let mut path_start : usize = base_url.path_start as usize;
self.serialization.push_str(base_url.slice(..path_start as u32));
self.serialization.push_str("/");
let remaining = self.parse_path(
scheme_type,
&mut true,
path_start as usize,
&mut path_start,
input_after_first_char,
);
self.with_query_and_fragment(
Expand Down Expand Up @@ -810,15 +814,16 @@ impl<'a> Parser<'a> {
{
self.serialization.push('/');
}
let mut path_start = base_url.path_start as usize;
let remaining = match input.split_first() {
(Some('/'), remaining) => self.parse_path(
scheme_type,
&mut true,
base_url.path_start as usize,
&mut path_start,
remaining,
),
_ => {
self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input)
self.parse_path(scheme_type, &mut true, &mut path_start, input)
}
};
self.with_query_and_fragment(
Expand Down Expand Up @@ -1141,7 +1146,7 @@ impl<'a> Parser<'a> {
has_host: &mut bool,
input: Input<'i>,
) -> Input<'i> {
let path_start = self.serialization.len();
let mut path_start = self.serialization.len();
let (maybe_c, remaining) = input.split_first();
// If url is special, then:
if scheme_type.is_special() {
Expand All @@ -1154,10 +1159,10 @@ impl<'a> Parser<'a> {
self.serialization.push('/');
// We have already made sure the forward slash is present.
if maybe_c == Some('/') || maybe_c == Some('\\') {
return self.parse_path(scheme_type, has_host, path_start, remaining);
return self.parse_path(scheme_type, has_host, &mut path_start, remaining);
}
}
return self.parse_path(scheme_type, has_host, path_start, input);
return self.parse_path(scheme_type, has_host, &mut path_start, input);
} else if maybe_c == Some('?') || maybe_c == Some('#') {
// Otherwise, if state override is not given and c is U+003F (?),
// set url’s query to the empty string and state to query state.
Expand All @@ -1171,14 +1176,14 @@ impl<'a> Parser<'a> {
self.serialization.push('/');
}
// Otherwise, if c is not the EOF code point:
self.parse_path(scheme_type, has_host, path_start, input)
self.parse_path(scheme_type, has_host, &mut path_start, input)
}

pub fn parse_path<'i>(
&mut self,
scheme_type: SchemeType,
has_host: &mut bool,
path_start: usize,
path_start: &mut usize,
mut input: Input<'i>,
) -> Input<'i> {
// Relative path state
Expand Down Expand Up @@ -1240,11 +1245,11 @@ impl<'a> Parser<'a> {
debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
self.serialization.truncate(segment_start);
if self.serialization.ends_with("/")
&& Parser::last_slash_can_be_removed(&self.serialization, path_start)
&& Parser::last_slash_can_be_removed(&self.serialization, *path_start)
{
self.serialization.pop();
}
self.shorten_path(scheme_type, path_start);
self.shorten_path(scheme_type, *path_start);

// and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
if ends_with_slash && !self.serialization.ends_with("/") {
Expand Down Expand Up @@ -1283,13 +1288,17 @@ impl<'a> Parser<'a> {
if !ends_with_slash {
break;
}
if !scheme_type.is_special() && ends_with_slash && &self.serialization[*path_start..self.serialization.len()] == "/" && !input.is_empty() && !*has_host {
*path_start += 2;
self.serialization.push_str("./");
}
}
if scheme_type.is_file() {
// while url’s path’s size is greater than 1
// and url’s path[0] is the empty string,
// validation error, remove the first item from url’s path.
//FIXME: log violation
let path = self.serialization.split_off(path_start);
let path = self.serialization.split_off(*path_start);
self.serialization.push('/');
self.serialization.push_str(&path.trim_start_matches("/"));
}
Expand Down
4 changes: 2 additions & 2 deletions url/src/path_segments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl<'a> PathSegmentsMut<'a> {
I::Item: AsRef<str>,
{
let scheme_type = SchemeType::from(self.url.scheme());
let path_start = self.url.path_start as usize;
let mut path_start = self.url.path_start as usize;
self.url.mutate(|parser| {
parser.context = parser::Context::PathSegmentSetter;
for segment in segments {
Expand All @@ -230,7 +230,7 @@ impl<'a> PathSegmentsMut<'a> {
parser.parse_path(
scheme_type,
&mut has_host,
path_start,
&mut path_start,
parser::Input::new(segment),
);
}
Expand Down
85 changes: 80 additions & 5 deletions url/tests/setters_tests.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"comment": [
"AS OF https://github.com/jsdom/whatwg-url/commit/35f04dfd3048cf6362f4398745bb13375c5020c2",
"## Tests for setters of https://url.spec.whatwg.org/#urlutils-members",
"AS OF https://github.com/web-platform-tests/wpt/commit/551c9d604fb8b97d3f8c65793bb047d15baddbc2",
"",
"This file contains a JSON object.",
"Other than 'comment', each key is an attribute of the `URL` interface",
Expand Down Expand Up @@ -120,11 +120,11 @@
}
},
{
"href": "gopher://example.net:1234",
"href": "https://example.net:1234",
"new_value": "file",
"expected": {
"href": "gopher://example.net:1234",
"protocol": "gopher:"
"href": "https://example.net:1234/",
"protocol": "https:"
}
},
{
Expand All @@ -146,7 +146,7 @@
},
{
"href": "file:///test",
"new_value": "gopher",
"new_value": "https",
"expected": {
"href": "file:///test",
"protocol": "file:"
Expand Down Expand Up @@ -962,6 +962,16 @@
"port": ""
}
},
{
"href": "file://hi/x",
"new_value": "",
"expected": {
"href": "file:///x",
"host": "",
"hostname": "",
"port": ""
}
},
{
"href": "sc://test@test/",
"new_value": "",
Expand Down Expand Up @@ -1286,6 +1296,16 @@
"port": ""
}
},
{
"href": "file://hi/x",
"new_value": "",
"expected": {
"href": "file:///x",
"host": "",
"hostname": "",
"port": ""
}
},
{
"href": "sc://test@test/",
"new_value": "",
Expand All @@ -1305,6 +1325,27 @@
"hostname": "test",
"port": "12"
}
},
{
"comment": "Drop /. from path",
"href": "non-spec:/.//p",
"new_value": "h",
"expected": {
"href": "non-spec://h//p",
"host": "h",
"hostname": "h",
"pathname": "//p"
}
},
{
"href": "non-spec:/.//p",
"new_value": "",
"expected": {
"href": "non-spec:////p",
"host": "",
"hostname": "",
"pathname": "//p"
}
}
],
"port": [
Expand Down Expand Up @@ -1653,6 +1694,40 @@
"href": "file:///",
"pathname": "/"
}
},
{
"comment": "Serialize /. in path",
"href": "non-spec:/",
"new_value": "/.//p",
"expected": {
"href": "non-spec:/.//p",
"pathname": "//p"
}
},
{
"href": "non-spec:/",
"new_value": "/..//p",
"expected": {
"href": "non-spec:/.//p",
"pathname": "//p"
}
},
{
"href": "non-spec:/",
"new_value": "//p",
"expected": {
"href": "non-spec:/.//p",
"pathname": "//p"
}
},
{
"comment": "Drop /. from path",
"href": "non-spec:/.//",
"new_value": "p",
"expected": {
"href": "non-spec:/p",
"pathname": "/p"
}
}
],
"search": [
Expand Down
Loading