|
| 1 | +use std::convert::Infallible; |
| 2 | + |
| 3 | +use bstr::{BStr, BString, ByteSlice}; |
| 4 | + |
| 5 | +#[derive(Debug, thiserror::Error)] |
| 6 | +#[allow(missing_docs)] |
| 7 | +pub enum Error { |
| 8 | + #[error("Could not decode URL as UTF8")] |
| 9 | + Utf8(#[from] std::str::Utf8Error), |
| 10 | + #[error(transparent)] |
| 11 | + Url(#[from] url::ParseError), |
| 12 | + #[error("URLs need to specify the path to the repository")] |
| 13 | + MissingResourceLocation, |
| 14 | + #[error("file URLs require an absolute or relative path to the repository")] |
| 15 | + MissingRepositoryPath, |
| 16 | + #[error("\"{url}\" is not a valid local path")] |
| 17 | + NotALocalFile { url: BString }, |
| 18 | + #[error("Relative URLs are not permitted: {url:?}")] |
| 19 | + RelativeUrl { url: String }, |
| 20 | +} |
| 21 | + |
| 22 | +impl From<Infallible> for Error { |
| 23 | + fn from(_: Infallible) -> Self { |
| 24 | + unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this") |
| 25 | + } |
| 26 | +} |
| 27 | + |
| 28 | +enum InputScheme { |
| 29 | + Uri { protocol_end: usize }, |
| 30 | + Scp { colon: usize }, |
| 31 | + Local, |
| 32 | +} |
| 33 | + |
| 34 | +fn find_scheme(input: &BStr) -> InputScheme { |
| 35 | + // TODO: uri's may only contain `:/`, we should additionally check if the characters used for |
| 36 | + // protocol are all valid |
| 37 | + if let Some(protocol_end) = input.find("://") { |
| 38 | + return InputScheme::Uri { protocol_end }; |
| 39 | + } |
| 40 | + |
| 41 | + if let Some(colon) = input.find_byte(b':') { |
| 42 | + // allow user to select files containing a `:` by passing them as absolute or relative path |
| 43 | + // this is behavior explicitly mentioned by the scp and git manuals |
| 44 | + if !&input[..colon].contains(&b'/') { |
| 45 | + // TODO: implement windows specific checks |
| 46 | + return InputScheme::Scp { colon }; |
| 47 | + } |
| 48 | + } |
| 49 | + |
| 50 | + InputScheme::Local |
| 51 | +} |
| 52 | + |
| 53 | +/// Parse the given `bytes` as a [git url](crate::Url). |
| 54 | +/// |
| 55 | +/// # Note |
| 56 | +/// |
| 57 | +/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable. |
| 58 | +/// For file-paths, we don't expect UTF8 encoding either. |
| 59 | +pub fn parse(input: &BStr) -> Result<crate::Url, Error> { |
| 60 | + match find_scheme(input) { |
| 61 | + InputScheme::Uri { .. } => parse_url(input), |
| 62 | + InputScheme::Scp { colon } => parse_scp(input, colon), |
| 63 | + InputScheme::Local => parse_local(input), |
| 64 | + } |
| 65 | +} |
| 66 | + |
| 67 | +fn parse_url(input: &BStr) -> Result<crate::Url, Error> { |
| 68 | + let url = url::Url::parse(std::str::from_utf8(input)?)?; |
| 69 | + |
| 70 | + Ok(crate::Url { |
| 71 | + serialize_alternative_form: false, |
| 72 | + scheme: url.scheme().into(), |
| 73 | + password: url.password().map(Into::into), |
| 74 | + user: if url.username().is_empty() && url.password().is_none() { |
| 75 | + None |
| 76 | + } else { |
| 77 | + Some(url.username().into()) |
| 78 | + }, |
| 79 | + host: url.host_str().map(Into::into), |
| 80 | + port: url.port(), |
| 81 | + path: url.path().into(), |
| 82 | + }) |
| 83 | +} |
| 84 | + |
| 85 | +fn parse_scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> { |
| 86 | + let input = std::str::from_utf8(input)?; |
| 87 | + |
| 88 | + // TODO: this incorrectly splits at IPv6 addresses, check for `[]` before splitting |
| 89 | + let (host, path) = input.split_at(colon); |
| 90 | + let path = &path[1..]; // remove leading `:` |
| 91 | + |
| 92 | + // The path returned by the parsed url often has the wrong number of leading `/` characters but |
| 93 | + // should never differ in any other way (ssh URIs should not contain a query or fragment part). |
| 94 | + // To avoid the various off-by-one errors caused by the `/` characters, we keep using the path |
| 95 | + // determined above and can therefore skip parsing it here as well. |
| 96 | + let url = url::Url::parse(&format!("ssh://{host}"))?; |
| 97 | + |
| 98 | + Ok(crate::Url { |
| 99 | + serialize_alternative_form: true, |
| 100 | + scheme: url.scheme().into(), |
| 101 | + password: url.password().map(Into::into), |
| 102 | + user: if url.username().is_empty() && url.password().is_none() { |
| 103 | + None |
| 104 | + } else { |
| 105 | + Some(url.username().into()) |
| 106 | + }, |
| 107 | + host: url.host_str().map(Into::into), |
| 108 | + port: url.port(), |
| 109 | + path: path.into(), |
| 110 | + }) |
| 111 | +} |
| 112 | + |
| 113 | +fn parse_local(input: &BStr) -> Result<crate::Url, Error> { |
| 114 | + Ok(crate::Url { |
| 115 | + serialize_alternative_form: true, |
| 116 | + scheme: crate::scheme::Scheme::File, |
| 117 | + password: None, |
| 118 | + user: None, |
| 119 | + host: None, |
| 120 | + port: None, |
| 121 | + path: input.into(), |
| 122 | + }) |
| 123 | +} |
0 commit comments