diff --git a/README.md b/README.md index 5ee1587..935a064 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## WHATWG URL parser for Rust +# WHATWG URL parser for Rust Fast [WHATWG URL Specification](https://url.spec.whatwg.org) compliant URL parser for Rust. Well-tested and widely used by Node.js since [Node 18](https://nodejs.org/en/blog/release/v18.17.0). @@ -6,24 +6,13 @@ Well-tested and widely used by Node.js since [Node 18](https://nodejs.org/en/blo The Ada library passes the full range of tests from the specification, across a wide range of platforms (e.g., Windows, Linux, macOS). It fully supports the relevant [Unicode Technical Standard](https://www.unicode.org/reports/tr46/#ToUnicode). -### Usage - -Here is an example illustrating a common usage: - -```Rust -use ada_url::Url; -fn main() { - let u = Url::parse("http://www.google:8080/love#drug", None).expect("bad url"); - println!("port: {:?}", u.port()); - println!("hash: {:?}", u.hash()); - println!("pathname: {:?}", u.pathname()); - println!("href: {:?}", u.href()); - u.set_port("9999"); - println!("href: {:?}", u.href()); -} -``` +## Usage + +See [here](examples/simple.rs) for a usage example. +You can run it locally with `cargo run --example simple`. +Feel free to adjust it for exploring this crate further. -#### Features +### Features **std:** Functionalities that require `std`. This feature is enabled by default, set `no-default-features` to `true` if you want `no-std`. @@ -38,7 +27,7 @@ Enabling this feature without `libc++` installed would cause compile error. Ada is fast. The benchmark below shows **3.34 times** faster URL parsing compared to `url` -``` +```text parse/ada_url time: [2.0790 µs 2.0812 µs 2.0835 µs] thrpt: [369.84 MiB/s 370.25 MiB/s 370.65 MiB/s] @@ -65,9 +54,9 @@ parse/url time: [6.9266 µs 6.9677 µs 7.0199 µs] | **[`Send`](https://doc.rust-lang.org/std/marker/trait.Send.html)** | Used to declare that the type can be transferred across thread boundaries. | | **[`Sync`](https://doc.rust-lang.org/stable/std/marker/trait.Sync.html)** | Used to declare that the type is thread-safe. | -### Development +## Development -#### `justfile` +### `justfile` The [`justfile`](./justfile) contains commands (called "recipes") that can be executed by [just](https://github.com/casey/just) for convenience. @@ -83,7 +72,7 @@ just all just all --skip=libcpp,serde ``` -### License +## License This code is made available under the Apache License 2.0 as well as the MIT license. diff --git a/examples/simple.rs b/examples/simple.rs new file mode 100644 index 0000000..e6dbfe4 --- /dev/null +++ b/examples/simple.rs @@ -0,0 +1,14 @@ +use ada_url::Url; + +fn main() { + let url = Url::parse("http://www.google:8080/love#drug", None).expect("bad url"); + + println!("port: {:?}", url.port()); + println!("hash: {:?}", url.hash()); + println!("pathname: {:?}", url.pathname()); + println!("href: {:?}", url.href()); + + let mut url = url; + url.set_port(Some("9999")).expect("bad port"); + println!("href: {:?}", url.href()); +} diff --git a/src/ffi.rs b/src/ffi.rs index 4110bed..10e3d3b 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -14,7 +14,8 @@ pub struct ada_string { } impl ada_string { - pub fn as_str(&self) -> &'static str { + #[must_use] + pub const fn as_str(&self) -> &'static str { unsafe { let slice = core::slice::from_raw_parts(self.data.cast(), self.length); core::str::from_utf8_unchecked(slice) diff --git a/src/idna.rs b/src/idna.rs index d5df15c..9f035a4 100644 --- a/src/idna.rs +++ b/src/idna.rs @@ -1,6 +1,6 @@ use crate::ffi; -/// IDNA struct implements the to_ascii and to_unicode functions from the Unicode Technical +/// IDNA struct implements the `to_ascii` and `to_unicode` functions from the Unicode Technical /// Standard supporting a wide range of systems. It is suitable for URL parsing. /// For more information, [read the specification](https://www.unicode.org/reports/tr46/#ToUnicode) pub struct Idna {} @@ -15,6 +15,7 @@ impl Idna { /// use ada_url::Idna; /// assert_eq!(Idna::unicode("xn--meagefactory-m9a.ca"), "meßagefactory.ca"); /// ``` + #[must_use] pub fn unicode(input: &str) -> &str { unsafe { let out = ffi::ada_idna_to_unicode(input.as_ptr().cast(), input.len()); @@ -32,6 +33,7 @@ impl Idna { /// use ada_url::Idna; /// assert_eq!(Idna::ascii("meßagefactory.ca"), "xn--meagefactory-m9a.ca"); /// ``` + #[must_use] pub fn ascii(input: &str) -> &str { unsafe { let out = ffi::ada_idna_to_ascii(input.as_ptr().cast(), input.len()); diff --git a/src/lib.rs b/src/lib.rs index 92ead82..7df43f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,10 +70,10 @@ pub enum HostType { impl From for HostType { fn from(value: c_uint) -> Self { match value { - 0 => HostType::Domain, - 1 => HostType::IPV4, - 2 => HostType::IPV6, - _ => HostType::Domain, + 0 => Self::Domain, + 1 => Self::IPV4, + 2 => Self::IPV6, + _ => Self::Domain, } } } @@ -93,14 +93,14 @@ pub enum SchemeType { impl From for SchemeType { fn from(value: c_uint) -> Self { match value { - 0 => SchemeType::Http, - 1 => SchemeType::NotSpecial, - 2 => SchemeType::Https, - 3 => SchemeType::Ws, - 4 => SchemeType::Ftp, - 5 => SchemeType::Wss, - 6 => SchemeType::File, - _ => SchemeType::NotSpecial, + 0 => Self::Http, + 1 => Self::NotSpecial, + 2 => Self::Https, + 3 => Self::Ws, + 4 => Self::Ftp, + 5 => Self::Wss, + 6 => Self::File, + _ => Self::NotSpecial, } } } @@ -182,7 +182,7 @@ impl From<*mut ffi::ada_url> for Url { type SetterResult = Result<(), ()>; #[inline] -fn setter_result(successful: bool) -> SetterResult { +const fn setter_result(successful: bool) -> SetterResult { if successful { Ok(()) } else { @@ -199,7 +199,7 @@ impl Url { /// .expect("This is a valid URL. Should have parsed it."); /// assert_eq!(out.protocol(), "https:"); /// ``` - pub fn parse(input: Input, base: Option<&str>) -> Result> + pub fn parse(input: Input, base: Option<&str>) -> Result> where Input: AsRef, { @@ -231,6 +231,7 @@ impl Url { /// assert!(Url::can_parse("https://ada-url.github.io/ada", None)); /// assert!(Url::can_parse("/pathname", Some("https://ada-url.github.io/ada"))); /// ``` + #[must_use] pub fn can_parse(input: &str, base: Option<&str>) -> bool { unsafe { if let Some(base) = base { @@ -247,11 +248,13 @@ impl Url { } /// Returns the type of the host such as default, ipv4 or ipv6. + #[must_use] pub fn host_type(&self) -> HostType { HostType::from(unsafe { ffi::ada_get_host_type(self.0) }) } /// Returns the type of the scheme such as http, https, etc. + #[must_use] pub fn scheme_type(&self) -> SchemeType { SchemeType::from(unsafe { ffi::ada_get_scheme_type(self.0) }) } @@ -266,6 +269,7 @@ impl Url { /// let url = Url::parse("blob:https://example.com/foo", None).expect("Invalid URL"); /// assert_eq!(url.origin(), "https://example.com"); /// ``` + #[must_use] pub fn origin(&self) -> &str { unsafe { let out = ffi::ada_get_origin(self.0); @@ -277,6 +281,7 @@ impl Url { /// Return the parsed version of the URL with all components. /// /// For more information, read [WHATWG URL spec](https://url.spec.whatwg.org/#dom-url-href) + #[must_use] pub fn href(&self) -> &str { unsafe { ffi::ada_get_href(self.0) }.as_str() } @@ -305,6 +310,7 @@ impl Url { /// let url = Url::parse("ftp://rms:secret123@example.com", None).expect("Invalid URL"); /// assert_eq!(url.username(), "rms"); /// ``` + #[must_use] pub fn username(&self) -> &str { unsafe { ffi::ada_get_username(self.0) }.as_str() } @@ -324,7 +330,7 @@ impl Url { ffi::ada_set_username( self.0, input.unwrap_or("").as_ptr().cast(), - input.map_or(0, |i| i.len()), + input.map_or(0, str::len), ) }) } @@ -339,6 +345,7 @@ impl Url { /// let url = Url::parse("ftp://rms:secret123@example.com", None).expect("Invalid URL"); /// assert_eq!(url.password(), "secret123"); /// ``` + #[must_use] pub fn password(&self) -> &str { unsafe { ffi::ada_get_password(self.0) }.as_str() } @@ -358,7 +365,7 @@ impl Url { ffi::ada_set_password( self.0, input.unwrap_or("").as_ptr().cast(), - input.map_or(0, |i| i.len()), + input.map_or(0, str::len), ) }) } @@ -376,6 +383,7 @@ impl Url { /// let url = Url::parse("https://example.com:8080", None).expect("Invalid URL"); /// assert_eq!(url.port(), "8080"); /// ``` + #[must_use] pub fn port(&self) -> &str { unsafe { ffi::ada_get_port(self.0) }.as_str() } @@ -391,14 +399,11 @@ impl Url { /// ``` #[allow(clippy::result_unit_err)] pub fn set_port(&mut self, input: Option<&str>) -> SetterResult { - match input { - Some(value) => setter_result(unsafe { - ffi::ada_set_port(self.0, value.as_ptr().cast(), value.len()) - }), - None => { - unsafe { ffi::ada_clear_port(self.0) } - Ok(()) - } + if let Some(value) = input { + setter_result(unsafe { ffi::ada_set_port(self.0, value.as_ptr().cast(), value.len()) }) + } else { + unsafe { ffi::ada_clear_port(self.0) } + Ok(()) } } @@ -419,6 +424,7 @@ impl Url { /// assert_eq!(url.hash(), "#row=4"); /// assert!(url.has_hash()); /// ``` + #[must_use] pub fn hash(&self) -> &str { unsafe { ffi::ada_get_hash(self.0) }.as_str() } @@ -449,6 +455,7 @@ impl Url { /// let url = Url::parse("https://127.0.0.1:8080/index.html", None).expect("Invalid URL"); /// assert_eq!(url.host(), "127.0.0.1:8080"); /// ``` + #[must_use] pub fn host(&self) -> &str { unsafe { ffi::ada_get_host(self.0) }.as_str() } @@ -468,7 +475,7 @@ impl Url { ffi::ada_set_host( self.0, input.unwrap_or("").as_ptr().cast(), - input.map_or(0, |i| i.len()), + input.map_or(0, str::len), ) }) } @@ -487,6 +494,7 @@ impl Url { /// let url = Url::parse("https://127.0.0.1:8080/index.html", None).expect("Invalid URL"); /// assert_eq!(url.hostname(), "127.0.0.1"); /// ``` + #[must_use] pub fn hostname(&self) -> &str { unsafe { ffi::ada_get_hostname(self.0) }.as_str() } @@ -506,7 +514,7 @@ impl Url { ffi::ada_set_hostname( self.0, input.unwrap_or("").as_ptr().cast(), - input.map_or(0, |i| i.len()), + input.map_or(0, str::len), ) }) } @@ -521,6 +529,7 @@ impl Url { /// let url = Url::parse("https://example.com/api/versions?page=2", None).expect("Invalid URL"); /// assert_eq!(url.pathname(), "/api/versions"); /// ``` + #[must_use] pub fn pathname(&self) -> &str { unsafe { ffi::ada_get_pathname(self.0) }.as_str() } @@ -540,7 +549,7 @@ impl Url { ffi::ada_set_pathname( self.0, input.unwrap_or("").as_ptr().cast(), - input.map_or(0, |i| i.len()), + input.map_or(0, str::len), ) }) } @@ -558,6 +567,7 @@ impl Url { /// let url = Url::parse("https://example.com/products", None).expect("Invalid URL"); /// assert_eq!(url.search(), ""); /// ``` + #[must_use] pub fn search(&self) -> &str { unsafe { ffi::ada_get_search(self.0) }.as_str() } @@ -574,7 +584,7 @@ impl Url { pub fn set_search(&mut self, input: Option<&str>) { match input { Some(value) => unsafe { - ffi::ada_set_search(self.0, value.as_ptr().cast(), value.len()) + ffi::ada_set_search(self.0, value.as_ptr().cast(), value.len()); }, None => unsafe { ffi::ada_clear_search(self.0) }, } @@ -590,6 +600,7 @@ impl Url { /// let url = Url::parse("file:///tmp/foo", None).expect("Invalid URL"); /// assert_eq!(url.protocol(), "file:"); /// ``` + #[must_use] pub fn protocol(&self) -> &str { unsafe { ffi::ada_get_protocol(self.0) }.as_str() } @@ -609,46 +620,55 @@ impl Url { } /// A URL includes credentials if its username or password is not the empty string. + #[must_use] pub fn has_credentials(&self) -> bool { unsafe { ffi::ada_has_credentials(self.0) } } /// Returns true if it has an host but it is the empty string. + #[must_use] pub fn has_empty_hostname(&self) -> bool { unsafe { ffi::ada_has_empty_hostname(self.0) } } /// Returns true if it has a host (included an empty host) + #[must_use] pub fn has_hostname(&self) -> bool { unsafe { ffi::ada_has_hostname(self.0) } } /// Returns true if URL has a non-empty username. + #[must_use] pub fn has_non_empty_username(&self) -> bool { unsafe { ffi::ada_has_non_empty_username(self.0) } } /// Returns true if URL has a non-empty password. + #[must_use] pub fn has_non_empty_password(&self) -> bool { unsafe { ffi::ada_has_non_empty_password(self.0) } } /// Returns true if URL has a port. + #[must_use] pub fn has_port(&self) -> bool { unsafe { ffi::ada_has_port(self.0) } } /// Returns true if URL has password. + #[must_use] pub fn has_password(&self) -> bool { unsafe { ffi::ada_has_password(self.0) } } /// Returns true if URL has a hash/fragment. + #[must_use] pub fn has_hash(&self) -> bool { unsafe { ffi::ada_has_hash(self.0) } } /// Returns true if URL has search/query. + #[must_use] pub fn has_search(&self) -> bool { unsafe { ffi::ada_has_search(self.0) } } @@ -656,11 +676,13 @@ impl Url { /// Returns the parsed version of the URL with all components. /// /// For more information, read [WHATWG URL spec](https://url.spec.whatwg.org/#dom-url-href) + #[must_use] pub fn as_str(&self) -> &str { self.href() } /// Returns the URL components of the instance. + #[must_use] pub fn components(&self) -> UrlComponents { unsafe { ffi::ada_get_components(self.0).as_ref().unwrap() }.into() } @@ -742,7 +764,7 @@ impl Ord for Url { impl hash::Hash for Url { fn hash(&self, state: &mut H) { - self.href().hash(state) + self.href().hash(state); } }