diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8cabbe..6da4f94 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.41.1, stable, beta, nightly] + rust: [1.56.1, stable, beta, nightly] steps: - uses: actions/checkout@v2 - uses: hecrj/setup-rust-action@v1 diff --git a/README.md b/README.md index 690fd59..fa960cc 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ ascii = { version = "1.1", default-features = false, features = ["alloc"] } ## Minimum supported Rust version -The minimum Rust version for 1.1.\* releases is 1.41.1. +The minimum Rust version for 1.2.\* releases is 1.56.1. Later 1.y.0 releases might require newer Rust versions, but the three most recent stable releases at the time of publishing will always be supported. For example this means that if the current stable Rust version is 1.70 when -ascii 1.2.0 is released, then ascii 1.2.\* will not require a newer +ascii 1.3.0 is released, then ascii 1.3.\* will not require a newer Rust version than 1.68. ## History diff --git a/src/ascii_char.rs b/src/ascii_char.rs index 5011949..266718d 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -360,6 +360,32 @@ impl AsciiChar { ALL[ch as usize] } + /// Create an `AsciiChar` from a `char`, in a `const fn` way. + /// + /// Within non-`const fn` functions the more general + /// [`from_ascii()`](#method.from_ascii) should be used instead. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiChar; + /// assert!(AsciiChar::try_new('-').is_ok()); + /// assert!(AsciiChar::try_new('—').is_err()); + /// assert_eq!(AsciiChar::try_new('\x7f'), Ok(AsciiChar::DEL)); + /// ``` + /// + /// # Errors + /// + /// Fails for non-ASCII characters. 
+ #[inline] + pub const fn try_new(ch: char) -> Result<Self, ToAsciiCharError> { + unsafe { + match ch as u32 { + 0..=127 => Ok(mem::transmute(ch as u8)), + _ => Err(ToAsciiCharError(())), + } + } + } + /// Constructs an ASCII character from a `u8`, `char` or other character /// type without any checks. /// @@ -375,9 +401,9 @@ impl AsciiChar { /// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`. #[inline] #[must_use] - pub unsafe fn from_ascii_unchecked(ch: u8) -> Self { + pub const unsafe fn from_ascii_unchecked(ch: u8) -> Self { // SAFETY: Caller guarantees `ch` is within bounds of ascii. - unsafe { ch.to_ascii_char_unchecked() } + unsafe { mem::transmute(ch) } } /// Converts an ASCII character into a `u8`. @@ -411,7 +437,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_alphabetic(self) -> bool { - (self.to_not_upper() >= b'a') & (self.to_not_upper() <= b'z') + (self.to_not_upper() >= b'a') && (self.to_not_upper() <= b'z') } /// Check if the character is a letter (a-z, A-Z). @@ -457,14 +483,14 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_digit(&self) -> bool { - (*self as u8 >= b'0') & (*self as u8 <= b'9') + (*self as u8 >= b'0') && (*self as u8 <= b'9') } /// Check if the character is a letter or number #[inline] #[must_use] pub const fn is_alphanumeric(self) -> bool { - self.is_alphabetic() | self.is_ascii_digit() + self.is_alphabetic() || self.is_ascii_digit() } /// Check if the character is a letter or number @@ -491,7 +517,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_blank(&self) -> bool { - (*self as u8 == b' ') | (*self as u8 == b'\t') + (*self as u8 == b' ') || (*self as u8 == b'\t') } /// Check if the character one of ' ', '\t', '\n', '\r', @@ -500,7 +526,7 @@ impl AsciiChar { #[must_use] pub const fn is_whitespace(self) -> bool { let b = self as u8; - self.is_ascii_blank() | (b == b'\n') | (b == b'\r') | (b == 0x0b) | (b == 0x0c) + self.is_ascii_blank() || (b == b'\n') || (b == b'\r') || (b == 0x0b) || (b ==
0x0c) } /// Check if the character is a ' ', '\t', '\n', '\r' or '\0xc' (form feed). @@ -510,9 +536,9 @@ impl AsciiChar { #[must_use] pub const fn is_ascii_whitespace(&self) -> bool { self.is_ascii_blank() - | (*self as u8 == b'\n') - | (*self as u8 == b'\r') - | (*self as u8 == 0x0c/*form feed*/) + || (*self as u8 == b'\n') + || (*self as u8 == b'\r') + || (*self as u8 == 0x0c/*form feed*/) } /// Check if the character is a control character @@ -530,7 +556,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_control(&self) -> bool { - ((*self as u8) < b' ') | (*self as u8 == 127) + ((*self as u8) < b' ') || (*self as u8 == 127) } /// Checks if the character is printable (except space) @@ -624,7 +650,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_punctuation(&self) -> bool { - self.is_ascii_graphic() & !self.is_alphanumeric() + self.is_ascii_graphic() && !self.is_alphanumeric() } /// Checks if the character is a valid hex digit @@ -641,7 +667,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_hexdigit(&self) -> bool { - self.is_ascii_digit() | ((*self as u8 | 0x20_u8).wrapping_sub(b'a') < 6) + self.is_ascii_digit() || ((*self as u8 | 0x20u8).wrapping_sub(b'a') < 6) } /// Unicode has printable versions of the ASCII control codes, like '␛'. @@ -659,14 +685,15 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p'); /// ``` #[must_use] - pub fn as_printable_char(self) -> char { + pub const fn as_printable_char(self) -> char { + #![allow(clippy::transmute_int_to_char)] // from_u32_unchecked() is not const fn yet. match self as u8 { // Non printable characters // SAFETY: From codepoint 0x2400 ('␀') to 0x241f (`␟`), there are characters representing // the unprintable characters from 0x0 to 0x1f, ordered correctly. // As `b` is guaranteed to be within 0x0 to 0x1f, the conversion represents a // valid character.
- b @ 0x0..=0x1f => unsafe { char::from_u32_unchecked(u32::from('␀') + u32::from(b)) }, + b @ 0x0..=0x1f => unsafe { mem::transmute('␀' as u32 + b as u32) }, // 0x7f (delete) has it's own character at codepoint 0x2420, not 0x247f, so it is special // cased to return it's character @@ -728,7 +755,7 @@ impl AsciiChar { #[must_use] pub const fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self.as_byte() == other.as_byte()) - | (self.is_alphabetic() & (self.to_not_upper() == other.to_not_upper())) + || (self.is_alphabetic() && (self.to_not_upper() == other.to_not_upper())) } } diff --git a/src/ascii_str.rs b/src/ascii_str.rs index e8a6e12..bfdf54e 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -2,7 +2,7 @@ use alloc::borrow::ToOwned; #[cfg(feature = "alloc")] use alloc::boxed::Box; -use core::fmt; +use core::{fmt, mem}; use core::ops::{Index, IndexMut}; use core::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}; use core::slice::{self, Iter, IterMut, SliceIndex}; @@ -28,20 +28,37 @@ pub struct AsciiStr { } impl AsciiStr { + /// Coerces into an `AsciiStr` slice. + /// + /// # Examples + /// ``` + /// # use ascii::{AsciiChar, AsciiStr}; + /// const HELLO: &AsciiStr = AsciiStr::new( + /// &[AsciiChar::H, AsciiChar::e, AsciiChar::l, AsciiChar::l, AsciiChar::o] + /// ); + /// + /// assert_eq!(HELLO.as_str(), "Hello"); + /// ``` + #[inline] + #[must_use] + pub const fn new(s: &[AsciiChar]) -> &Self { + unsafe { mem::transmute(s) } + } + /// Converts `&self` to a `&str` slice. #[inline] #[must_use] - pub fn as_str(&self) -> &str { + pub const fn as_str(&self) -> &str { // SAFETY: All variants of `AsciiChar` are valid bytes for a `str`. - unsafe { &*(self as *const AsciiStr as *const str) } + unsafe { mem::transmute(self) } } /// Converts `&self` into a byte slice. 
#[inline] #[must_use] - pub fn as_bytes(&self) -> &[u8] { + pub const fn as_bytes(&self) -> &[u8] { // SAFETY: All variants of `AsciiChar` are valid `u8`, given they're `repr(u8)`. - unsafe { &*(self as *const AsciiStr as *const [u8]) } + unsafe { mem::transmute(self) } } /// Returns the entire string as slice of `AsciiChar`s. @@ -108,6 +125,53 @@ impl AsciiStr { bytes.as_ref().as_ascii_str() } + /// Convert a byte slice into an `AsciiStr`. + /// + /// [`from_ascii()`](#method.from_ascii) should be preferred outside of `const` contexts + /// as it might be faster due to using functions that are not `const fn`. + /// + /// # Errors + /// Returns `Err` if not all bytes are valid ASCII values. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiStr; + /// assert!(AsciiStr::from_ascii_bytes(b"\x00\x22\x44").is_ok()); + /// assert!(AsciiStr::from_ascii_bytes(b"\x66\x77\x88").is_err()); + /// ``` + pub const fn from_ascii_bytes(b: &[u8]) -> Result<&Self, AsAsciiStrError> { + #![allow(clippy::indexing_slicing)] // .get() is not const yet (as of Rust 1.61) + let mut valid = 0; + loop { + if valid == b.len() { + // SAFETY: `is_ascii` having returned true for all bytes guarantees all bytes are within ascii range. + return unsafe { Ok(mem::transmute(b)) }; + } else if b[valid].is_ascii() { + valid += 1; + } else { + return Err(AsAsciiStrError(valid)); + } + } + } + + /// Convert a `str` into an `AsciiStr`. + /// + /// [`from_ascii()`](#method.from_ascii) should be preferred outside of `const` contexts + /// as it might be faster due to using functions that are not `const fn`. + /// + /// # Errors + /// Returns `Err` if it contains non-ASCII codepoints.
+ /// + /// # Examples + /// ``` + /// # use ascii::AsciiStr; + /// assert!(AsciiStr::from_ascii_str("25 C").is_ok()); + /// assert!(AsciiStr::from_ascii_str("35°C").is_err()); + /// ``` + pub const fn from_ascii_str(s: &str) -> Result<&Self, AsAsciiStrError> { + Self::from_ascii_bytes(s.as_bytes()) + } + /// Converts anything that can be represented as a byte slice to an `AsciiStr` without checking /// for non-ASCII characters.. /// @@ -214,7 +278,7 @@ impl AsciiStr { /// assert_eq!("white \tspace", example.trim()); /// ``` #[must_use] - pub fn trim(&self) -> &Self { + pub const fn trim(&self) -> &Self { self.trim_start().trim_end() } @@ -227,14 +291,16 @@ impl AsciiStr { /// assert_eq!("white \tspace \t", example.trim_start()); /// ``` #[must_use] - pub fn trim_start(&self) -> &Self { - let whitespace_len = self - .chars() - .position(|ch| !ch.is_whitespace()) - .unwrap_or_else(|| self.len()); - - // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice. - unsafe { self.as_slice().get_unchecked(whitespace_len..).into() } + pub const fn trim_start(&self) -> &Self { + let mut trimmed = &self.slice; + while let Some((first, rest)) = trimmed.split_first() { + if first.is_whitespace() { + trimmed = rest; + } else { + break; + } + } + AsciiStr::new(trimmed) } /// Returns an ASCII string slice with trailing whitespace removed. @@ -246,20 +312,16 @@ impl AsciiStr { /// assert_eq!(" \twhite \tspace", example.trim_end()); /// ``` #[must_use] - pub fn trim_end(&self) -> &Self { - // Number of whitespace characters counting from the end - let whitespace_len = self - .chars() - .rev() - .position(|ch| !ch.is_whitespace()) - .unwrap_or_else(|| self.len()); - - // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice, and at least `0`, which is the whole slice. 
- unsafe { - self.as_slice() - .get_unchecked(..self.len() - whitespace_len) - .into() + pub const fn trim_end(&self) -> &Self { + let mut trimmed = &self.slice; + while let Some((last, rest)) = trimmed.split_last() { + if last.is_whitespace() { + trimmed = rest; + } else { + break; + } } + AsciiStr::new(trimmed) } /// Compares two strings case-insensitively. diff --git a/src/lib.rs b/src/lib.rs index 5eacc16..2147d77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,11 @@ //! //! # Minimum supported Rust version //! -//! The minimum Rust version for 1.1.\* releases is 1.41.1. +//! The minimum Rust version for 1.2.\* releases is 1.56.1. //! Later 1.y.0 releases might require newer Rust versions, but the three most //! recent stable releases at the time of publishing will always be supported. //! For example this means that if the current stable Rust version is 1.70 when -//! ascii 1.2.0 is released, then ascii 1.2.\* will not require a newer +//! ascii 1.3.0 is released, then ascii 1.3.\* will not require a newer //! Rust version than 1.68. //! //! # History