Skip to content
4 changes: 2 additions & 2 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ extension Anchor {
///
/// Word boundaries are identified using the Unicode default word boundary
/// algorithm by default. To specify a different word boundary algorithm,
/// see the `RegexComponent.wordBoundaryKind(_:)` method.
/// use the `wordBoundaryKind(_:)` method.
///
/// This anchor is equivalent to `\b` in regex syntax.
public static var wordBoundary: Anchor {
Expand All @@ -157,7 +157,7 @@ extension Anchor {
/// The inverse of this anchor, which matches at every position that this
/// anchor does not.
///
/// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted
/// For the ``wordBoundary`` and ``textSegmentBoundary`` anchors, the inverted
/// version corresponds to `\B` and `\Y`, respectively.
public var inverted: Anchor {
var result = self
Expand Down
9 changes: 9 additions & 0 deletions Sources/RegexBuilder/Builder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@

@_spi(RegexBuilder) import _StringProcessing

/// A custom parameter attribute that constructs regular expressions from
/// closures.
///
/// You typically see `RegexComponentBuilder` as a parameter attribute for
/// `Regex`- or `RegexComponent`-producing closure parameters, allowing those
/// closures to combine multiple regular expression components. Type
/// initializers and string algorithm methods in the RegexBuilder framework
/// include a builder closure parameter, so that you can use regular expression
/// components together.
@available(SwiftStdlib 5.7, *)
@resultBuilder
public enum RegexComponentBuilder {
Expand Down
102 changes: 94 additions & 8 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
@_implementationOnly import _RegexParser
@_spi(RegexBuilder) import _StringProcessing

/// A class of characters that match in a regex.
///
/// A character class can represent individual characters, a group of
/// characters, the set of characters that match some set of criteria, or
/// a set algebraic combination of all of the above.
@available(SwiftStdlib 5.7, *)
public struct CharacterClass {
internal var ccc: DSLTree.CustomCharacterClass
Expand Down Expand Up @@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// A character class that matches any character that does not match this
/// character class.
///
/// For example, you can use the `inverted` property to create a character
/// class that excludes a specific group of characters:
///
/// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
/// let invalidCharacters = validCharacters.inverted
///
/// let username = "user123"
/// if username.contains(invalidCharacters) {
/// print("Invalid username: '\(username)'")
/// }
/// // Prints "Invalid username: 'user123'"
public var inverted: CharacterClass {
if let inv = builtin?.inverted {
return CharacterClass(builtin: inv)
Expand All @@ -53,26 +72,50 @@ extension CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// A character class matching every element, newlines included.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// Use ``anyNonNewline`` when you need to match every character except
/// newlines.
///
/// In regex syntax, this character class corresponds to the "dot"
/// metacharacter in single-line mode: `(?s:.)`.
public static var any: CharacterClass {
  let anyElement = DSLTree.CustomCharacterClass(members: [.atom(.any)])
  return CharacterClass(anyElement)
}

/// A character class matching every element except newlines.
///
/// The `dotMatchesNewlines()` method has no effect on this character class.
/// Use ``any`` when newlines should be matched as well.
///
/// In regex syntax, this character class corresponds to the "dot"
/// metacharacter with single-line mode disabled: `(?-s:.)`.
public static var anyNonNewline: CharacterClass {
  let nonNewline = DSLTree.CustomCharacterClass(
    members: [.atom(.anyNonNewline)])
  return CharacterClass(nonNewline)
}

/// A character class matching a single `Character` (an extended grapheme
/// cluster), whatever the current semantic level is.
///
/// In regex syntax, this character class corresponds to `\X`.
public static var anyGraphemeCluster: CharacterClass {
  return CharacterClass(builtin: .anyGrapheme)
}

/// A character class that matches any element that is classified as
/// whitespace.
///
/// This character class is equivalent to `\s` in regex syntax.
public static var whitespace: CharacterClass {
.init(builtin: .whitespace)
}

/// A character class matching any digit.
///
/// In regex syntax, this character class corresponds to `\d`.
public static var digit: CharacterClass {
  return CharacterClass(builtin: .digit)
}

/// A character class that matches any hexadecimal digit.
///
/// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
/// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
/// and Fullwidth Forms" Unicode block are not matched by this character
/// class.
public static var hexDigit: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [
.range(.char("A"), .char("F")),
Expand All @@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
]))
}

/// A character class matching any element that counts as a "word character".
///
/// In regex syntax, this character class corresponds to `\w`.
public static var word: CharacterClass {
  return CharacterClass(builtin: .word)
}

/// A character class matching any element classified as whitespace.
///
/// In regex syntax, this character class corresponds to `\s`.
public static var whitespace: CharacterClass {
  return CharacterClass(builtin: .whitespace)
}

/// A character class matching any element classified as horizontal
/// whitespace.
///
/// In regex syntax, this character class corresponds to `\h`.
public static var horizontalWhitespace: CharacterClass {
  return CharacterClass(builtin: .horizontalWhitespace)
}

/// A character class matching any newline sequence.
///
/// In regex syntax, this character class corresponds to `\R` or `\n`.
public static var newlineSequence: CharacterClass {
  return CharacterClass(builtin: .newlineSequence)
}

/// A character class matching any element classified as vertical
/// whitespace.
///
/// In regex syntax, this character class corresponds to `\v`.
public static var verticalWhitespace: CharacterClass {
  return CharacterClass(builtin: .verticalWhitespace)
}

/// A character class that matches any element that is a "word character".
///
/// This character class is equivalent to `\w` in regex syntax.
public static var word: CharacterClass {
.init(builtin: .word)
}
}

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Returns a character class that matches any character in the given string
/// or sequence.
///
/// Calling this method with a group of characters is equivalent to listing
/// those characters in a custom character class in regex syntax. For example,
/// the two regexes in this example are equivalent:
///
/// let regex1 = /[abcd]+/
/// let regex2 = OneOrMore(.anyOf("abcd"))
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == Character
{
Expand All @@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {

/// Returns a character class that matches any Unicode scalar in the given
/// sequence.
///
/// Calling this method with a group of Unicode scalars is equivalent to
/// listing them in a custom character class in regex syntax.
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == UnicodeScalar
{
Expand All @@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
// Unicode properties
@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Returns a character class matching any element that has the given
/// Unicode general category.
///
/// For example, passing `.uppercaseLetter` produces a character class
/// equivalent to `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
///
/// - Parameter category: The Unicode general category to match.
/// - Returns: A character class that matches elements in `category`.
public static func generalCategory(
  _ category: Unicode.GeneralCategory
) -> CharacterClass {
  .init(.generalCategory(category))
}
Expand All @@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Creates a character class that combines the given classes in a union.
public init(_ first: CharacterClass, _ rest: CharacterClass...) {
if rest.isEmpty {
self.init(first.ccc)
Expand All @@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Returns a character class formed from the union of this class and the
/// given class.
///
/// - Parameter other: The character class to combine with this one.
/// - Returns: A character class wrapping both classes as members.
public func union(_ other: CharacterClass) -> CharacterClass {
  let combined = DSLTree.CustomCharacterClass(members: [
    .custom(ccc),
    .custom(other.ccc),
  ])
  return CharacterClass(combined)
}

/// Returns a character class formed from the intersection of this class and
/// the given class.
///
/// - Parameter other: The character class to intersect with this one.
/// - Returns: A character class whose single member is the intersection of
///   the two classes.
public func intersection(_ other: CharacterClass) -> CharacterClass {
  let shared = DSLTree.CustomCharacterClass(members: [
    .intersection(ccc, other.ccc)
  ])
  return CharacterClass(shared)
}

/// Returns a character class formed by subtracting the given class from this
/// class.
///
/// - Parameter other: The character class to remove from this one.
/// - Returns: A character class whose single member is this class with
///   `other` subtracted.
public func subtracting(_ other: CharacterClass) -> CharacterClass {
  let remainder = DSLTree.CustomCharacterClass(members: [
    .subtraction(ccc, other.ccc)
  ])
  return CharacterClass(remainder)
}

/// Returns a character class matching elements in one or the other, but not both,
/// of this class and the given class.
public func symmetricDifference(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.symmetricDifference(self.ccc, other.ccc)
Expand Down
Loading