diff --git a/Sources/FoundationEssentials/URL/URL.swift b/Sources/FoundationEssentials/URL/URL.swift index 294024f40..39ddb6bc0 100644 --- a/Sources/FoundationEssentials/URL/URL.swift +++ b/Sources/FoundationEssentials/URL/URL.swift @@ -763,6 +763,10 @@ public struct URL: Equatable, Sendable, Hashable { internal var _parseInfo: URLParseInfo! private var _baseParseInfo: URLParseInfo? + private static func parse(urlString: String, encodingInvalidCharacters: Bool = true) -> URLParseInfo? { + return Parser.parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .allowEmptyScheme) + } + internal init(parseInfo: URLParseInfo, relativeTo url: URL? = nil) { _parseInfo = parseInfo if parseInfo.scheme == nil { @@ -773,26 +777,44 @@ public struct URL: Equatable, Sendable, Hashable { #endif // FOUNDATION_FRAMEWORK } + /// The public initializers don't allow the empty string, and we must maintain that behavior + /// for compatibility. However, there are cases internally where we need to create a URL with + /// an empty string, such as when `.deletingLastPathComponent()` of a single path + /// component. This previously worked since `URL` just wrapped an `NSURL`, which + /// allows the empty string. + internal init?(stringOrEmpty: String, relativeTo url: URL? = nil) { + #if FOUNDATION_FRAMEWORK + guard foundation_swift_url_enabled() else { + guard let inner = NSURL(string: stringOrEmpty, relativeTo: url) else { return nil } + _url = URL._converted(from: inner) + return + } + #endif // FOUNDATION_FRAMEWORK + guard let parseInfo = URL.parse(urlString: stringOrEmpty) else { + return nil + } + _parseInfo = parseInfo + if parseInfo.scheme == nil { + _baseParseInfo = url?.absoluteURL._parseInfo + } + #if FOUNDATION_FRAMEWORK + _url = URL._nsURL(from: _parseInfo, baseParseInfo: _baseParseInfo) + #endif // FOUNDATION_FRAMEWORK + } + /// Initialize with string. 
/// /// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string). public init?(string: __shared String) { + guard !string.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { - guard !string.isEmpty, let inner = NSURL(string: string) else { return nil } + guard let inner = NSURL(string: string) else { return nil } _url = URL._converted(from: inner) return } - // Linked-on-or-after check for apps which pass an empty string. - // The new URL(string:) implementations allow the empty string - // as input since an empty path is valid and can be resolved - // against a base URL. This is shown in the RFC 3986 examples: - // https://datatracker.ietf.org/doc/html/rfc3986#section-5.4.1 - if Self.compatibility1 && string.isEmpty { - return nil - } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: string) else { return nil } _parseInfo = parseInfo @@ -805,14 +827,15 @@ public struct URL: Equatable, Sendable, Hashable { /// /// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string). public init?(string: __shared String, relativeTo url: __shared URL?) 
{ + guard !string.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { - guard !string.isEmpty, let inner = NSURL(string: string, relativeTo: url) else { return nil } + guard let inner = NSURL(string: string, relativeTo: url) else { return nil } _url = URL._converted(from: inner) return } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: string) else { return nil } _parseInfo = parseInfo @@ -831,14 +854,15 @@ public struct URL: Equatable, Sendable, Hashable { /// If the URL string is still invalid after encoding, `nil` is returned. @available(macOS 14.0, iOS 17.0, watchOS 10.0, tvOS 17.0, *) public init?(string: __shared String, encodingInvalidCharacters: Bool) { + guard !string.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { - guard !string.isEmpty, let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } + guard let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } _url = URL._converted(from: inner) return } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else { + guard let parseInfo = URL.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } _parseInfo = parseInfo @@ -865,7 +889,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory - self.init(filePath: path, directoryHint: directoryHint, relativeTo: base) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint, relativeTo: base) } /// Initializes a newly created file URL referencing the local file or directory at path, relative to a base URL. 
@@ -884,7 +908,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - self.init(filePath: path, directoryHint: .checkFileSystem, relativeTo: base) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem, relativeTo: base) } /// Initializes a newly created file URL referencing the local file or directory at path. @@ -905,7 +929,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory - self.init(filePath: path, directoryHint: directoryHint) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint) } /// Initializes a newly created file URL referencing the local file or directory at path. @@ -924,7 +948,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - self.init(filePath: path, directoryHint: .checkFileSystem) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem) } // NSURL(fileURLWithPath:) can return nil incorrectly for some malformed paths @@ -948,24 +972,24 @@ public struct URL: Equatable, Sendable, Hashable { /// /// If the data representation is not a legal URL string as ASCII bytes, the URL object may not behave as expected. If the URL cannot be formed then this will return nil. 
@available(macOS 10.11, iOS 9.0, watchOS 2.0, tvOS 9.0, *) - public init?(dataRepresentation: __shared Data, relativeTo url: __shared URL?, isAbsolute: Bool = false) { + public init?(dataRepresentation: __shared Data, relativeTo base: __shared URL?, isAbsolute: Bool = false) { guard !dataRepresentation.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { if isAbsolute { - _url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: url)) + _url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: base)) } else { - _url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: url)) + _url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: base)) } return } #endif var url: URL? if let string = String(data: dataRepresentation, encoding: .utf8) { - url = URL(string: string, relativeTo: url) + url = URL(stringOrEmpty: string, relativeTo: base) } if url == nil, let string = String(data: dataRepresentation, encoding: .isoLatin1) { - url = URL(string: string, relativeTo: url) + url = URL(stringOrEmpty: string, relativeTo: base) } guard let url else { return nil @@ -990,7 +1014,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - guard let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: _url.relativeString) else { return nil } _parseInfo = parseInfo @@ -1011,7 +1035,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif bookmarkDataIsStale = stale.boolValue - let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true)! + let parseInfo = URL.parse(urlString: _url.relativeString)! 
_parseInfo = parseInfo if parseInfo.scheme == nil { _baseParseInfo = url?.absoluteURL._parseInfo @@ -1089,7 +1113,9 @@ public struct URL: Equatable, Sendable, Hashable { } if let baseScheme = _baseParseInfo.scheme { - result.scheme = String(baseScheme) + // Scheme might be empty, which URL allows for compatibility, + // but URLComponents does not, so we force it internally. + result.forceScheme(String(baseScheme)) } if hasAuthority { @@ -1236,6 +1262,14 @@ public struct URL: Equatable, Sendable, Hashable { return nil } + // According to RFC 3986, a host always exists if there is an authority + // component, it just might be empty. However, the old implementation + // of URL.host() returned nil for URLs like "https:///", and apps rely + // on this behavior, so keep it for bincompat. + if encodedHost.isEmpty, user() == nil, password() == nil, port == nil { + return nil + } + func requestedHost() -> String? { let didPercentEncodeHost = hasAuthority ? _parseInfo.didPercentEncodeHost : _baseParseInfo?.didPercentEncodeHost ?? false if percentEncoded { @@ -1456,7 +1490,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif if _baseParseInfo != nil { - return absoluteURL.path(percentEncoded: percentEncoded) + return absoluteURL.relativePath(percentEncoded: percentEncoded) } if percentEncoded { return String(_parseInfo.path) @@ -1844,7 +1878,7 @@ public struct URL: Equatable, Sendable, Hashable { var components = URLComponents(parseInfo: _parseInfo) let newPath = components.percentEncodedPath.removingDotSegments components.percentEncodedPath = newPath - return components.url(relativeTo: baseURL)! + return components.url(relativeTo: baseURL) ?? self } /// Standardizes the path of a file URL by removing dot segments. 
@@ -2060,7 +2094,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - if let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) { + if let parseInfo = URL.parse(urlString: _url.relativeString) { _parseInfo = parseInfo } else { // Go to compatibility jail (allow `URL` as a dummy string container for `NSURL` instead of crashing) @@ -2218,7 +2252,7 @@ extension URL { #if !NO_FILESYSTEM baseURL = baseURL ?? .currentDirectoryOrNil() #endif - self.init(string: "", relativeTo: baseURL)! + self.init(string: "./", relativeTo: baseURL)! return } @@ -2481,6 +2515,14 @@ extension URL { #endif // NO_FILESYSTEM } #endif // FOUNDATION_FRAMEWORK + + // The old .appending(component:) implementation did not actually percent-encode + // "/" for file URLs as the documentation suggests. Many apps accidentally use + // .appending(component: "path/with/slashes") instead of using .appending(path:), + // so changing this behavior would cause breakage. + if isFileURL { + return appending(path: component, directoryHint: directoryHint, encodingSlashes: false) + } return appending(path: component, directoryHint: directoryHint, encodingSlashes: true) } diff --git a/Sources/FoundationEssentials/URL/URLComponents.swift b/Sources/FoundationEssentials/URL/URLComponents.swift index f5ce53ae7..43bd493be 100644 --- a/Sources/FoundationEssentials/URL/URLComponents.swift +++ b/Sources/FoundationEssentials/URL/URLComponents.swift @@ -142,10 +142,12 @@ public struct URLComponents: Hashable, Equatable, Sendable { return nil } - mutating func setScheme(_ newValue: String?) 
throws { + mutating func setScheme(_ newValue: String?, force: Bool = false) throws { reset(.scheme) - guard Parser.validate(newValue, component: .scheme) else { - throw InvalidComponentError.scheme + if !force { + guard Parser.validate(newValue, component: .scheme) else { + throw InvalidComponentError.scheme + } } _scheme = newValue if encodedHost != nil { @@ -364,6 +366,26 @@ public struct URLComponents: Hashable, Equatable, Sendable { return "" } + private var percentEncodedPathNoColon: String { + let p = percentEncodedPath + guard p.utf8.first(where: { $0 == ._colon || $0 == ._slash }) == ._colon else { + return p + } + if p.utf8.first == ._colon { + // In the rare case that an app relies on URL allowing an empty + // scheme and passes its URL string directly to URLComponents + // to modify other components, we need to return the path without + // encoding the colons. + return p + } + let firstSlash = p.utf8.firstIndex(of: ._slash) ?? p.endIndex + let colonEncodedSegment = Array(p[.. URLParseInfo? + static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo? + static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component) -> Bool static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component, percentEncodingAllowed: Bool) -> Bool @@ -401,15 +408,18 @@ internal struct RFC3986Parser: URLParserProtocol { } /// Fast path used during initial URL buffer parsing. 
- private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>) -> Bool { - guard let first = schemeBuffer.first, - first >= UInt8(ascii: "A"), + private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>, compatibility: URLParserCompatibility = .init()) -> Bool { + guard let first = schemeBuffer.first else { + return compatibility.contains(.allowEmptyScheme) + } + guard first >= UInt8(ascii: "A"), validate(buffer: schemeBuffer, component: .scheme, percentEncodingAllowed: false) else { return false } return true } + /// Only used by URLComponents, don't need to consider `URLParserCompatibility.allowEmptyScheme` private static func validate(scheme: some StringProtocol) -> Bool { // A valid scheme must start with an ALPHA character. // If first >= "A" and is in schemeAllowed, then first is ALPHA. @@ -593,10 +603,14 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into `URLParseInfo`, with the option to add (or skip) encoding of invalid characters. /// If `encodingInvalidCharacters` is `true`, this function handles encoding of invalid components. static func parse(urlString: String, encodingInvalidCharacters: Bool) -> URLParseInfo? { + return parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .init()) + } + + static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo? { #if os(Windows) let urlString = urlString.replacing(UInt8(ascii: "\\"), with: UInt8(ascii: "/")) #endif - guard let parseInfo = parse(urlString: urlString) else { + guard let parseInfo = parse(urlString: urlString, compatibility: compatibility) else { return nil } @@ -690,10 +704,10 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into its component parts and stores these ranges in a `URLParseInfo`. /// This function calls `parse(buffer:)`, then converts the buffer ranges into string ranges. - private static func parse(urlString: String) -> URLParseInfo? 
{ + private static func parse(urlString: String, compatibility: URLParserCompatibility = .init()) -> URLParseInfo? { var string = urlString let bufferParseInfo = string.withUTF8 { - parse(buffer: $0) + parse(buffer: $0, compatibility: compatibility) } guard let bufferParseInfo else { return nil @@ -726,7 +740,7 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into its component parts and stores these ranges in a `URLBufferParseInfo`. /// This function only parses based on delimiters and does not do any encoding. - private static func parse(buffer: UnsafeBufferPointer) -> URLBufferParseInfo? { + private static func parse(buffer: UnsafeBufferPointer, compatibility: URLParserCompatibility = .init()) -> URLBufferParseInfo? { // A URI is either: // 1. scheme ":" hier-part [ "?" query ] [ "#" fragment ] // 2. relative-ref @@ -746,12 +760,12 @@ internal struct RFC3986Parser: URLParserProtocol { let v = buffer[currentIndex] if v == UInt8(ascii: ":") { // Scheme must be at least 1 character, otherwise this is a relative-ref. - if currentIndex != buffer.startIndex { + if currentIndex != buffer.startIndex || compatibility.contains(.allowEmptyScheme) { parseInfo.schemeRange = buffer.startIndex.. 1) { // The trailing slash is stripped in .path for file system compatibility @@ -607,11 +607,13 @@ final class URLTests : XCTestCase { XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash") XCTAssertEqual(appended.relativePath, "relative/no:slash") - // `appending(component:)` should explicitly treat `component` as a single - // path component, meaning "/" should be encoded to "%2F" before appending + // .appending(component:) should explicitly treat slashComponent as a single + // path component, meaning "/" should be encoded to "%2F" before appending. + // However, the old behavior didn't do this for file URLs, so we maintain the + // old behavior to prevent breakage. 
appended = url.appending(component: slashComponent, directoryHint: .notDirectory) - checkBehavior(appended.absoluteString, new: "file:///var/mobile/relative/%2Fwith:slash", old: "file:///var/mobile/relative/with:slash") - checkBehavior(appended.relativePath, new: "relative/%2Fwith:slash", old: "relative/with:slash") + XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/with:slash") + XCTAssertEqual(appended.relativePath, "relative/with:slash") appended = url.appendingPathComponent(component, isDirectory: false) XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash") @@ -687,7 +689,7 @@ final class URLTests : XCTestCase { checkBehavior(relative.path, new: "/", old: "/..") relative = URL(filePath: "", relativeTo: absolute) - checkBehavior(relative.relativePath, new: "", old: ".") + XCTAssertEqual(relative.relativePath, ".") XCTAssertTrue(relative.hasDirectoryPath) XCTAssertEqual(relative.path, "/absolute") @@ -964,6 +966,21 @@ final class URLTests : XCTestCase { XCTAssertEqual(schemeOnly.absoluteString, "scheme:foo") } + func testURLEmptySchemeCompatibility() throws { + var url = try XCTUnwrap(URL(string: ":memory:")) + XCTAssertEqual(url.scheme, "") + + let base = try XCTUnwrap(URL(string: "://home")) + XCTAssertEqual(base.host(), "home") + + url = try XCTUnwrap(URL(string: "/path", relativeTo: base)) + XCTAssertEqual(url.scheme, "") + XCTAssertEqual(url.host(), "home") + XCTAssertEqual(url.path, "/path") + XCTAssertEqual(url.absoluteString, "://home/path") + XCTAssertEqual(url.absoluteURL.scheme, "") + } + func testURLComponentsPercentEncodedUnencodedProperties() throws { var comp = URLComponents() @@ -1345,6 +1362,29 @@ final class URLTests : XCTestCase { comp = try XCTUnwrap(URLComponents(string: legalURLString)) XCTAssertEqual(comp.string, legalURLString) XCTAssertEqual(comp.percentEncodedPath, colonFirstPath) + + // Colons should be percent-encoded by URLComponents.string if + // they could be misinterpreted as a scheme 
separator. + + comp = URLComponents() + comp.percentEncodedPath = "not%20a%20scheme:" + XCTAssertEqual(comp.string, "not%20a%20scheme%3A") + + // These would fail if we did not percent-encode the colon. + // .string should always produce a valid URL string, or nil. + + XCTAssertNotNil(URL(string: comp.string!)) + XCTAssertNotNil(URLComponents(string: comp.string!)) + + // In rare cases, an app might rely on URL allowing an empty scheme, + // but then take that string and pass it to URLComponents to modify + // other components of the URL. We shouldn't percent-encode the colon + // in these cases. + + let url = try XCTUnwrap(URL(string: "://host/path")) + comp = try XCTUnwrap(URLComponents(string: url.absoluteString)) + comp.query = "key=value" + XCTAssertEqual(comp.string, "://host/path?key=value") } func testURLComponentsInvalidPaths() { @@ -1425,6 +1465,12 @@ final class URLTests : XCTestCase { XCTAssertEqual(comp.path, "/my\u{0}path") } + func testURLStandardizedEmptyString() throws { + let url = try XCTUnwrap(URL(string: "../../../")) + let standardized = url.standardized // Dot-segment removal leaves an empty path here; this must not crash. + XCTAssertEqual(standardized.path(), "") + } + #if FOUNDATION_FRAMEWORK func testURLComponentsBridging() { var nsURLComponents = NSURLComponents(