Skip to content

Fix bugs found by mutating source in SwiftParserTest using alternative token choices #1341

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
12 changes: 6 additions & 6 deletions Sources/SwiftParser/Attributes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ extension Parser {
mutating func parseAttribute() -> RawAttributeListSyntax.Element {
if self.at(.poundIfKeyword) {
return .ifConfigDecl(
self.parsePoundIfDirective { parser -> RawAttributeListSyntax.Element in
self.parsePoundIfDirective { (parser, _) -> RawAttributeListSyntax.Element in
return parser.parseAttribute()
} syntax: { parser, attributes in
return .attributes(RawAttributeListSyntax(elements: attributes, arena: parser.arena))
Expand Down Expand Up @@ -607,7 +607,7 @@ extension Parser {
mutating func parseObjectiveCSelector() -> RawObjCSelectorSyntax {
var elements = [RawObjCSelectorPieceSyntax]()
var loopProgress = LoopProgressCondition()
while !self.at(.eof, .rightParen) && loopProgress.evaluate(currentToken) {
while loopProgress.evaluate(currentToken) {
// Empty selector piece.
if let colon = self.consume(if: .colon) {
elements.append(
Expand All @@ -618,10 +618,8 @@ extension Parser {
)
)
continue
}

if self.at(.identifier) || self.currentToken.isLexerClassifiedKeyword {
let name = self.consumeAnyToken()
} else if self.at(.identifier, .wildcard) || self.currentToken.isLexerClassifiedKeyword {
let name = self.consumeAnyToken(remapping: .identifier)

// If we hit a ')' we may have a zero-argument selector.
if self.at(.rightParen) && elements.isEmpty {
Expand All @@ -644,6 +642,8 @@ extension Parser {
arena: self.arena
)
)
} else {
break
}
}
return RawObjCSelectorSyntax(elements: elements, arena: self.arena)
Expand Down
9 changes: 6 additions & 3 deletions Sources/SwiftParser/Declarations.swift
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ extension Parser {
// parsed when we're parsing the attributes.
break
}
let directive = self.parsePoundIfDirective { parser in
let directive = self.parsePoundIfDirective { (parser, _) in
let parsedDecl = parser.parseDeclaration()
let semicolon = parser.consume(if: .semicolon)
return RawMemberDeclListItemSyntax(
Expand Down Expand Up @@ -1581,6 +1581,7 @@ extension Parser {
var attributes: RawAttributeListSyntax?
var modifier: RawDeclModifierSyntax?
var kind: AccessorKind
var unexpectedBeforeToken: RawUnexpectedNodesSyntax?
var token: RawTokenSyntax
}

Expand All @@ -1591,7 +1592,7 @@ extension Parser {
var look = self.lookahead()
let _ = look.consumeAttributeList()
let hasModifier = look.consume(if: .keyword(.mutating), .keyword(.nonmutating), .keyword(.__consuming)) != nil
guard let (kind, handle) = look.at(anyIn: AccessorKind.self) ?? forcedKind else {
guard let (kind, _) = look.at(anyIn: AccessorKind.self) ?? forcedKind else {
return nil
}

Expand All @@ -1612,11 +1613,12 @@ extension Parser {
modifier = nil
}

let introducer = self.eat(handle)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops!

let (unexpectedBeforeIntroducer, introducer) = self.expect(kind.spec)
return AccessorIntroducer(
attributes: attrs,
modifier: modifier,
kind: kind,
unexpectedBeforeToken: unexpectedBeforeIntroducer,
token: introducer
)
}
Expand Down Expand Up @@ -1673,6 +1675,7 @@ extension Parser {
return RawAccessorDeclSyntax(
attributes: introducer.attributes,
modifier: introducer.modifier,
introducer.unexpectedBeforeToken,
accessorKind: introducer.token,
parameter: parameter,
effectSpecifiers: effectSpecifiers,
Expand Down
11 changes: 4 additions & 7 deletions Sources/SwiftParser/Directives.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,19 @@
@_spi(RawSyntax) import SwiftSyntax

extension Parser {
private enum IfConfigClauseStartKeyword: TokenSpecSet {
case poundIfKeyword
private enum IfConfigContinuationClauseStartKeyword: TokenSpecSet {
case poundElseifKeyword
case poundElseKeyword

var spec: TokenSpec {
switch self {
case .poundIfKeyword: return .poundIfKeyword
case .poundElseifKeyword: return .poundElseifKeyword
case .poundElseKeyword: return .poundElseKeyword
}
}

init?(lexeme: Lexer.Lexeme) {
switch PrepareForKeywordMatch(lexeme) {
case TokenSpec(.poundIfKeyword): self = .poundIfKeyword
case TokenSpec(.poundElseifKeyword): self = .poundElseifKeyword
case TokenSpec(.poundElseKeyword): self = .poundElseKeyword
default: return nil
Expand Down Expand Up @@ -89,7 +86,7 @@ extension Parser {
/// into a syntax collection.
@_spi(RawSyntax)
public mutating func parsePoundIfDirective<Element: RawSyntaxNodeProtocol>(
_ parseElement: (inout Parser) -> Element?,
_ parseElement: (_ parser: inout Parser, _ isFirstElement: Bool) -> Element?,
addSemicolonIfNeeded: (_ lastElement: Element, _ newItemAtStartOfLine: Bool, _ parser: inout Parser) -> Element? = { _, _, _ in nil },
syntax: (inout Parser, [Element]) -> RawIfConfigClauseSyntax.Elements?
) -> RawIfConfigDeclSyntax {
Expand All @@ -106,7 +103,7 @@ extension Parser {
do {
var firstIteration = true
var loopProgress = LoopProgressCondition()
while let poundIfHandle = firstIteration ? self.canRecoverTo(.poundIfKeyword) : self.canRecoverTo(anyIn: IfConfigClauseStartKeyword.self)?.handle,
while let poundIfHandle = firstIteration ? self.canRecoverTo(.poundIfKeyword) : self.canRecoverTo(anyIn: IfConfigContinuationClauseStartKeyword.self)?.handle,
loopProgress.evaluate(self.currentToken)
{
let (unexpectedBeforePoundIf, poundIf) = self.eat(poundIfHandle)
Expand All @@ -127,7 +124,7 @@ extension Parser {
var elementsProgress = LoopProgressCondition()
while !self.at(.eof) && !self.at(.poundElseKeyword, .poundElseifKeyword, .poundEndifKeyword) && elementsProgress.evaluate(currentToken) {
let newItemAtStartOfLine = self.currentToken.isAtStartOfLine
guard let element = parseElement(&self), !element.isEmpty else {
guard let element = parseElement(&self, elements.isEmpty), !element.isEmpty else {
break
}
if let lastElement = elements.last, let fixedUpLastItem = addSemicolonIfNeeded(lastElement, newItemAtStartOfLine, &self) {
Expand Down
10 changes: 8 additions & 2 deletions Sources/SwiftParser/Expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,10 @@ extension Parser {
) -> RawExprSyntax {
assert(self.at(.poundIfKeyword))

let config = self.parsePoundIfDirective { parser -> RawExprSyntax? in
let config = self.parsePoundIfDirective { (parser, isFirstElement) -> RawExprSyntax? in
if !isFirstElement {
return nil
}
let head: RawExprSyntax
if parser.at(.period) {
head = parser.parseDottedExpressionSuffix(nil)
Expand Down Expand Up @@ -2236,7 +2239,7 @@ extension Parser {
elements.append(
.ifConfigDecl(
self.parsePoundIfDirective(
{ $0.parseSwitchCases(allowStandaloneStmtRecovery: allowStandaloneStmtRecovery) },
{ (parser, _) in parser.parseSwitchCases(allowStandaloneStmtRecovery: allowStandaloneStmtRecovery) },
syntax: { parser, cases in
guard cases.count == 1, let firstCase = cases.first else {
assert(cases.isEmpty)
Expand All @@ -2250,6 +2253,9 @@ extension Parser {
} else if allowStandaloneStmtRecovery && (self.atStartOfExpression() || self.atStartOfStatement() || self.atStartOfDeclaration()) {
// Synthesize a label for the statement or declaration that isn't covered by a case right now.
let statements = parseSwitchCaseBody()
if statements.isEmpty {
break
}
elements.append(
.switchCase(
RawSwitchCaseSyntax(
Expand Down
10 changes: 0 additions & 10 deletions Sources/SwiftParser/Lexer/Cursor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -695,16 +695,6 @@ extension Lexer.Cursor {
mutating func advanceValidatingUTF8Character() -> Unicode.Scalar? {
return Unicode.Scalar.lexing(advance: { self.advance() }, peek: { self.peek(at: 0) })
}

/// Revert the lexer by `offset` bytes. This should only be used by `resetForSplit`.
/// This must not back up by more bytes than the last token because that would
/// require us to also update `previousTokenKind`, which we don't do in this
/// function
mutating func backUp(by offset: Int) {
assert(!self.isAtStartOfFile)
self.previous = self.input.baseAddress!.advanced(by: -(offset + 1)).pointee
self.input = UnsafeBufferPointer(start: self.input.baseAddress!.advanced(by: -offset), count: self.input.count + offset)
}
}

// MARK: - Boundness of operators
Expand Down
16 changes: 6 additions & 10 deletions Sources/SwiftParser/Lexer/LexemeSequence.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,14 @@ extension Lexer {
return self.nextToken
}

/// Reset the lexeme sequence to the state we were in when lexing `splitToken`
/// but after we consumed `consumedPrefix` bytes from `splitToken`.
/// - Warning: Do not add more usages of this function.
mutating func resetForSplit(of bytes: Int) -> Lexer.Lexeme {
guard bytes > 0 else {
return self.advance()
mutating func resetForSplit(splitToken: Lexeme, consumedPrefix: Int) -> Lexer.Lexeme {
self.cursor = splitToken.cursor
for _ in 0..<consumedPrefix {
_ = self.cursor.advance()
}

// FIXME: This is kind of ridiculous. We shouldn't have to look backwards
// in the token stream. We should be fusing together runs of operator and
// identifier characters in the parser, not splitting and backing up
// again in the lexer.
let backUpLength = self.nextToken.byteLength + bytes
self.cursor.backUp(by: backUpLength)
self.nextToken = self.cursor.nextToken(sourceBufferStart: self.sourceBufferStart, stateAllocator: lexerStateAllocator)
return self.advance()
}
Expand Down
10 changes: 4 additions & 6 deletions Sources/SwiftParser/Lookahead.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,10 @@ extension Parser.Lookahead {
}
assert(tokenText.hasPrefix(prefix))

// See also: Parser.consumePrefix(_:as:)
let offset =
(self.currentToken.trailingTriviaByteLength
+ tokenText.count
- prefix.count)
self.currentToken = self.lexemes.resetForSplit(of: offset)
self.currentToken = self.lexemes.resetForSplit(
splitToken: self.currentToken,
consumedPrefix: self.currentToken.leadingTriviaByteLength + prefix.count
)
}
}

Expand Down
34 changes: 4 additions & 30 deletions Sources/SwiftParser/Parser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -555,36 +555,10 @@ extension Parser {

self.adjustNestingLevel(for: tokenKind)

// ... or a multi-character token with the first N characters being the one
// that we want to consume as a separate token.
// Careful: We need to reset the lexer to a point just before it saw the
// current token, plus the split point. That means we need to take trailing
// trivia into account for the current token, but we only need to take the
// number of UTF-8 bytes of the text of the split - no trivia necessary.
//
// <TOKEN<trailing trivia>> <NEXT TOKEN> ... -> <T> <OKEN<trailing trivia>> <NEXT TOKEN>
//
// The current calculation is:
//
// <<leading trivia>TOKEN<trailing trivia>>
// CURSOR ^
// + trailing trivia length
//
// <<leading trivia>TOKEN<trailing trivia>>
// CURSOR ^
// + content length
//
// <<leading trivia>TOKEN<trailing trivia>>
// CURSOR ^
// - split point length
//
// <<leading trivia>TOKEN<trailing trivia>>
// CURSOR ^
let offset =
(self.currentToken.trailingTriviaByteLength
+ tokenText.count
- prefix.count)
self.currentToken = self.lexemes.resetForSplit(of: offset)
self.currentToken = self.lexemes.resetForSplit(
splitToken: self.currentToken,
consumedPrefix: self.currentToken.leadingTriviaByteLength + prefix.count
)
return tok
}
}
Expand Down
3 changes: 1 addition & 2 deletions Sources/SwiftParser/StringLiterals.swift
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,6 @@ extension Parser {
// -------------------------------------------------------------------------
// Precondition

assert(closeQuote.leadingTriviaByteLength == 0, "Closing quote produced by the lexer should not have leading trivia because we would drop it during post-processing")
assert(
allSegments.allSatisfy {
if case .stringSegment(let segment) = $0 {
Expand Down Expand Up @@ -349,7 +348,7 @@ extension Parser {
middleSegments: &middleSegments
)

if !closeDelimiterOnNewLine {
if !closeDelimiterOnNewLine || closeQuote.leadingTriviaByteLength != 0 {
unexpectedBeforeCloseQuote = [closeQuote]
closeQuote = RawTokenSyntax(missing: closeQuote.tokenKind, leadingTriviaPieces: [.newlines(1)], arena: self.arena)

Expand Down
5 changes: 0 additions & 5 deletions Sources/SwiftParser/TokenSpecSet.swift
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,6 @@ enum PrimaryExpressionStart: TokenSpecSet {
enum ExpressionStart: TokenSpecSet {
case awaitTryMove(AwaitTryMove)
case expressionPrefixOperator(ExpressionPrefixOperator)
case matchingPatternStart(MatchingPatternStart)
case primaryExpressionStart(PrimaryExpressionStart)
case ifOrSwitch(IfOrSwitch)

Expand All @@ -715,8 +714,6 @@ enum ExpressionStart: TokenSpecSet {
self = .awaitTryMove(subset)
} else if let subset = ExpressionPrefixOperator(lexeme: lexeme) {
self = .expressionPrefixOperator(subset)
} else if let subset = MatchingPatternStart(lexeme: lexeme) {
self = .matchingPatternStart(subset)
} else if let subset = PrimaryExpressionStart(lexeme: lexeme) {
self = .primaryExpressionStart(subset)
} else if let subset = IfOrSwitch(lexeme: lexeme) {
Expand All @@ -729,7 +726,6 @@ enum ExpressionStart: TokenSpecSet {
static var allCases: [ExpressionStart] {
return AwaitTryMove.allCases.map(Self.awaitTryMove)
+ ExpressionPrefixOperator.allCases.map(Self.expressionPrefixOperator)
+ MatchingPatternStart.allCases.map(Self.matchingPatternStart)
+ PrimaryExpressionStart.allCases.map(Self.primaryExpressionStart)
+ IfOrSwitch.allCases.map(Self.ifOrSwitch)
}
Expand All @@ -738,7 +734,6 @@ enum ExpressionStart: TokenSpecSet {
switch self {
case .awaitTryMove(let underlyingKind): return underlyingKind.spec
case .expressionPrefixOperator(let underlyingKind): return underlyingKind.spec
case .matchingPatternStart(let underlyingKind): return underlyingKind.spec
case .primaryExpressionStart(let underlyingKind): return underlyingKind.spec
case .ifOrSwitch(let underlyingKind): return underlyingKind.spec
}
Expand Down
4 changes: 2 additions & 2 deletions Sources/SwiftParser/TopLevel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ extension Parser {
if self.at(.poundIfKeyword) && !self.withLookahead({ $0.consumeIfConfigOfAttributes() }) {
// If config of attributes is parsed as part of declaration parsing as it
// doesn't constitute its own code block item.
let directive = self.parsePoundIfDirective {
$0.parseCodeBlockItem()
let directive = self.parsePoundIfDirective { (parser, _) in
parser.parseCodeBlockItem()
} addSemicolonIfNeeded: { lastElement, newItemAtStartOfLine, parser in
if lastElement.semicolon == nil && !newItemAtStartOfLine {
return RawCodeBlockItemSyntax(
Expand Down
7 changes: 7 additions & 0 deletions Tests/SwiftParserTest/AttributeTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ final class AttributeTests: XCTestCase {
func f(_: Int, _: Int, _: Int, _: Int, _: Int) { }
"""
)

AssertParse(
"""
@objc(_:)
func f(_: Int)
"""
)
}

func testRethrowsAttribute() {
Expand Down
16 changes: 16 additions & 0 deletions Tests/SwiftParserTest/DirectiveTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,22 @@ final class DirectiveTests: XCTestCase {
)
}

func testPostfixIfConfigExpressionContainsPoundIf() {
AssertParse(
"""
b
#if true
.a
1️⃣#if true
#endif
#endif
""",
diagnostics: [
DiagnosticSpec(message: "unexpected code in conditional compilation block")
]
)
}

func testSourceLocation() {
AssertParse(
"""
Expand Down
Loading