haskell · michaelpj · Jan 3, 2024 · Dec 31, 2023 · Jan 1, 2024 · Jan 1, 2024
diff --git a/lsp/src/Language/LSP/VFS.hs b/lsp/src/Language/LSP/VFS.hs
@@ -81,10 +81,11 @@ import Data.Row
 import Data.Text (Text)
 import Data.Text qualified as T
 import Data.Text.IO qualified as T
+import Data.Text.Lines as Char (Position (..))
 import Data.Text.Prettyprint.Doc hiding (line)
-import Data.Text.Rope qualified as URope
-import Data.Text.Utf16.Rope (Rope)
-import Data.Text.Utf16.Rope qualified as Rope
+import Data.Text.Utf16.Lines as Utf16 (Position (..))
+import Data.Text.Utf16.Rope.Mixed (Rope)
+import Data.Text.Utf16.Rope.Mixed qualified as Rope
 import Language.LSP.Protocol.Lens qualified as J
 import Language.LSP.Protocol.Message qualified as J
 import Language.LSP.Protocol.Types qualified as J
@@ -115,7 +116,7 @@ data VFS = VFS
   deriving (Show)
 
 data VfsLog
-  = SplitInsideCodePoint Rope.Position Rope
+  = SplitInsideCodePoint Utf16.Position Rope
   | URINotFound J.NormalizedUri
   | Opening J.NormalizedUri
   | Closing J.NormalizedUri
@@ -350,7 +351,7 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD
 applyChange logger str (J.TextDocumentContentChangeEvent (J.InL e))
   | J.Range (J.Position sl sc) (J.Position fl fc) <- e .! #range
   , txt <- e .! #text =
-      changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt
+      changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt
 applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
   pure $ Rope.fromText $ e .! #text
 
@@ -360,11 +361,11 @@ applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
  the given range with the new text. If the given positions lie within
  a code point then this does nothing (returns the original 'Rope') and logs.
 -}
-changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope
+changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope
 changeChars logger str start finish new = do
-  case Rope.splitAtPosition finish str of
+  case Rope.utf16SplitAtPosition finish str of
     Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str
-    Just (before, after) -> case Rope.splitAtPosition start before of
+    Just (before, after) -> case Rope.utf16SplitAtPosition start before of
       Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str
       Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after]
 
@@ -402,11 +403,14 @@ In particular, we use the good asymptotics of 'Rope' to our advantage:
 - We then split the line at the given position, and check how long the prefix is, which takes
 linear time in the length of the (single) line.
 
-We also may need to convert the line back and forth between ropes with different indexing. Again
-this is linear time in the length of the line.
-
 So the overall process is logarithmic in the number of lines, and linear in the length of the specific
 line. Which is okay-ish, so long as we don't have very long lines.
+
+We cannot use `Rope.splitAtPosition` here, because it gives the same result in two different cases:
+when the column index is out of range, and when the column index points at the newline character.
+In both cases the prefix wraps over to the next line, so its length as a position is (nextLineNum, 0)
+and we would not be able to distinguish them. The first case should return `Nothing`, whereas the
+second case should return `Just (CurrentLineNum, columnNumberConverted)`.
 -}
 
 {- | Extracts a specific line from a 'Rope.Rope'.
@@ -415,41 +419,12 @@ line. Which is okay-ish, so long as we don't have very long lines.
 extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope
 extractLine rope l = do
   -- Check for the line being out of bounds
-  let lastLine = Rope.posLine $ Rope.lengthAsPosition rope
+  let lastLine = Utf16.posLine $ Rope.utf16LengthAsPosition rope
   guard $ l <= lastLine
-
   let (_, suffix) = Rope.splitAtLine l rope
       (prefix, _) = Rope.splitAtLine 1 suffix
   pure prefix
 
-{- | Translate a code-point offset into a code-unit offset.
- Linear in the length of the rope.
--}
-codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word
-codePointOffsetToCodeUnitOffset rope offset = do
-  -- Check for the position being out of bounds
-  guard $ offset <= URope.length rope
-  -- Split at the given position in *code points*
-  let (prefix, _) = URope.splitAt offset rope
-      -- Convert the prefix to a rope using *code units*
-      utf16Prefix = Rope.fromText $ URope.toText prefix
-  -- Get the length of the prefix in *code units*
-  pure $ Rope.length utf16Prefix
-
-{- | Translate a UTF-16 code-unit offset into a code-point offset.
- Linear in the length of the rope.
--}
-codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word
-codeUnitOffsetToCodePointOffset rope offset = do
-  -- Check for the position being out of bounds
-  guard $ offset <= Rope.length rope
-  -- Split at the given position in *code units*
-  (prefix, _) <- Rope.splitAt offset rope
-  -- Convert the prefix to a rope using *code points*
-  let utfPrefix = URope.fromText $ Rope.toText prefix
-  -- Get the length of the prefix in *code points*
-  pure $ URope.length utfPrefix
-
 {- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
 
  Will return 'Nothing' if the requested position is out of bounds of the document.
@@ -458,15 +433,12 @@ codeUnitOffsetToCodePointOffset rope offset = do
  the position.
 -}
 codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position
-codePointPositionToPosition vFile (CodePointPosition l cpc) = do
+codePointPositionToPosition vFile (CodePointPosition l c) = do
   -- See Note [Converting between code points and code units]
   let text = _file_text vFile
-  utf16Line <- extractLine text (fromIntegral l)
-  -- Convert the line a rope using *code points*
-  let utfLine = URope.fromText $ Rope.toText utf16Line
-
-  cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc)
-  pure $ J.Position l (fromIntegral cuc)
+  lineRope <- extractLine text $ fromIntegral l
+  guard $ c <= fromIntegral (Rope.charLength lineRope)
+  return $ J.Position l (fromIntegral $ Rope.utf16Length $ fst $ Rope.charSplitAt (fromIntegral c) lineRope)
 
 {- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file.
 
@@ -487,13 +459,12 @@ codePointRangeToRange vFile (CodePointRange b e) =
  the position.
 -}
 positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition
-positionToCodePointPosition vFile (J.Position l cuc) = do
+positionToCodePointPosition vFile (J.Position l c) = do
   -- See Note [Converting between code points and code units]
   let text = _file_text vFile
-  utf16Line <- extractLine text (fromIntegral l)
-
-  cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc)
-  pure $ CodePointPosition l (fromIntegral cpc)
+  lineRope <- extractLine text $ fromIntegral l
+  guard $ c <= fromIntegral (Rope.utf16Length lineRope)
+  CodePointPosition l . fromIntegral . Rope.charLength . fst <$> Rope.utf16SplitAt (fromIntegral c) lineRope
 
 {- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file.
 
@@ -535,7 +506,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) =
         lastMaybe xs = Just $ last xs
 
     let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext
-    beforePos <- Rope.toText . fst <$> Rope.splitAt (fromIntegral c) curRope
+    beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope
     curWord <-
       if
         | T.null beforePos -> Just ""

diff --git a/lsp/test/VspSpec.hs b/lsp/test/VspSpec.hs
@@ -6,7 +6,7 @@ module VspSpec where
 import Data.Row
 import Data.String
 import Data.Text qualified as T
-import Data.Text.Utf16.Rope qualified as Rope
+import Data.Text.Utf16.Rope.Mixed qualified as Rope
 import Language.LSP.Protocol.Types qualified as J
 import Language.LSP.VFS