Skip to content

Update Data.Text.Utf16.Rope to Data.Text.Utf16.Rope.Mixed #542

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 3, 2024
79 changes: 25 additions & 54 deletions lsp/src/Language/LSP/VFS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ import Data.Row
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.IO qualified as T
import Data.Text.Lines as Char (Position (..))
import Data.Text.Prettyprint.Doc hiding (line)
import Data.Text.Rope qualified as URope
import Data.Text.Utf16.Rope (Rope)
import Data.Text.Utf16.Rope qualified as Rope
import Data.Text.Utf16.Lines as Utf16 (Position (..))
import Data.Text.Utf16.Rope.Mixed (Rope)
import Data.Text.Utf16.Rope.Mixed qualified as Rope
import Language.LSP.Protocol.Lens qualified as J
import Language.LSP.Protocol.Message qualified as J
import Language.LSP.Protocol.Types qualified as J
Expand Down Expand Up @@ -115,7 +116,7 @@ data VFS = VFS
deriving (Show)

data VfsLog
= SplitInsideCodePoint Rope.Position Rope
= SplitInsideCodePoint Utf16.Position Rope
| URINotFound J.NormalizedUri
| Opening J.NormalizedUri
| Closing J.NormalizedUri
Expand Down Expand Up @@ -350,7 +351,7 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD
applyChange logger str (J.TextDocumentContentChangeEvent (J.InL e))
| J.Range (J.Position sl sc) (J.Position fl fc) <- e .! #range
, txt <- e .! #text =
changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt
changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt
applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
pure $ Rope.fromText $ e .! #text

Expand All @@ -360,11 +361,11 @@ applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
the given range with the new text. If the given positions lie within
a code point then this does nothing (returns the original 'Rope') and logs.
-}
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope
changeChars logger str start finish new = do
case Rope.splitAtPosition finish str of
case Rope.utf16SplitAtPosition finish str of
Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str
Just (before, after) -> case Rope.splitAtPosition start before of
Just (before, after) -> case Rope.utf16SplitAtPosition start before of
Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str
Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after]

Expand Down Expand Up @@ -402,11 +403,14 @@ In particular, we use the good asymptotics of 'Rope' to our advantage:
- We then split the line at the given position, and check how long the prefix is, which takes
linear time in the length of the (single) line.

We also may need to convert the line back and forth between ropes with different indexing. Again
this is linear time in the length of the line.

So the overall process is logarithmic in the number of lines, and linear in the length of the specific
line. Which is okay-ish, so long as we don't have very long lines.

We are not able to use `Rope.splitAtPosition` here. When the column index is out of range, or
when the column index is at the newline character, the resulting prefix wraps over the line,
and both cases give the same result `(nextLineNum, 0)`, so we would not be able to distinguish them.
The first case should return `Nothing`, whereas the second case should return
`Just (currentLineNum, columnNumberConverted)`.
-}

{- | Extracts a specific line from a 'Rope.Rope'.
Expand All @@ -415,41 +419,12 @@ line. Which is okay-ish, so long as we don't have very long lines.
extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope
extractLine rope l = do
-- Check for the line being out of bounds
let lastLine = Rope.posLine $ Rope.lengthAsPosition rope
let lastLine = Utf16.posLine $ Rope.utf16LengthAsPosition rope
guard $ l <= lastLine

let (_, suffix) = Rope.splitAtLine l rope
(prefix, _) = Rope.splitAtLine 1 suffix
pure prefix

{- | Translate a code-point offset into a code-unit offset.
Linear in the length of the rope.
-}
codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word
codePointOffsetToCodeUnitOffset rope offset = do
-- Check for the position being out of bounds
guard $ offset <= URope.length rope
-- Split at the given position in *code points*
let (prefix, _) = URope.splitAt offset rope
-- Convert the prefix to a rope using *code units*
utf16Prefix = Rope.fromText $ URope.toText prefix
-- Get the length of the prefix in *code units*
pure $ Rope.length utf16Prefix

{- | Translate a UTF-16 code-unit offset into a code-point offset.
Linear in the length of the rope.
-}
codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word
codeUnitOffsetToCodePointOffset rope offset = do
-- Check for the position being out of bounds
guard $ offset <= Rope.length rope
-- Split at the given position in *code units*
(prefix, _) <- Rope.splitAt offset rope
-- Convert the prefix to a rope using *code points*
let utfPrefix = URope.fromText $ Rope.toText prefix
-- Get the length of the prefix in *code points*
pure $ URope.length utfPrefix

{- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.

Will return 'Nothing' if the requested position is out of bounds of the document.
Expand All @@ -458,15 +433,12 @@ codeUnitOffsetToCodePointOffset rope offset = do
the position.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, is this definitely still the same? For some reason I thought it might be faster 🤔 But maybe it's just that the code is simpler...

Copy link
Collaborator Author

@soulomoon soulomoon Jan 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just removes the `fromText` and `toText` parts, saving the conversion; the other logic is still the same.
The mixed version has two indices, one for code points and one for UTF-16 code units,
so it should be a little faster, since it saves the reindexing.

-}
codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position
codePointPositionToPosition vFile (CodePointPosition l cpc) = do
codePointPositionToPosition vFile (CodePointPosition l c) = do
-- See Note [Converting between code points and code units]
let text = _file_text vFile
utf16Line <- extractLine text (fromIntegral l)
-- Convert the line to a rope using *code points*
let utfLine = URope.fromText $ Rope.toText utf16Line

cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc)
pure $ J.Position l (fromIntegral cuc)
lineRope <- extractLine text $ fromIntegral l
guard $ c <= fromIntegral (Rope.charLength lineRope)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should include a comment about why we're not using Rope.splitAtPosition. The difference is quite subtle!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`splitAtPosition` just wraps over the line when the column index is out of range or when the column index is at the newline char :(

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first problem, the out-of-range column, is easy to deal with.
But the second case, where the column index is at the newline char and the result still wraps over the line, is the painful one.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment added

return $ J.Position l (fromIntegral $ Rope.utf16Length $ fst $ Rope.charSplitAt (fromIntegral c) lineRope)

{- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file.

Expand All @@ -487,13 +459,12 @@ codePointRangeToRange vFile (CodePointRange b e) =
the position.
-}
positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition
positionToCodePointPosition vFile (J.Position l cuc) = do
positionToCodePointPosition vFile (J.Position l c) = do
-- See Note [Converting between code points and code units]
let text = _file_text vFile
utf16Line <- extractLine text (fromIntegral l)

cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc)
pure $ CodePointPosition l (fromIntegral cpc)
lineRope <- extractLine text $ fromIntegral l
guard $ c <= fromIntegral (Rope.utf16Length lineRope)
CodePointPosition l . fromIntegral . Rope.charLength . fst <$> Rope.utf16SplitAt (fromIntegral c) lineRope

{- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file.

Expand Down Expand Up @@ -535,7 +506,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) =
lastMaybe xs = Just $ last xs

let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext
beforePos <- Rope.toText . fst <$> Rope.splitAt (fromIntegral c) curRope
beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope
curWord <-
if
| T.null beforePos -> Just ""
Expand Down
2 changes: 1 addition & 1 deletion lsp/test/VspSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module VspSpec where
import Data.Row
import Data.String
import Data.Text qualified as T
import Data.Text.Utf16.Rope qualified as Rope
import Data.Text.Utf16.Rope.Mixed qualified as Rope
import Language.LSP.Protocol.Types qualified as J
import Language.LSP.VFS

Expand Down