-
Notifications
You must be signed in to change notification settings - Fork 93
Update Data.Text.Utf16.Rope to Data.Text.Utf16.Rope.Mixed #542
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
baefa46
b8f0908
513c22c
8ffe6d8
deab787
8e88af9
9b12397
40fe213
f7a6314
374041f
ac411ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,10 +81,11 @@ import Data.Row | |
import Data.Text (Text) | ||
import Data.Text qualified as T | ||
import Data.Text.IO qualified as T | ||
import Data.Text.Lines as Char (Position (..)) | ||
import Data.Text.Prettyprint.Doc hiding (line) | ||
import Data.Text.Rope qualified as URope | ||
import Data.Text.Utf16.Rope (Rope) | ||
import Data.Text.Utf16.Rope qualified as Rope | ||
import Data.Text.Utf16.Lines as Utf16 (Position (..)) | ||
import Data.Text.Utf16.Rope.Mixed (Rope) | ||
import Data.Text.Utf16.Rope.Mixed qualified as Rope | ||
import Language.LSP.Protocol.Lens qualified as J | ||
import Language.LSP.Protocol.Message qualified as J | ||
import Language.LSP.Protocol.Types qualified as J | ||
|
@@ -115,7 +116,7 @@ data VFS = VFS | |
deriving (Show) | ||
|
||
data VfsLog | ||
= SplitInsideCodePoint Rope.Position Rope | ||
= SplitInsideCodePoint Utf16.Position Rope | ||
| URINotFound J.NormalizedUri | ||
| Opening J.NormalizedUri | ||
| Closing J.NormalizedUri | ||
|
@@ -350,7 +351,7 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD | |
applyChange logger str (J.TextDocumentContentChangeEvent (J.InL e)) | ||
| J.Range (J.Position sl sc) (J.Position fl fc) <- e .! #range | ||
, txt <- e .! #text = | ||
changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt | ||
changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt | ||
applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) = | ||
pure $ Rope.fromText $ e .! #text | ||
|
||
|
@@ -360,11 +361,11 @@ applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) = | |
the given range with the new text. If the given positions lie within | ||
a code point then this does nothing (returns the original 'Rope') and logs. | ||
-} | ||
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope | ||
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope | ||
changeChars logger str start finish new = do | ||
case Rope.splitAtPosition finish str of | ||
case Rope.utf16SplitAtPosition finish str of | ||
Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str | ||
Just (before, after) -> case Rope.splitAtPosition start before of | ||
Just (before, after) -> case Rope.utf16SplitAtPosition start before of | ||
Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str | ||
Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after] | ||
|
||
|
@@ -402,11 +403,14 @@ In particular, we use the good asymptotics of 'Rope' to our advantage: | |
- We then split the line at the given position, and check how long the prefix is, which takes | ||
linear time in the length of the (single) line. | ||
|
||
We also may need to convert the line back and forth between ropes with different indexing. Again | ||
this is linear time in the length of the line. | ||
|
||
So the overall process is logarithmic in the number of lines, and linear in the length of the specific | ||
line. Which is okay-ish, so long as we don't have very long lines. | ||
|
||
We are not able to use `Rope.splitAtPosition` here, because it cannot distinguish two cases: | ||
the column index being out of range, and the column index pointing at the newline character. | ||
In both cases the prefix wraps over onto the next line, producing the same result (nextLineNum, 0), | ||
so we would not be able to tell them apart. The first case should return `Nothing`, while the | ||
second case should return `Just (CurrentLineNum, columnNumberConverted)`. | ||
-} | ||
|
||
{- | Extracts a specific line from a 'Rope.Rope'. | ||
|
@@ -415,41 +419,12 @@ line. Which is okay-ish, so long as we don't have very long lines. | |
extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope | ||
extractLine rope l = do | ||
-- Check for the line being out of bounds | ||
let lastLine = Rope.posLine $ Rope.lengthAsPosition rope | ||
let lastLine = Utf16.posLine $ Rope.utf16LengthAsPosition rope | ||
guard $ l <= lastLine | ||
|
||
let (_, suffix) = Rope.splitAtLine l rope | ||
(prefix, _) = Rope.splitAtLine 1 suffix | ||
pure prefix | ||
|
||
{- | Translate a code-point offset into a code-unit offset. | ||
Linear in the length of the rope. | ||
-} | ||
codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word | ||
codePointOffsetToCodeUnitOffset rope offset = do | ||
-- Check for the position being out of bounds | ||
guard $ offset <= URope.length rope | ||
-- Split at the given position in *code points* | ||
let (prefix, _) = URope.splitAt offset rope | ||
-- Convert the prefix to a rope using *code units* | ||
utf16Prefix = Rope.fromText $ URope.toText prefix | ||
-- Get the length of the prefix in *code units* | ||
pure $ Rope.length utf16Prefix | ||
|
||
{- | Translate a UTF-16 code-unit offset into a code-point offset. | ||
Linear in the length of the rope. | ||
-} | ||
codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word | ||
codeUnitOffsetToCodePointOffset rope offset = do | ||
-- Check for the position being out of bounds | ||
guard $ offset <= Rope.length rope | ||
-- Split at the given position in *code units* | ||
(prefix, _) <- Rope.splitAt offset rope | ||
-- Convert the prefix to a rope using *code points* | ||
let utfPrefix = URope.fromText $ Rope.toText prefix | ||
-- Get the length of the prefix in *code points* | ||
pure $ URope.length utfPrefix | ||
|
||
{- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file. | ||
|
||
Will return 'Nothing' if the requested position is out of bounds of the document. | ||
|
@@ -458,15 +433,12 @@ codeUnitOffsetToCodePointOffset rope offset = do | |
the position. | ||
-} | ||
codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position | ||
codePointPositionToPosition vFile (CodePointPosition l cpc) = do | ||
codePointPositionToPosition vFile (CodePointPosition l c) = do | ||
-- See Note [Converting between code points and code units] | ||
let text = _file_text vFile | ||
utf16Line <- extractLine text (fromIntegral l) | ||
-- Convert the line to a rope using *code points* | ||
let utfLine = URope.fromText $ Rope.toText utf16Line | ||
|
||
cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc) | ||
pure $ J.Position l (fromIntegral cuc) | ||
lineRope <- extractLine text $ fromIntegral l | ||
guard $ c <= fromIntegral (Rope.charLength lineRope) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should include a comment about why we're not using `splitAtPosition`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `splitAtPosition` just wraps over onto the next line when the column index is out of range or when the column index points at the newline char :( There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The first problem (out of range) is easy to deal with, There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment added. |
||
return $ J.Position l (fromIntegral $ Rope.utf16Length $ fst $ Rope.charSplitAt (fromIntegral c) lineRope) | ||
|
||
{- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file. | ||
|
||
|
@@ -487,13 +459,12 @@ codePointRangeToRange vFile (CodePointRange b e) = | |
the position. | ||
soulomoon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
-} | ||
positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition | ||
positionToCodePointPosition vFile (J.Position l cuc) = do | ||
positionToCodePointPosition vFile (J.Position l c) = do | ||
-- See Note [Converting between code points and code units] | ||
let text = _file_text vFile | ||
utf16Line <- extractLine text (fromIntegral l) | ||
|
||
cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc) | ||
pure $ CodePointPosition l (fromIntegral cpc) | ||
lineRope <- extractLine text $ fromIntegral l | ||
guard $ c <= fromIntegral (Rope.utf16Length lineRope) | ||
CodePointPosition l . fromIntegral . Rope.charLength . fst <$> Rope.utf16SplitAt (fromIntegral c) lineRope | ||
|
||
{- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file. | ||
|
||
|
@@ -535,7 +506,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) = | |
lastMaybe xs = Just $ last xs | ||
|
||
let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext | ||
beforePos <- Rope.toText . fst <$> Rope.splitAt (fromIntegral c) curRope | ||
beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope | ||
curWord <- | ||
if | ||
| T.null beforePos -> Just "" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, is this definitely still the same? For some reason I thought it might be faster 🤔 But maybe it's just that the code is simpler...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It just removes the `fromText` and `toText` calls, saving the conversion; the rest of the logic is still the same.
The mixed version keeps two indices, one for code points and one for UTF-16 code units,
so it should be a little faster, since it saves the reindexing.