haskell · Bodigrim · May 22, 2022 · michaelpj · May 22, 2022 · Bodigrim
diff --git a/cabal.project b/cabal.project
@@ -10,3 +10,7 @@ tests: True
 benchmarks: True
 test-show-details: direct
 haddock-quickjump: True
+
+source-repository-package
+  type: git
+  location: https://github.com/Bodigrim/text-rope
diff --git a/lsp/lsp.cabal b/lsp/lsp.cabal
@@ -54,7 +54,7 @@ library
                      , scientific
                      , temporary
                      , text
-                     , text-rope
+                     , text-rope >= 0.2
                      , transformers >= 0.5.6 && < 0.6
                      , time
                      , unordered-containers
@@ -131,7 +131,7 @@ test-suite unit-test
                      , quickcheck-instances
                      , sorted-list == 0.2.1.*
                      , text
-                     , text-rope
+                     , text-rope >= 0.2
                      , unordered-containers
                      -- For GHCI tests
                      -- , async

diff --git a/lsp/src/Language/LSP/VFS.hs b/lsp/src/Language/LSP/VFS.hs
@@ -81,9 +81,10 @@ import           Data.Ord
 import qualified Data.HashMap.Strict as HashMap
 import qualified Data.Map.Strict as Map
 import           Data.Maybe
-import qualified Data.Text.Rope as URope
-import           Data.Text.Utf16.Rope ( Rope )
-import qualified Data.Text.Utf16.Rope as Rope
+import           Data.Text.Lines as Char ( Position(..) )
+import           Data.Text.Utf16.Lines as Utf16 ( Position(..) )
+import           Data.Text.Utf16.Rope.Mixed ( Rope )
+import qualified Data.Text.Utf16.Rope.Mixed as Rope
 import           Data.Text.Prettyprint.Doc hiding (line)
 import qualified Language.LSP.Types           as J
 import qualified Language.LSP.Types.Lens      as J
@@ -112,7 +113,7 @@ data VFS = VFS { _vfsMap :: !(Map.Map J.NormalizedUri VirtualFile)
                } deriving Show
 
 data VfsLog =
-  SplitInsideCodePoint Rope.Position Rope
+  SplitInsideCodePoint Utf16.Position Rope
   | URINotFound J.NormalizedUri
   | Opening J.NormalizedUri
   | Closing J.NormalizedUri
@@ -342,18 +343,18 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD
 applyChange _ _ (J.TextDocumentContentChangeEvent Nothing _ str)
   = pure $ Rope.fromText str
 applyChange logger str (J.TextDocumentContentChangeEvent (Just (J.Range (J.Position sl sc) (J.Position fl fc))) _ txt)
-  = changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt
+  = changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt
 
 -- ---------------------------------------------------------------------
 
 -- | Given a 'Rope', start and end positions, and some new text, replace
 -- the given range with the new text. If the given positions lie within
 -- a code point then this does nothing (returns the original 'Rope') and logs.
-changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope
+changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope
 changeChars logger str start finish new = do
- case Rope.splitAtPosition finish str of
+ case Rope.utf16SplitAtPosition finish str of
    Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str
-   Just (before, after) ->  case Rope.splitAtPosition start before of
+   Just (before, after) ->  case Rope.utf16SplitAtPosition start before of
      Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str
      Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after]
 
@@ -380,103 +381,42 @@ data CodePointRange =
 makeFieldsNoPrefix ''CodePointPosition
 makeFieldsNoPrefix ''CodePointRange
 
-{- Note [Converting between code points and code units]
-This is inherently a somewhat expensive operation, but we take some care to minimize the cost.
-In particular, we use the good asymptotics of 'Rope' to our advantage:
-- We extract the single line that we are interested in in time logarithmic in the number of lines.
-- We then split the line at the given position, and check how long the prefix is, which takes
-linear time in the length of the (single) line.
-
-We also may need to convert the line back and forth between ropes with different indexing. Again
-this is linear time in the length of the line.
-
-So the overall process is logarithmic in the number of lines, and linear in the length of the specific
-line. Which is okay-ish, so long as we don't have very long lines.
--}
-
--- | Extracts a specific line from a 'Rope.Rope'.
--- Logarithmic in the number of lines.
-extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope
-extractLine rope l = do
-  -- Check for the line being out of bounds
-  let lastLine = Rope.posLine $ Rope.lengthAsPosition rope
-  guard $ l <= lastLine
-
-  let (_, suffix) = Rope.splitAtLine l rope
-      (prefix, _) = Rope.splitAtLine 1 suffix
-  pure prefix
-
--- | Translate a code-point offset into a code-unit offset.
--- Linear in the length of the rope.
-codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word
-codePointOffsetToCodeUnitOffset rope offset = do
-  -- Check for the position being out of bounds
-  guard $ offset <= URope.length rope
-  -- Split at the given position in *code points*
-  let (prefix, _) = URope.splitAt offset rope
-      -- Convert the prefix to a rope using *code units*
-      utf16Prefix = Rope.fromText $ URope.toText prefix
-      -- Get the length of the prefix in *code units*
-  pure $ Rope.length utf16Prefix
-
--- | Translate a UTF-16 code-unit offset into a code-point offset.
--- Linear in the length of the rope.
-codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word
-codeUnitOffsetToCodePointOffset rope offset = do
-  -- Check for the position being out of bounds
-  guard $ offset <= Rope.length rope
-  -- Split at the given position in *code units*
-  (prefix, _) <- Rope.splitAt offset rope
-  -- Convert the prefix to a rope using *code points*
-  let utfPrefix = URope.fromText $ Rope.toText prefix
-      -- Get the length of the prefix in *code points*
-  pure $ URope.length utfPrefix
-
 -- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
 --
--- Will return 'Nothing' if the requested position is out of bounds of the document.
---
 -- Logarithmic in the number of lines in the document, and linear in the length of the line containing
 -- the position.
-codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position
-codePointPositionToPosition vFile (CodePointPosition l cpc) = do
-  -- See Note [Converting between code points and code units]
-  let text = _file_text vFile
-  utf16Line <- extractLine text (fromIntegral l)
-  -- Convert the line a rope using *code points*
-  let utfLine = URope.fromText $ Rope.toText utf16Line
-
-  cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc)
-  pure $ J.Position l (fromIntegral cuc)
+codePointPositionToPosition :: VirtualFile -> CodePointPosition -> J.Position
+codePointPositionToPosition vFile (CodePointPosition cpl cpc) =
+  J.Position (fromIntegral cul) (fromIntegral cuc)
+  where
+    text = _file_text vFile
+    (prefix, _) = Rope.charSplitAtPosition (Char.Position (fromIntegral cpl) (fromIntegral cpc)) text
+    Utf16.Position cul cuc = Rope.utf16LengthAsPosition prefix
 
 -- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file.
 --
--- Will return 'Nothing' if any of the positions are out of bounds of the document.
---
 -- Logarithmic in the number of lines in the document, and linear in the length of the lines containing
 -- the positions.
-codePointRangeToRange :: VirtualFile -> CodePointRange -> Maybe J.Range
+codePointRangeToRange :: VirtualFile -> CodePointRange -> J.Range
 codePointRangeToRange vFile (CodePointRange b e) =
-  J.Range <$> codePointPositionToPosition vFile b <*> codePointPositionToPosition vFile e
+  J.Range (codePointPositionToPosition vFile b) (codePointPositionToPosition vFile e)
 
 -- | Given a virtual file, translate a 'J.Position' in that file into a 'CodePointPosition' in that file.
 --
--- Will return 'Nothing' if the requested position lies inside a code point, or if it is out of bounds of the document.
+-- Will return 'Nothing' if the requested position lies inside a code point. A position beyond the end of the document resolves to the end of the document.
 --
 -- Logarithmic in the number of lines in the document, and linear in the length of the line containing
 -- the position.
 positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition
-positionToCodePointPosition vFile (J.Position l cuc) = do
-  -- See Note [Converting between code points and code units]
+positionToCodePointPosition vFile (J.Position cul cuc) = do
   let text = _file_text vFile
-  utf16Line <- extractLine text (fromIntegral l)
-
-  cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc)
-  pure $ CodePointPosition l (fromIntegral cpc)
+  (prefix, _) <- Rope.utf16SplitAtPosition (Utf16.Position (fromIntegral cul) (fromIntegral cuc)) text
+  let Char.Position cpl cpc = Rope.charLengthAsPosition prefix
+  pure $ CodePointPosition (fromIntegral cpl) (fromIntegral cpc)
 
 -- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file.
 --
--- Will return 'Nothing' if any of the positions are out of bounds of the document.
+-- Will return 'Nothing' if any of the positions lie inside a code point.
 --
 -- Logarithmic in the number of lines in the document, and linear in the length of the lines containing
 -- the positions.
@@ -512,7 +452,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) =
             lastMaybe xs = Just $ last xs
 
         let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext
-        beforePos <- Rope.toText . fst <$> Rope.splitAt (fromIntegral c) curRope
+        beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope
         curWord <-
             if | T.null beforePos -> Just ""
                | T.last beforePos == ' ' -> Just "" -- don't count abc as the curword in 'abc '

diff --git a/lsp/test/VspSpec.hs b/lsp/test/VspSpec.hs
@@ -2,7 +2,7 @@
 module VspSpec where
 
 import           Data.String
-import qualified Data.Text.Utf16.Rope as Rope
+import qualified Data.Text.Utf16.Rope.Mixed as Rope
 import           Language.LSP.VFS
 import qualified Language.LSP.Types as J
 import qualified Data.Text as T
@@ -313,12 +313,12 @@ vspSpec = do
       positionToCodePointPosition vfile (J.Position 1 2) `shouldBe` Nothing
       positionToCodePointPosition vfile (J.Position 1 3) `shouldBe` Just (CodePointPosition 1 2)
       positionToCodePointPosition vfile (J.Position 1 4) `shouldBe` Just (CodePointPosition 1 3)
-      positionToCodePointPosition vfile (J.Position 1 5) `shouldBe` Just (CodePointPosition 1 4)
+      positionToCodePointPosition vfile (J.Position 1 5) `shouldBe` Just (CodePointPosition 2 0)
       -- Greater column than max column
-      positionToCodePointPosition vfile (J.Position 1 6) `shouldBe` Nothing
-      positionToCodePointPosition vfile (J.Position 2 1) `shouldBe` Nothing
+      positionToCodePointPosition vfile (J.Position 1 6) `shouldBe` Just (CodePointPosition 2 0)
+      positionToCodePointPosition vfile (J.Position 2 1) `shouldBe` Just (CodePointPosition 2 0)
       -- Greater line than max line
-      positionToCodePointPosition vfile (J.Position 3 0) `shouldBe` Nothing
+      positionToCodePointPosition vfile (J.Position 3 0) `shouldBe` Just (CodePointPosition 2 0)
 
     it "converts code points to code units" $ do
       let
@@ -328,16 +328,16 @@ vspSpec = do
           ]
         vfile = VirtualFile 0 0 (fromString orig)
 
-      codePointPositionToPosition vfile (CodePointPosition 1 0) `shouldBe` Just (J.Position 1 0)
-      codePointPositionToPosition vfile (CodePointPosition 1 1) `shouldBe` Just (J.Position 1 1)
-      codePointPositionToPosition vfile (CodePointPosition 1 2) `shouldBe` Just (J.Position 1 3)
-      codePointPositionToPosition vfile (CodePointPosition 1 3) `shouldBe` Just (J.Position 1 4)
-      codePointPositionToPosition vfile (CodePointPosition 1 4) `shouldBe` Just (J.Position 1 5)
+      codePointPositionToPosition vfile (CodePointPosition 1 0) `shouldBe` J.Position 1 0
+      codePointPositionToPosition vfile (CodePointPosition 1 1) `shouldBe` J.Position 1 1
+      codePointPositionToPosition vfile (CodePointPosition 1 2) `shouldBe` J.Position 1 3
+      codePointPositionToPosition vfile (CodePointPosition 1 3) `shouldBe` J.Position 1 4
+      codePointPositionToPosition vfile (CodePointPosition 1 4) `shouldBe` J.Position 2 0
       -- Greater column than max column
-      codePointPositionToPosition vfile (CodePointPosition 1 5) `shouldBe` Nothing
-      codePointPositionToPosition vfile (CodePointPosition 2 1) `shouldBe` Nothing
+      codePointPositionToPosition vfile (CodePointPosition 1 5) `shouldBe` J.Position 2 0
+      codePointPositionToPosition vfile (CodePointPosition 2 1) `shouldBe` J.Position 2 0
       -- Greater line than max line
-      codePointPositionToPosition vfile (CodePointPosition 3 0) `shouldBe` Nothing
+      codePointPositionToPosition vfile (CodePointPosition 3 0) `shouldBe` J.Position 2 0
 
     -- ---------------------------------
 

diff --git a/stack.yaml b/stack.yaml
@@ -10,5 +10,5 @@ extra-package-dbs: []
 nix:
   packages: [icu]
 extra-deps:
-- text-rope-0.1
+- text-rope-0.2
 - co-log-core-0.3.1.0