Skip to content

Commit 2033b64

Browse files
committed
Move ShortText to Distribution.Utils.ShortText to avoid cycles
This moves `String`/UTF8 conversion helpers to Distribution.Utils.String
1 parent 0c99981 commit 2033b64

File tree

8 files changed

+222
-180
lines changed

8 files changed

+222
-180
lines changed

Cabal/Cabal.cabal

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ library
410410
Distribution.Types.ComponentRequestedSpec
411411
Distribution.Types.TargetInfo
412412
Distribution.Utils.NubList
413+
Distribution.Utils.ShortText
413414
Distribution.Verbosity
414415
Distribution.Version
415416
Language.Haskell.Extension
@@ -422,6 +423,7 @@ library
422423
Distribution.Compat.Prelude
423424
Distribution.GetOpt
424425
Distribution.Lex
426+
Distribution.Utils.String
425427
Distribution.Simple.GHC.Internal
426428
Distribution.Simple.GHC.IPI642
427429
Distribution.Simple.GHC.IPIConvert
@@ -479,6 +481,7 @@ test-suite unit-tests
479481
UnitTests.Distribution.Simple.Utils
480482
UnitTests.Distribution.System
481483
UnitTests.Distribution.Utils.NubList
484+
UnitTests.Distribution.Utils.ShortText
482485
UnitTests.Distribution.Version
483486
main-is: UnitTests.hs
484487
build-depends:

Cabal/Distribution/Simple/Utils.hs

Lines changed: 4 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -152,18 +152,13 @@ module Distribution.Simple.Utils (
152152
-- * FilePath stuff
153153
isAbsoluteOnAnyPlatform,
154154
isRelativeOnAnyPlatform,
155-
156-
-- * 'ShortText' type
157-
ShortText,
158-
toShortText,
159-
fromShortText,
160155
) where
161156

162157
import Prelude ()
163158
import Distribution.Compat.Prelude
164-
import Data.String (IsString(..))
165159

166160
import Distribution.Text
161+
import Distribution.Utils.String
167162
import Distribution.Package
168163
import Distribution.ModuleName as ModuleName
169164
import Distribution.System
@@ -188,7 +183,6 @@ import Distribution.Verbosity
188183
import qualified Paths_Cabal (version)
189184
#endif
190185

191-
import Data.Word (Word8)
192186
import Control.Concurrent.MVar
193187
( newEmptyMVar, putMVar, takeMVar )
194188
import Data.Bits
@@ -205,16 +199,6 @@ import qualified Data.Set as Set
205199

206200
import qualified Data.ByteString as SBS
207201

208-
#if defined(MIN_VERSION_bytestring)
209-
# if MIN_VERSION_bytestring(0,10,4)
210-
# define HAVE_SHORTBYTESTRING 1
211-
# endif
212-
#endif
213-
214-
#if HAVE_SHORTBYTESTRING
215-
import qualified Data.ByteString.Short as BS.Short
216-
#endif
217-
218202
import System.Directory
219203
( Permissions(executable), getDirectoryContents, getPermissions
220204
, doesDirectoryExist, doesFileExist, removeFile, findExecutable
@@ -1390,55 +1374,10 @@ fromUTF8 (c:cs)
13901374
replacementChar = '\xfffd'
13911375

13921376
fromUTF8BS :: SBS.ByteString -> String
1393-
fromUTF8BS = fromUTF8BSImpl . SBS.unpack
1377+
fromUTF8BS = decodeStringUtf8 . SBS.unpack
13941378

13951379
fromUTF8LBS :: BS.ByteString -> String
1396-
fromUTF8LBS = fromUTF8BSImpl . BS.unpack
1397-
1398-
fromUTF8BSImpl :: [Word8] -> String
1399-
fromUTF8BSImpl = go
1400-
where
1401-
go :: [Word8] -> String
1402-
go [] = []
1403-
go (c : cs)
1404-
| c <= 0x7F = chr (fromIntegral c) : go cs
1405-
| c <= 0xBF = replacementChar : go cs
1406-
| c <= 0xDF = twoBytes c cs
1407-
| c <= 0xEF = moreBytes 3 0x800 cs (fromIntegral $ c .&. 0xF)
1408-
| c <= 0xF7 = moreBytes 4 0x10000 cs (fromIntegral $ c .&. 0x7)
1409-
| c <= 0xFB = moreBytes 5 0x200000 cs (fromIntegral $ c .&. 0x3)
1410-
| c <= 0xFD = moreBytes 6 0x4000000 cs (fromIntegral $ c .&. 0x1)
1411-
| otherwise = replacementChar : go cs
1412-
1413-
twoBytes :: Word8 -> [Word8] -> String
1414-
twoBytes c0 (c1:cs')
1415-
| c1 .&. 0xC0 == 0x80
1416-
= let d = ((c0 .&. 0x1F) `shiftL` 6)
1417-
.|. (c1 .&. 0x3F)
1418-
in if d >= 0x80
1419-
then chr (fromIntegral d) : go cs'
1420-
else replacementChar : go cs'
1421-
twoBytes _ cs' = replacementChar : go cs'
1422-
1423-
moreBytes :: Int -> Int -> [Word8] -> Int -> [Char]
1424-
moreBytes 1 overlong cs' acc
1425-
| overlong <= acc && acc <= 0x10FFFF
1426-
&& (acc < 0xD800 || 0xDFFF < acc)
1427-
&& (acc < 0xFFFE || 0xFFFF < acc)
1428-
= chr acc : go cs'
1429-
1430-
| otherwise
1431-
= replacementChar : go cs'
1432-
1433-
moreBytes byteCount overlong (cn:cs') acc
1434-
| cn .&. 0xC0 == 0x80
1435-
= moreBytes (byteCount-1) overlong cs'
1436-
((acc `shiftL` 6) .|. fromIntegral cn .&. 0x3F)
1437-
1438-
moreBytes _ _ cs' _
1439-
= replacementChar : go cs'
1440-
1441-
replacementChar = '\xfffd'
1380+
fromUTF8LBS = decodeStringUtf8 . BS.unpack
14421381

14431382
toUTF8 :: String -> String
14441383
toUTF8 [] = []
@@ -1459,26 +1398,6 @@ toUTF8 (c:cs)
14591398
: toUTF8 cs
14601399
where w = ord c
14611400

1462-
-- | Variant of 'toUTF8' operating on 'Word8's directly
1463-
toUTF8BSImpl :: String -> [Word8]
1464-
toUTF8BSImpl [] = []
1465-
toUTF8BSImpl (c:cs)
1466-
| c <= '\x07F' = w
1467-
: toUTF8BSImpl cs
1468-
| c <= '\x7FF' = (0xC0 .|. (w `shiftR` 6))
1469-
: (0x80 .|. (w .&. 0x3F))
1470-
: toUTF8BSImpl cs
1471-
| c <= '\xFFFF'= (0xE0 .|. (w `shiftR` 12))
1472-
: (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
1473-
: (0x80 .|. (w .&. 0x3F))
1474-
: toUTF8BSImpl cs
1475-
| otherwise = (0xf0 .|. (w `shiftR` 18))
1476-
: (0x80 .|. ((w `shiftR` 12) .&. 0x3F))
1477-
: (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
1478-
: (0x80 .|. (w .&. 0x3F))
1479-
: toUTF8BSImpl cs
1480-
where w = fromIntegral (ord c) :: Word8
1481-
14821401
-- | Whether BOM is at the beginning of the input
14831402
startsWithBOM :: String -> Bool
14841403
startsWithBOM ('\xFEFF':_) = True
@@ -1519,7 +1438,7 @@ withUTF8FileContents name action =
15191438
-- Uses 'writeFileAtomic', so provides the same guarantees.
15201439
--
15211440
writeUTF8File :: FilePath -> String -> NoCallStackIO ()
1522-
writeUTF8File path = writeFileAtomic path . BS.Char8.pack . toUTF8
1441+
writeUTF8File path = writeFileAtomic path . BS.pack . encodeStringUtf8
15231442

15241443
-- | Fix different systems silly line ending conventions
15251444
normaliseLineEndings :: String -> String
@@ -1664,78 +1583,3 @@ isAbsoluteOnAnyPlatform _ = False
16641583
-- | @isRelativeOnAnyPlatform = not . 'isAbsoluteOnAnyPlatform'@
16651584
isRelativeOnAnyPlatform :: FilePath -> Bool
16661585
isRelativeOnAnyPlatform = not . isAbsoluteOnAnyPlatform
1667-
1668-
-- ------------------------------------------------------------
1669-
-- * 'ShortText' type
1670-
-- ------------------------------------------------------------
1671-
1672-
-- TODO: if we start using this internally for more opaque types in
1673-
-- Cabal then we will likely need to promote it to it's own module in
1674-
-- Distribution.* to avoid cycles, or just to maintain the sanity of
1675-
-- the Distribution.* vs Distribution.Simple.* distinction.
1676-
1677-
-- | Construct 'ShortText' from 'String'
1678-
toShortText :: String -> ShortText
1679-
1680-
-- | Convert 'ShortText' to 'String'
1681-
fromShortText :: ShortText -> String
1682-
1683-
-- | Compact representation of short 'Strings'
1684-
--
1685-
-- The data is stored internally as UTF8 in an
1686-
-- 'BS.Short.ShortByteString' when compiled against @bytestring >=
1687-
-- 0.10.4@, and otherwise the fallback is to use plain old non-compat
1688-
-- '[Char]'.
1689-
--
1690-
-- Note: This type is for internal uses (such as e.g. 'PackageName')
1691-
-- and shall not be exposed in Cabal's API
1692-
--
1693-
-- @since 2.0.0
1694-
#if HAVE_SHORTBYTESTRING
1695-
newtype ShortText = ST { unST :: BS.Short.ShortByteString }
1696-
deriving (Eq,Ord,Generic)
1697-
1698-
# if MIN_VERSION_binary(0,8,1)
1699-
instance Binary ShortText where
1700-
put = put . unST
1701-
get = fmap ST get
1702-
# else
1703-
instance Binary ShortText where
1704-
put = put . BS.Short.fromShort . unST
1705-
get = fmap (ST . BS.Short.toShort) get
1706-
# endif
1707-
1708-
toShortText = ST . BS.Short.pack . toUTF8BSImpl
1709-
1710-
fromShortText = fromUTF8BSImpl . BS.Short.unpack . unST
1711-
#else
1712-
newtype ShortText = ST { unST :: String }
1713-
deriving (Eq,Ord,Generic)
1714-
1715-
instance Binary ShortText where
1716-
put = put . toUTF8BSImpl . unST
1717-
get = fmap (ST . fromUTF8BSImpl) get
1718-
1719-
toShortText = ST
1720-
1721-
fromShortText = unST
1722-
#endif
1723-
1724-
instance NFData ShortText where
1725-
rnf = rnf . unST
1726-
1727-
instance Show ShortText where
1728-
show = show . fromShortText
1729-
1730-
instance Read ShortText where
1731-
readsPrec p = map (first toShortText) . readsPrec p
1732-
1733-
instance Semigroup ShortText where
1734-
ST a <> ST b = ST (mappend a b)
1735-
1736-
instance Monoid ShortText where
1737-
mempty = ST mempty
1738-
mappend = (<>)
1739-
1740-
instance IsString ShortText where
1741-
fromString = toShortText

Cabal/Distribution/Utils/ShortText.hs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE DeriveDataTypeable #-}
3+
{-# LANGUAGE DeriveGeneric #-}
4+
5+
module Distribution.Utils.ShortText
6+
( -- * 'ShortText' type
7+
ShortText
8+
, toShortText
9+
, fromShortText
10+
11+
-- * internal utilities
12+
, decodeStringUtf8
13+
, encodeStringUtf8
14+
) where
15+
16+
import Prelude ()
17+
import Distribution.Compat.Prelude
18+
import Distribution.Utils.String
19+
20+
import Data.String (IsString(..))
21+
22+
#if defined(MIN_VERSION_bytestring)
23+
# if MIN_VERSION_bytestring(0,10,4)
24+
# define HAVE_SHORTBYTESTRING 1
25+
# endif
26+
#endif
27+
28+
#if HAVE_SHORTBYTESTRING
29+
import qualified Data.ByteString.Short as BS.Short
30+
#endif
31+
32+
-- | Construct 'ShortText' from 'String'
33+
toShortText :: String -> ShortText
34+
35+
-- | Convert 'ShortText' to 'String'
36+
fromShortText :: ShortText -> String
37+
38+
-- | Compact representation of short 'Strings'
39+
--
40+
-- The data is stored internally as UTF8 in an
41+
-- 'BS.Short.ShortByteString' when compiled against @bytestring >=
42+
-- 0.10.4@, and otherwise the fallback is to use plain old non-compat
43+
-- '[Char]'.
44+
--
45+
-- Note: This type is for internal uses (such as e.g. 'PackageName')
46+
-- and shall not be exposed in Cabal's API
47+
--
48+
-- @since 2.0.0
49+
#if HAVE_SHORTBYTESTRING
50+
newtype ShortText = ST { unST :: BS.Short.ShortByteString }
51+
deriving (Eq,Ord,Generic,Data,Typeable)
52+
53+
# if MIN_VERSION_binary(0,8,1)
54+
instance Binary ShortText where
55+
put = put . unST
56+
get = fmap ST get
57+
# else
58+
instance Binary ShortText where
59+
put = put . BS.Short.fromShort . unST
60+
get = fmap (ST . BS.Short.toShort) get
61+
# endif
62+
63+
toShortText = ST . BS.Short.pack . encodeStringUtf8
64+
65+
fromShortText = decodeStringUtf8 . BS.Short.unpack . unST
66+
#else
67+
newtype ShortText = ST { unST :: String }
68+
deriving (Eq,Ord,Generic,Data,Typeable)
69+
70+
instance Binary ShortText where
71+
put = put . encodeStringUtf8 . unST
72+
get = fmap (ST . decodeStringUtf8) get
73+
74+
toShortText = ST
75+
76+
fromShortText = unST
77+
#endif
78+
79+
instance NFData ShortText where
80+
rnf = rnf . unST
81+
82+
instance Show ShortText where
83+
show = show . fromShortText
84+
85+
instance Read ShortText where
86+
readsPrec p = map (first toShortText) . readsPrec p
87+
88+
instance Semigroup ShortText where
89+
ST a <> ST b = ST (mappend a b)
90+
91+
instance Monoid ShortText where
92+
mempty = ST mempty
93+
mappend = (<>)
94+
95+
instance IsString ShortText where
96+
fromString = toShortText

0 commit comments

Comments
 (0)