From 96bcbf1d77d5583a899d4d23c23b99644cc20d5c Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 18:32:39 +0200 Subject: [PATCH 01/10] Make `CheckPackageContentOps` return a lazy ByteString This is the proper type, as 'getFileContents' is supposed to return contents read in binary file mode, and prior to this patch, `[Char]` was abused to return binary data. --- Cabal/Distribution/PackageDescription/Check.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Cabal/Distribution/PackageDescription/Check.hs b/Cabal/Distribution/PackageDescription/Check.hs index 194d1033b01..219aee92bc3 100644 --- a/Cabal/Distribution/PackageDescription/Check.hs +++ b/Cabal/Distribution/PackageDescription/Check.hs @@ -58,6 +58,7 @@ import Distribution.Text import Language.Haskell.Extension import Control.Monad (mapM) +import qualified Data.ByteString.Lazy as BS import Data.List (group) import qualified System.Directory as System ( doesFileExist, doesDirectoryExist ) @@ -67,7 +68,6 @@ import qualified Text.PrettyPrint as Disp import Text.PrettyPrint ((<+>)) import qualified System.Directory (getDirectoryContents) -import System.IO (openBinaryFile, IOMode(ReadMode), hGetContents) import System.FilePath ( (), (<.>), takeExtension, takeFileName, splitDirectories , splitPath, splitExtension ) @@ -1726,8 +1726,7 @@ checkPackageFiles pkg root = checkPackageContent checkFilesIO pkg doesFileExist = System.doesFileExist . relative, doesDirectoryExist = System.doesDirectoryExist . relative, getDirectoryContents = System.Directory.getDirectoryContents . relative, - getFileContents = \f -> openBinaryFile (relative f) ReadMode - >>= hGetContents + getFileContents = BS.readFile } relative path = root path @@ -1738,7 +1737,7 @@ data CheckPackageContentOps m = CheckPackageContentOps { doesFileExist :: FilePath -> m Bool, doesDirectoryExist :: FilePath -> m Bool, getDirectoryContents :: FilePath -> m [FilePath], - getFileContents :: FilePath -> m String + getFileContents :: FilePath -> m BS.ByteString } -- | Sanity check things that requires looking at files in the package. @@ -1777,12 +1776,16 @@ checkCabalFileBOM ops = do -- --cabal-file is specified. So if you can't find the file, -- just don't bother with this check. Left _ -> return $ Nothing - Right pdfile -> (flip check pc . startsWithBOM . fromUTF8) + Right pdfile -> (flip check pc . BS.isPrefixOf bomUtf8) `liftM` (getFileContents ops pdfile) where pc = PackageDistInexcusable $ pdfile ++ " starts with an Unicode byte order mark (BOM)." ++ " This may cause problems with older cabal versions." + where + bomUtf8 :: BS.ByteString + bomUtf8 = BS.pack [0xef,0xbb,0xbf] -- U+FEFF encoded as UTF8 + checkCabalFileName :: Monad m => CheckPackageContentOps m -> PackageDescription -> m (Maybe PackageCheck) From 0763b05a9e6ab7d2f891b0f6ff4f0ee30d6aae35 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 18:55:47 +0200 Subject: [PATCH 02/10] Add `toUTF8LBS` and `toUTF8BS` conversions The API was assymetric, as there was `fromUTF8(L)BS` but not the dual operations. The plan is refactor all occurences of - `fromUTF8 :: String -> String` - `toUTF8` :: String -> String` until `fromUTF8`/`toUTF8` is unused, at which point we can officially deprecate or remove it. --- Cabal/Distribution/Simple/Utils.hs | 2 ++ Cabal/Distribution/Utils/Generic.hs | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs index b8144412bbf..7de7fe7f0e6 100644 --- a/Cabal/Distribution/Simple/Utils.hs +++ b/Cabal/Distribution/Simple/Utils.hs @@ -132,6 +132,8 @@ module Distribution.Simple.Utils ( fromUTF8BS, fromUTF8LBS, toUTF8, + toUTF8BS, + toUTF8LBS, readUTF8File, withUTF8FileContents, writeUTF8File, diff --git a/Cabal/Distribution/Utils/Generic.hs b/Cabal/Distribution/Utils/Generic.hs index d6ef149fa3c..49336a411ce 100644 --- a/Cabal/Distribution/Utils/Generic.hs +++ b/Cabal/Distribution/Utils/Generic.hs @@ -27,20 +27,29 @@ module Distribution.Utils.Generic ( writeFileAtomic, -- * Unicode + + -- ** Conversions fromUTF8, fromUTF8BS, fromUTF8LBS, + toUTF8, + toUTF8BS, + toUTF8LBS, + + -- ** File I/O readUTF8File, withUTF8FileContents, writeUTF8File, - normaliseLineEndings, - -- * BOM + -- ** BOM startsWithBOM, fileHasBOM, ignoreBOM, + -- ** Misc + normaliseLineEndings, + -- * generic utils dropWhileEndLE, takeWhileEndLE, @@ -223,6 +232,13 @@ toUTF8 (c:cs) : toUTF8 cs where w = ord c + +toUTF8BS :: String -> SBS.ByteString +toUTF8BS = SBS.pack . encodeStringUtf8 + +toUTF8LBS :: String -> BS.ByteString +toUTF8LBS = BS.pack . encodeStringUtf8 + -- | Whether BOM is at the beginning of the input startsWithBOM :: String -> Bool startsWithBOM ('\xFEFF':_) = True From 881ba6be07d97d79587f27f711fc1c6050f096df Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 19:00:39 +0200 Subject: [PATCH 03/10] Refactor some use-sites of `{to,from}UTF8` to use `{to,From}UTF8LBS` --- Cabal/Distribution/Utils/Generic.hs | 10 ++++------ cabal-install/Distribution/Client/IndexUtils.hs | 7 +++---- cabal-install/Distribution/Client/Targets.hs | 5 ++--- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Cabal/Distribution/Utils/Generic.hs b/Cabal/Distribution/Utils/Generic.hs index 49336a411ce..2ef04e28df8 100644 --- a/Cabal/Distribution/Utils/Generic.hs +++ b/Cabal/Distribution/Utils/Generic.hs @@ -94,7 +94,7 @@ import System.Directory import System.FilePath ( (<.>), splitFileName ) import System.IO - ( openBinaryFile, withFile, withBinaryFile + ( withFile, withBinaryFile , openBinaryTempFileWithDefaultPermissions , IOMode(ReadMode), hGetContents, hClose ) import qualified Control.Exception as Exception @@ -246,8 +246,7 @@ startsWithBOM _ = False -- | Check whether a file has Unicode byte order mark (BOM). fileHasBOM :: FilePath -> NoCallStackIO Bool -fileHasBOM f = fmap (startsWithBOM . fromUTF8) - . hGetContents =<< openBinaryFile f ReadMode +fileHasBOM f = (startsWithBOM . fromUTF8LBS) <$> BS.readFile f -- | Ignore a Unicode byte order mark (BOM) at the beginning of the input -- @@ -260,8 +259,7 @@ ignoreBOM string = string -- Reads lazily using ordinary 'readFile'. -- readUTF8File :: FilePath -> NoCallStackIO String -readUTF8File f = fmap (ignoreBOM . fromUTF8) - . hGetContents =<< openBinaryFile f ReadMode +readUTF8File f = (ignoreBOM . fromUTF8LBS) <$> BS.readFile f -- | Reads a UTF8 encoded text file as a Unicode String -- @@ -270,7 +268,7 @@ readUTF8File f = fmap (ignoreBOM . fromUTF8) withUTF8FileContents :: FilePath -> (String -> IO a) -> IO a withUTF8FileContents name action = withBinaryFile name ReadMode - (\hnd -> hGetContents hnd >>= action . ignoreBOM . fromUTF8) + (\hnd -> BS.hGetContents hnd >>= action . ignoreBOM . fromUTF8LBS) -- | Writes a Unicode String as a UTF8 encoded text file. -- diff --git a/cabal-install/Distribution/Client/IndexUtils.hs b/cabal-install/Distribution/Client/IndexUtils.hs index 72856baebce..e0711ef2f90 100644 --- a/cabal-install/Distribution/Client/IndexUtils.hs +++ b/cabal-install/Distribution/Client/IndexUtils.hs @@ -81,7 +81,7 @@ import Distribution.ParseUtils import Distribution.PackageDescription.Parse ( parseGenericPackageDescription ) import Distribution.Simple.Utils - ( fromUTF8, ignoreBOM ) + ( fromUTF8LBS, ignoreBOM ) import qualified Distribution.PackageDescription.Parse as PackageDesc.Parse #endif @@ -477,8 +477,7 @@ extractPkg verbosity entry blockNo = case Tar.entryContent entry of Nothing -> error $ "Couldn't read cabal file " ++ show fileName #else - parsed = parseGenericPackageDescription . ignoreBOM . fromUTF8 . BS.Char8.unpack - $ content + parsed = parseGenericPackageDescription . ignoreBOM . fromUTF8LBS $ content descr = case parsed of ParseOk _ d -> d _ -> error $ "Couldn't read cabal file " @@ -746,7 +745,7 @@ packageListFromCache verbosity mkPkg hnd Cache{..} mode = accum mempty [] mempty Just gpd -> return gpd Nothing -> interror "failed to parse .cabal file" #else - case parseGenericPackageDescription . ignoreBOM . fromUTF8 . BS.Char8.unpack $ content of + case parseGenericPackageDescription . ignoreBOM . fromUTF8LBS $ content of ParseOk _ d -> return d _ -> interror "failed to parse .cabal file" #endif diff --git a/cabal-install/Distribution/Client/Targets.hs b/cabal-install/Distribution/Client/Targets.hs index e61bdf1de01..fb828fc9c62 100644 --- a/cabal-install/Distribution/Client/Targets.hs +++ b/cabal-install/Distribution/Client/Targets.hs @@ -98,8 +98,7 @@ import Distribution.PackageDescription.Parsec import Distribution.PackageDescription.Parse ( readGenericPackageDescription, parseGenericPackageDescription, ParseResult(..) ) import Distribution.Simple.Utils - ( fromUTF8, ignoreBOM ) -import qualified Data.ByteString.Lazy.Char8 as BS.Char8 + ( fromUTF8LBS, ignoreBOM ) #endif -- import Data.List ( find, nub ) @@ -563,7 +562,7 @@ readPackageTarget verbosity = traverse modifyLocation parseGenericPackageDescriptionMaybe (BS.toStrict bs) #else parsePackageDescription' content = - case parseGenericPackageDescription . ignoreBOM . fromUTF8 . BS.Char8.unpack $ content of + case parseGenericPackageDescription . ignoreBOM . fromUTF8LBS $ content of ParseOk _ pkg -> Just pkg _ -> Nothing #endif From 98c6dbc1e5566bd5cb2ee062ab75e0c9237e08f0 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 19:36:13 +0200 Subject: [PATCH 04/10] Remove spurious UTF8 conversion for stdout This was introduced in 1821d8039023a85df2e09135d26018bbbaa0266e but it would imply that stdout was set to binary mode, which it isn't. --- cabal-install/Distribution/Client/Check.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cabal-install/Distribution/Client/Check.hs b/cabal-install/Distribution/Client/Check.hs index 92b127be25b..daa096b6c7d 100644 --- a/cabal-install/Distribution/Client/Check.hs +++ b/cabal-install/Distribution/Client/Check.hs @@ -30,7 +30,7 @@ import Distribution.PackageDescription.Configuration import Distribution.Verbosity ( Verbosity ) import Distribution.Simple.Utils - ( defaultPackageDesc, toUTF8, wrapText ) + ( defaultPackageDesc, wrapText ) check :: Verbosity -> IO Bool check verbosity = do @@ -91,4 +91,4 @@ check verbosity = do where printCheckMessages = mapM_ (putStrLn . format . explanation) - format = toUTF8 . wrapText . ("* "++) + format = wrapText . ("* "++) From 424bb5f74bd39c8fbea4fce0ded550a051a5a66c Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 19:21:08 +0200 Subject: [PATCH 05/10] Add new 'IOData' abstraction This new type will be used to disentangle conflated uses of `String` and clearly distinguish between binary and textual data. --- Cabal/Distribution/Simple/Utils.hs | 64 ++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs index 7de7fe7f0e6..8cc6dc7f053 100644 --- a/Cabal/Distribution/Simple/Utils.hs +++ b/Cabal/Distribution/Simple/Utils.hs @@ -44,6 +44,12 @@ module Distribution.Simple.Utils ( -- * exceptions handleDoesNotExist, + -- * 'IOData' + IOData(..), IODataMode(..), + ioDataNull, + ioDataHGetContents, + ioDataHPutContents, + -- * running programs rawSystemExit, rawSystemExitCode, @@ -201,6 +207,7 @@ import Control.Concurrent.MVar import Data.Typeable ( cast ) import qualified Data.ByteString.Lazy.Char8 as BS.Char8 +import qualified Data.ByteString.Lazy as BS import System.Directory ( Permissions(executable), getDirectoryContents, getPermissions @@ -810,6 +817,63 @@ rawSystemStdout verbosity path args = withFrozenCallStack $ do die errors return output +-- | Represents either textual or binary data passed via I/O functions +-- which support binary/text mode +-- +-- @since 2.2.0 +data IOData = IODataText String + -- ^ How Text gets encoded is usually locale-dependent. + | IODataBinary BS.ByteString + -- ^ Raw binary which gets read/written in binary mode. + +-- | Test whether 'IOData' is empty +-- +-- @since 2.2.0 +ioDataNull :: IOData -> Bool +ioDataNull (IODataText s) = null s +ioDataNull (IODataBinary b) = BS.null b + +instance NFData IOData where + rnf (IODataText s) = rnf s +#if MIN_VERSION_bytestring(0,10,0) + rnf (IODataBinary bs) = rnf bs +#else + rnf (IODataBinary bs) = rnf (BS.length bs) +#endif + +data IODataMode = IODataModeText | IODataModeBinary + +-- | 'IOData' Wrapper for 'hGetContents' +-- +-- __Note__: This operation uses lazy I/O. Use 'NFData' to force all +-- data to be read and consequently the internal file handle to be +-- closed. +-- +-- @since 2.2.0 +ioDataHGetContents :: Handle -> IODataMode -> IO IOData +ioDataHGetContents h IODataModeText = do + hSetBinaryMode h False + IODataText <$> hGetContents h +ioDataHGetContents h IODataModeBinary = do + hSetBinaryMode h True + IODataBinary <$> BS.hGetContents h + +-- | 'IOData' Wrapper for 'hPutStr' and 'hClose' +-- +-- This is the dual operation ot 'ioDataHGetContents', +-- and consequently the handle is closed with `hClose`. +-- +-- @since 2.2.0 +ioDataHPutContents :: Handle -> IOData -> IO () +ioDataHPutContents h (IODataText c) = do + hSetBinaryMode h False + hPutStr h c + hClose h +ioDataHPutContents h (IODataBinary c) = do + hSetBinaryMode h True + BS.hPutStr h c + hClose h + -- | Run a command and return its output, errors and exit status. Optionally -- also supply some input. Also provides control over whether the binary/text -- mode of the input and output. From 5027632b9ef0546d2699a307570afbee6852a093 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 19:30:38 +0200 Subject: [PATCH 06/10] Refactor `rawSystemStdInOut` to use `IOData` abstraction This removes the remaining occurences of the weakly typed `{to,from}UTF8` conversion. --- Cabal/Distribution/Simple/Program/Run.hs | 32 ++++++++----------- Cabal/Distribution/Simple/Utils.hs | 31 +++++++++--------- .../UnitTests/Distribution/Simple/Utils.hs | 11 ++++--- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/Cabal/Distribution/Simple/Program/Run.hs b/Cabal/Distribution/Simple/Program/Run.hs index 077f07af3ac..6b1d7d02e69 100644 --- a/Cabal/Distribution/Simple/Program/Run.hs +++ b/Cabal/Distribution/Simple/Program/Run.hs @@ -62,6 +62,10 @@ data ProgramInvocation = ProgramInvocation { data IOEncoding = IOEncodingText -- locale mode text | IOEncodingUTF8 -- always utf8 +encodeToIOData :: IOEncoding -> String -> IOData +encodeToIOData IOEncodingText = IODataText +encodeToIOData IOEncodingUTF8 = IODataBinary . toUTF8LBS + emptyProgramInvocation :: ProgramInvocation emptyProgramInvocation = ProgramInvocation { @@ -138,15 +142,11 @@ runProgramInvocation verbosity (_, errors, exitCode) <- rawSystemStdInOut verbosity path args mcwd menv - (Just input) True + (Just input) IODataModeBinary when (exitCode /= ExitSuccess) $ die' verbosity $ "'" ++ path ++ "' exited with an error:\n" ++ errors where - input = case encoding of - IOEncodingText -> (inputStr, False) - IOEncodingUTF8 -> (toUTF8 inputStr, True) -- use binary mode for - -- utf8 - + input = encodeToIOData encoding inputStr getProgramInvocationOutput :: Verbosity -> ProgramInvocation -> IO String getProgramInvocationOutput verbosity inv = do @@ -168,25 +168,21 @@ getProgramInvocationOutputAndErrors verbosity progInvokeInput = minputStr, progInvokeOutputEncoding = encoding } = do - let utf8 = case encoding of IOEncodingUTF8 -> True; _ -> False - decode | utf8 = fromUTF8 . normaliseLineEndings - | otherwise = id + let mode = case encoding of IOEncodingUTF8 -> IODataModeBinary + IOEncodingText -> IODataModeText + + decode (IODataBinary b) = normaliseLineEndings (fromUTF8LBS b) + decode (IODataText s) = s + pathOverride <- getExtraPathEnv envOverrides extraPath menv <- getEffectiveEnvironment (envOverrides ++ pathOverride) (output, errors, exitCode) <- rawSystemStdInOut verbosity path args mcwd menv - input utf8 + input mode return (decode output, errors, exitCode) where - input = - case minputStr of - Nothing -> Nothing - Just inputStr -> Just $ - case encoding of - IOEncodingText -> (inputStr, False) - IOEncodingUTF8 -> (toUTF8 inputStr, True) -- use binary mode for utf8 - + input = encodeToIOData encoding <$> minputStr getExtraPathEnv :: [(String, Maybe String)] -> [FilePath] -> NoCallStackIO [(String, Maybe String)] getExtraPathEnv _ [] = return [] diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs index 8cc6dc7f053..e889c81e9ff 100644 --- a/Cabal/Distribution/Simple/Utils.hs +++ b/Cabal/Distribution/Simple/Utils.hs @@ -810,9 +810,9 @@ createProcessWithEnv verbosity path args mcwd menv inp out err = withFrozenCallS -- rawSystemStdout :: Verbosity -> FilePath -> [String] -> IO String rawSystemStdout verbosity path args = withFrozenCallStack $ do - (output, errors, exitCode) <- rawSystemStdInOut verbosity path args + (IODataText output, errors, exitCode) <- rawSystemStdInOut verbosity path args Nothing Nothing - Nothing False + Nothing IODataModeText when (exitCode /= ExitSuccess) $ die errors return output @@ -883,10 +883,10 @@ rawSystemStdInOut :: Verbosity -> [String] -- ^ Arguments -> Maybe FilePath -- ^ New working dir or inherit -> Maybe [(String, String)] -- ^ New environment or inherit - -> Maybe (String, Bool) -- ^ input text and binary mode - -> Bool -- ^ output in binary mode - -> IO (String, String, ExitCode) -- ^ output, errors, exit -rawSystemStdInOut verbosity path args mcwd menv input outputBinary = withFrozenCallStack $ do + -> Maybe IOData -- ^ input text and binary mode + -> IODataMode -- ^ output in binary mode + -> IO (IOData, String, ExitCode) -- ^ output, errors, exit +rawSystemStdInOut verbosity path args mcwd menv input outputMode = withFrozenCallStack $ do printRawCommandAndArgs verbosity path args Exception.bracket @@ -895,7 +895,6 @@ rawSystemStdInOut verbosity path args mcwd menv input outputBinary = withFrozenC $ \(inh,outh,errh,pid) -> do -- output mode depends on what the caller wants - hSetBinaryMode outh outputBinary -- but the errors are always assumed to be text (in the current locale) hSetBinaryMode errh False @@ -903,11 +902,12 @@ rawSystemStdInOut verbosity path args mcwd menv input outputBinary = withFrozenC -- so if the process writes to stderr we do not block. err <- hGetContents errh - out <- hGetContents outh + + out <- ioDataHGetContents outh outputMode mv <- newEmptyMVar let force str = do - mberr <- Exception.try (evaluate (length str) >> return ()) + mberr <- Exception.try (evaluate (rnf str) >> return ()) putMVar mv (mberr :: Either IOError ()) _ <- forkIO $ force out _ <- forkIO $ force err @@ -915,11 +915,9 @@ rawSystemStdInOut verbosity path args mcwd menv input outputBinary = withFrozenC -- push all the input, if any case input of Nothing -> return () - Just (inputStr, inputBinary) -> do - -- input mode depends on what the caller wants - hSetBinaryMode inh inputBinary - hPutStr inh inputStr - hClose inh + Just inputData -> do + -- input mode depends on what the caller wants + ioDataHPutContents inh inputData --TODO: this probably fails if the process refuses to consume -- or if it closes stdin (eg if it exits) @@ -935,8 +933,9 @@ rawSystemStdInOut verbosity path args mcwd menv input outputBinary = withFrozenC " with error message:\n" ++ err ++ case input of Nothing -> "" - Just ("", _) -> "" - Just (inp, _) -> "\nstdin input:\n" ++ inp + Just d | ioDataNull d -> "" + Just (IODataText inp) -> "\nstdin input:\n" ++ inp + Just (IODataBinary inp) -> "\nstdin input (binary):\n" ++ show inp -- Check if we we hit an exception while consuming the output -- (e.g. a text decoding error) diff --git a/Cabal/tests/UnitTests/Distribution/Simple/Utils.hs b/Cabal/tests/UnitTests/Distribution/Simple/Utils.hs index 59a9b4836de..c046169049d 100644 --- a/Cabal/tests/UnitTests/Distribution/Simple/Utils.hs +++ b/Cabal/tests/UnitTests/Distribution/Simple/Utils.hs @@ -66,20 +66,21 @@ rawSystemStdInOutTextDecodingTest hClose handleExe -- Compile - compilationResult <- rawSystemStdInOut normal + (IODataText resOutput, resErrors, resExitCode) <- rawSystemStdInOut normal "ghc" ["-o", filenameExe, filenameHs] Nothing Nothing Nothing - False - print compilationResult + IODataModeText + print (resOutput, resErrors, resExitCode) -- Execute Exception.try $ do rawSystemStdInOut normal filenameExe [] Nothing Nothing Nothing - False -- not binary mode output, ie utf8 text mode so try to decode + IODataModeText -- not binary mode output, ie utf8 text mode so try to decode case res of - Right x -> assertFailure $ "expected IO decoding exception: " ++ show x + Right (IODataText x1, x2, x3) -> assertFailure $ "expected IO decoding exception: " ++ show (x1,x2,x3) + Right (IODataBinary _, _, _) -> assertFailure "internal error" Left err | isDoesNotExistError err -> Exception.throwIO err -- no ghc! | otherwise -> return () From 9f7f7339e8c68595d3e2034fadf2d3280063a919 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sat, 5 Aug 2017 19:52:12 +0200 Subject: [PATCH 07/10] Reimplement `{from,to}UTF8` in terms of `{encode,decode}StringUtf8` Since we don't use those functions anymore, we can finally `DEPRECATE` them. --- Cabal/Distribution/Utils/Generic.hs | 64 +++-------------------------- 1 file changed, 6 insertions(+), 58 deletions(-) diff --git a/Cabal/Distribution/Utils/Generic.hs b/Cabal/Distribution/Utils/Generic.hs index 2ef04e28df8..667f5a89843 100644 --- a/Cabal/Distribution/Utils/Generic.hs +++ b/Cabal/Distribution/Utils/Generic.hs @@ -78,8 +78,6 @@ import Distribution.Compat.Prelude import Distribution.Utils.String -import Data.Bits - ( Bits((.|.), (.&.), shiftL, shiftR) ) import Data.List ( isInfixOf ) import Data.Ord @@ -166,46 +164,12 @@ writeFileAtomic targetPath content = do -- This is a modification of the UTF8 code from gtk2hs and the -- utf8-string package. +{-# DEPRECATED fromUTF8 "Please use 'decodeStringUtf8', 'fromUTF8BS', or 'fromUTF8BS'" #-} fromUTF8 :: String -> String -fromUTF8 [] = [] -fromUTF8 (c:cs) - | c <= '\x7F' = c : fromUTF8 cs - | c <= '\xBF' = replacementChar : fromUTF8 cs - | c <= '\xDF' = twoBytes c cs - | c <= '\xEF' = moreBytes 3 0x800 cs (ord c .&. 0xF) - | c <= '\xF7' = moreBytes 4 0x10000 cs (ord c .&. 0x7) - | c <= '\xFB' = moreBytes 5 0x200000 cs (ord c .&. 0x3) - | c <= '\xFD' = moreBytes 6 0x4000000 cs (ord c .&. 0x1) - | otherwise = replacementChar : fromUTF8 cs +fromUTF8 = decodeStringUtf8 . map c2w where - twoBytes c0 (c1:cs') - | ord c1 .&. 0xC0 == 0x80 - = let d = ((ord c0 .&. 0x1F) `shiftL` 6) - .|. (ord c1 .&. 0x3F) - in if d >= 0x80 - then chr d : fromUTF8 cs' - else replacementChar : fromUTF8 cs' - twoBytes _ cs' = replacementChar : fromUTF8 cs' - - moreBytes :: Int -> Int -> [Char] -> Int -> [Char] - moreBytes 1 overlong cs' acc - | overlong <= acc && acc <= 0x10FFFF - && (acc < 0xD800 || 0xDFFF < acc) - && (acc < 0xFFFE || 0xFFFF < acc) - = chr acc : fromUTF8 cs' - - | otherwise - = replacementChar : fromUTF8 cs' - - moreBytes byteCount overlong (cn:cs') acc - | ord cn .&. 0xC0 == 0x80 - = moreBytes (byteCount-1) overlong cs' - ((acc `shiftL` 6) .|. ord cn .&. 0x3F) - - moreBytes _ _ cs' _ - = replacementChar : fromUTF8 cs' - - replacementChar = '\xfffd' + c2w c | c > '\xFF' = error "fromUTF8: invalid input data" + | otherwise = fromIntegral (ord c) fromUTF8BS :: SBS.ByteString -> String fromUTF8BS = decodeStringUtf8 . SBS.unpack @@ -213,25 +177,9 @@ fromUTF8BS = decodeStringUtf8 . SBS.unpack fromUTF8LBS :: BS.ByteString -> String fromUTF8LBS = decodeStringUtf8 . BS.unpack +{-# DEPRECATED toUTF8 "Please use 'encodeStringUtf8', 'toUTF8BS', or 'toUTF8BS'" #-} toUTF8 :: String -> String -toUTF8 [] = [] -toUTF8 (c:cs) - | c <= '\x07F' = c - : toUTF8 cs - | c <= '\x7FF' = chr (0xC0 .|. (w `shiftR` 6)) - : chr (0x80 .|. (w .&. 0x3F)) - : toUTF8 cs - | c <= '\xFFFF'= chr (0xE0 .|. (w `shiftR` 12)) - : chr (0x80 .|. ((w `shiftR` 6) .&. 0x3F)) - : chr (0x80 .|. (w .&. 0x3F)) - : toUTF8 cs - | otherwise = chr (0xf0 .|. (w `shiftR` 18)) - : chr (0x80 .|. ((w `shiftR` 12) .&. 0x3F)) - : chr (0x80 .|. ((w `shiftR` 6) .&. 0x3F)) - : chr (0x80 .|. (w .&. 0x3F)) - : toUTF8 cs - where w = ord c - +toUTF8 = map (chr . fromIntegral) . encodeStringUtf8 toUTF8BS :: String -> SBS.ByteString toUTF8BS = SBS.pack . encodeStringUtf8 From d158887ae6550d810bfe482d2e2a5730c18024a3 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Wed, 9 Aug 2017 19:55:47 +0200 Subject: [PATCH 08/10] Move `IOData` into new `Distribution.Utils.IOData` module --- Cabal/Cabal.cabal | 1 + Cabal/Distribution/Simple/Utils.hs | 80 +++++------------------------- Cabal/Distribution/Utils/IOData.hs | 73 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 67 deletions(-) create mode 100644 Cabal/Distribution/Utils/IOData.hs diff --git a/Cabal/Cabal.cabal b/Cabal/Cabal.cabal index b52ce47d80b..9410f84419f 100644 --- a/Cabal/Cabal.cabal +++ b/Cabal/Cabal.cabal @@ -126,6 +126,7 @@ library Distribution.Backpack.ModSubst Distribution.Backpack.ModuleShape Distribution.Backpack.PreModuleShape + Distribution.Utils.IOData Distribution.Utils.LogProgress Distribution.Utils.MapAccum Distribution.Compat.CreatePipe diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs index e889c81e9ff..3d305d3e005 100644 --- a/Cabal/Distribution/Simple/Utils.hs +++ b/Cabal/Distribution/Simple/Utils.hs @@ -44,12 +44,6 @@ module Distribution.Simple.Utils ( -- * exceptions handleDoesNotExist, - -- * 'IOData' - IOData(..), IODataMode(..), - ioDataNull, - ioDataHGetContents, - ioDataHPutContents, - -- * running programs rawSystemExit, rawSystemExitCode, @@ -63,6 +57,14 @@ module Distribution.Simple.Utils ( findProgramLocation, findProgramVersion, + -- ** 'IOData' re-export + -- + -- These types are re-exported from + -- "Distribution.Utils.IOData" for convience as they're + -- exposed in the API of 'rawSystemStdInOut' + IOData(..), + IODataMode(..), + -- * copying files smartCopySources, createDirectoryIfMissingVerbose, @@ -178,6 +180,8 @@ import Distribution.Compat.Prelude import Distribution.Text import Distribution.Utils.Generic +import Distribution.Utils.IOData (IOData(..), IODataMode(..)) +import qualified Distribution.Utils.IOData as IOData import Distribution.ModuleName as ModuleName import Distribution.System import Distribution.Version @@ -207,7 +211,6 @@ import Control.Concurrent.MVar import Data.Typeable ( cast ) import qualified Data.ByteString.Lazy.Char8 as BS.Char8 -import qualified Data.ByteString.Lazy as BS import System.Directory ( Permissions(executable), getDirectoryContents, getPermissions @@ -817,63 +820,6 @@ rawSystemStdout verbosity path args = withFrozenCallStack $ do die errors return output --- | Represents either textual or binary data passed via I/O functions --- which support binary/text mode --- --- @since 2.2.0 -data IOData = IODataText String - -- ^ How Text gets encoded is usually locale-dependent. - | IODataBinary BS.ByteString - -- ^ Raw binary which gets read/written in binary mode. - --- | Test whether 'IOData' is empty --- --- @since 2.2.0 -ioDataNull :: IOData -> Bool -ioDataNull (IODataText s) = null s -ioDataNull (IODataBinary b) = BS.null b - -instance NFData IOData where - rnf (IODataText s) = rnf s -#if MIN_VERSION_bytestring(0,10,0) - rnf (IODataBinary bs) = rnf bs -#else - rnf (IODataBinary bs) = rnf (BS.length bs) -#endif - -data IODataMode = IODataModeText | IODataModeBinary - --- | 'IOData' Wrapper for 'hGetContents' --- --- __Note__: This operation uses lazy I/O. Use 'NFData' to force all --- data to be read and consequently the internal file handle to be --- closed. --- --- @since 2.2.0 -ioDataHGetContents :: Handle -> IODataMode -> IO IOData -ioDataHGetContents h IODataModeText = do - hSetBinaryMode h False - IODataText <$> hGetContents h -ioDataHGetContents h IODataModeBinary = do - hSetBinaryMode h True - IODataBinary <$> BS.hGetContents h - --- | 'IOData' Wrapper for 'hPutStr' and 'hClose' --- --- This is the dual operation ot 'ioDataHGetContents', --- and consequently the handle is closed with `hClose`. --- --- @since 2.2.0 -ioDataHPutContents :: Handle -> IOData -> IO () -ioDataHPutContents h (IODataText c) = do - hSetBinaryMode h False - hPutStr h c - hClose h -ioDataHPutContents h (IODataBinary c) = do - hSetBinaryMode h True - BS.hPutStr h c - hClose h - -- | Run a command and return its output, errors and exit status. Optionally -- also supply some input. Also provides control over whether the binary/text -- mode of the input and output. @@ -903,7 +849,7 @@ rawSystemStdInOut verbosity path args mcwd menv input outputMode = withFrozenCal err <- hGetContents errh - out <- ioDataHGetContents outh outputMode + out <- IOData.hGetContents outh outputMode mv <- newEmptyMVar let force str = do @@ -917,7 +863,7 @@ rawSystemStdInOut verbosity path args mcwd menv input outputMode = withFrozenCal Nothing -> return () Just inputData -> do -- input mode depends on what the caller wants - ioDataHPutContents inh inputData + IOData.hPutContents inh inputData --TODO: this probably fails if the process refuses to consume -- or if it closes stdin (eg if it exits) @@ -933,7 +879,7 @@ rawSystemStdInOut verbosity path args mcwd menv input outputMode = withFrozenCal " with error message:\n" ++ err ++ case input of Nothing -> "" - Just d | ioDataNull d -> "" + Just d | IOData.null d -> "" Just (IODataText inp) -> "\nstdin input:\n" ++ inp Just (IODataBinary inp) -> "\nstdin input (binary):\n" ++ show inp diff --git a/Cabal/Distribution/Utils/IOData.hs b/Cabal/Distribution/Utils/IOData.hs new file mode 100644 index 00000000000..a17468d762b --- /dev/null +++ b/Cabal/Distribution/Utils/IOData.hs @@ -0,0 +1,73 @@ +{-# LANGUAGE CPP #-} + +-- | @since 2.2.0 +module Distribution.Utils.IOData + ( -- * 'IOData' & 'IODataMode' type + IOData(..) + , IODataMode(..) + , null + , hGetContents + , hPutContents + ) where + +import qualified Data.ByteString.Lazy as BS +import Distribution.Compat.Prelude hiding (null) +import qualified Prelude +import qualified System.IO + +-- | Represents either textual or binary data passed via I/O functions +-- which support binary/text mode +-- +-- @since 2.2.0 +data IOData = IODataText String + -- ^ How Text gets encoded is usually locale-dependent. + | IODataBinary BS.ByteString + -- ^ Raw binary which gets read/written in binary mode. + +-- | Test whether 'IOData' is empty +-- +-- @since 2.2.0 +null :: IOData -> Bool +null (IODataText s) = Prelude.null s +null (IODataBinary b) = BS.null b + +instance NFData IOData where + rnf (IODataText s) = rnf s +#if MIN_VERSION_bytestring(0,10,0) + rnf (IODataBinary bs) = rnf bs +#else + rnf (IODataBinary bs) = rnf (BS.length bs) +#endif + +data IODataMode = IODataModeText | IODataModeBinary + +-- | 'IOData' Wrapper for 'System.IO.hGetContents' +-- +-- __Note__: This operation uses lazy I/O. Use 'NFData' to force all +-- data to be read and consequently the internal file handle to be +-- closed. +-- +-- @since 2.2.0 +hGetContents :: System.IO.Handle -> IODataMode -> Prelude.IO IOData +hGetContents h IODataModeText = do + System.IO.hSetBinaryMode h False + IODataText <$> System.IO.hGetContents h +hGetContents h IODataModeBinary = do + System.IO.hSetBinaryMode h True + IODataBinary <$> BS.hGetContents h + +-- | 'IOData' Wrapper for 'System.IO.hPutStr' and 'System.IO.hClose' +-- +-- This is the dual operation ot 'ioDataHGetContents', +-- and consequently the handle is closed with `hClose`. +-- +-- @since 2.2.0 +hPutContents :: System.IO.Handle -> IOData -> Prelude.IO () +hPutContents h (IODataText c) = do + System.IO.hSetBinaryMode h False + System.IO.hPutStr h c + System.IO.hClose h +hPutContents h (IODataBinary c) = do + System.IO.hSetBinaryMode h True + BS.hPutStr h c + System.IO.hClose h From 2430a29a838726bd2367f4d7680822764f5e9598 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Wed, 9 Aug 2017 20:11:42 +0200 Subject: [PATCH 09/10] Remove obsolete functions This reduces the surface area of lib:Cabal by removing entry points that Setup.hs scripts are very unlikely to use. --- Cabal/Distribution/Simple/Utils.hs | 4 ---- Cabal/Distribution/Utils/Generic.hs | 27 --------------------------- 2 files changed, 31 deletions(-) diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs index 3d305d3e005..8a155e0c9d6 100644 --- a/Cabal/Distribution/Simple/Utils.hs +++ b/Cabal/Distribution/Simple/Utils.hs @@ -136,10 +136,8 @@ module Distribution.Simple.Utils ( rewriteFile, -- * Unicode - fromUTF8, fromUTF8BS, fromUTF8LBS, - toUTF8, toUTF8BS, toUTF8LBS, readUTF8File, @@ -148,8 +146,6 @@ module Distribution.Simple.Utils ( normaliseLineEndings, -- * BOM - startsWithBOM, - fileHasBOM, ignoreBOM, -- * generic utils diff --git a/Cabal/Distribution/Utils/Generic.hs b/Cabal/Distribution/Utils/Generic.hs index 667f5a89843..af7156a1235 100644 --- a/Cabal/Distribution/Utils/Generic.hs +++ b/Cabal/Distribution/Utils/Generic.hs @@ -29,11 +29,9 @@ module Distribution.Utils.Generic ( -- * Unicode -- ** Conversions - fromUTF8, fromUTF8BS, fromUTF8LBS, - toUTF8, toUTF8BS, toUTF8LBS, @@ -43,8 +41,6 @@ module Distribution.Utils.Generic ( writeUTF8File, -- ** BOM - startsWithBOM, - fileHasBOM, ignoreBOM, -- ** Misc @@ -161,41 +157,18 @@ writeFileAtomic targetPath content = do -- * Unicode stuff -- ------------------------------------------------------------ --- This is a modification of the UTF8 code from gtk2hs and the --- utf8-string package. - -{-# DEPRECATED fromUTF8 "Please use 'decodeStringUtf8', 'fromUTF8BS', or 'fromUTF8BS'" #-} -fromUTF8 :: String -> String -fromUTF8 = decodeStringUtf8 . map c2w - where - c2w c | c > '\xFF' = error "fromUTF8: invalid input data" - | otherwise = fromIntegral (ord c) - fromUTF8BS :: SBS.ByteString -> String fromUTF8BS = decodeStringUtf8 . SBS.unpack fromUTF8LBS :: BS.ByteString -> String fromUTF8LBS = decodeStringUtf8 . BS.unpack -{-# DEPRECATED toUTF8 "Please use 'encodeStringUtf8', 'toUTF8BS', or 'toUTF8BS'" #-} -toUTF8 :: String -> String -toUTF8 = map (chr . fromIntegral) . encodeStringUtf8 - toUTF8BS :: String -> SBS.ByteString toUTF8BS = SBS.pack . encodeStringUtf8 toUTF8LBS :: String -> BS.ByteString toUTF8LBS = BS.pack . encodeStringUtf8 --- | Whether BOM is at the beginning of the input -startsWithBOM :: String -> Bool -startsWithBOM ('\xFEFF':_) = True -startsWithBOM _ = False - --- | Check whether a file has Unicode byte order mark (BOM). -fileHasBOM :: FilePath -> NoCallStackIO Bool -fileHasBOM f = (startsWithBOM . fromUTF8LBS) <$> BS.readFile f - -- | Ignore a Unicode byte order mark (BOM) at the beginning of the input -- ignoreBOM :: String -> String From b1bbf2eeb71d27587a9560d42e01b12cda6cf4a6 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Wed, 9 Aug 2017 23:27:10 +0200 Subject: [PATCH 10/10] changelog entry for #4666 [skip ci] --- Cabal/changelog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cabal/changelog b/Cabal/changelog index fa5fdb76147..3b715992655 100644 --- a/Cabal/changelog +++ b/Cabal/changelog @@ -2,6 +2,11 @@ 2.2.0.0 (current development version) * Remove unused '--allow-newer'/'--allow-older' support (#4527) + * Change `rawSystemStdInOut` to use proper type to represent + binary and textual data; new 'Distribution.Utils.IOData' module; + removed obsolete 'startsWithBOM', 'fileHasBOM', 'fromUTF8', + and 'toUTF8' functions; add new `toUTF8BS`/`toUTF8LBS` + encoding functions. (#4666) * Warn about `.cabal` file-name not matching package name in 'cabal check' (#4592) * By default 'ar' program receives arguments via '@file' format.