haskell-foundation · vincenthz · Sep 17, 2016 · Sep 3, 2016 · Sep 3, 2016 · Sep 4, 2016
diff --git a/Foundation/Foreign.hs b/Foundation/Foreign.hs
@@ -9,8 +9,11 @@ module Foundation.Foreign
     ( module Foundation.Primitive.FinalPtr
     , V.foreignMem
     , V.mutableForeignMem
+    , module Foreign.C.Types
     ) where
 
 import           Foundation.Primitive.FinalPtr
 import qualified Foundation.Array.Unboxed as V
 import qualified Foundation.Array.Unboxed.Mutable as V
+
+import           Foreign.C.Types
diff --git a/Foundation/Primitive/Types.hs b/Foundation/Primitive/Types.hs
@@ -20,6 +20,7 @@ import           GHC.Prim
 import           GHC.Int
 import           GHC.Types
 import           GHC.Word
+import           Foreign.C.Types
 import           Foundation.Internal.Proxy
 import           Foundation.Internal.Base
 import           Foundation.Internal.Types
@@ -248,6 +249,38 @@ instance PrimType Char where
     primAddrWrite addr (Offset (I# n)) (C# w) = primitive $ \s1 -> (# writeWideCharOffAddr# addr n w s1, () #)
     {-# INLINE primAddrWrite #-}
 
+instance PrimType CChar where
+    primSizeInBytes _ = Size 1
+    {-# INLINE primSizeInBytes #-}
+    primBaIndex ba (Offset n) = CChar (primBaIndex ba (Offset n :: Offset Int8))
+    {-# INLINE primBaIndex #-}
+    primMbaRead mba (Offset n) = CChar <$> primMbaRead mba (Offset n :: Offset Int8)
+    {-# INLINE primMbaRead #-}
+    primMbaWrite mba (Offset n) (CChar int8) = primMbaWrite mba (Offset n) int8
+    {-# INLINE primMbaWrite #-}
+    primAddrIndex addr (Offset n) = CChar $ primAddrIndex addr (Offset n :: Offset Int8)
+    {-# INLINE primAddrIndex #-}
+    primAddrRead addr (Offset n) = CChar <$> primAddrRead addr (Offset n :: Offset Int8)
+    {-# INLINE primAddrRead #-}
+    primAddrWrite addr (Offset n) (CChar int8) = primAddrWrite addr (Offset n) int8
+    {-# INLINE primAddrWrite #-}
+instance PrimType CUChar where
+    primSizeInBytes _ = Size 1
+    {-# INLINE primSizeInBytes #-}
+    primBaIndex ba (Offset n) = CUChar (primBaIndex ba (Offset n :: Offset Word8))
+    {-# INLINE primBaIndex #-}
+    primMbaRead mba (Offset n) = CUChar <$> primMbaRead mba (Offset n :: Offset Word8)
+    {-# INLINE primMbaRead #-}
+    primMbaWrite mba (Offset n) (CUChar w8) = primMbaWrite mba (Offset n) w8
+    {-# INLINE primMbaWrite #-}
+    primAddrIndex addr (Offset n) = CUChar $ primAddrIndex addr (Offset n :: Offset Word8)
+    {-# INLINE primAddrIndex #-}
+    primAddrRead addr (Offset n) = CUChar <$> primAddrRead addr (Offset n :: Offset Word8)
+    {-# INLINE primAddrRead #-}
+    primAddrWrite addr (Offset n) (CUChar w8) = primAddrWrite addr (Offset n) w8
+    {-# INLINE primAddrWrite #-}
+
+
 -- | Cast a Size linked to type A (Size A) to a Size linked to type B (Size B)
 sizeRecast :: (PrimType a, PrimType b) => Size a -> Size b
 sizeRecast = doRecast Proxy Proxy

diff --git a/Foundation/String/ASCII.hs b/Foundation/String/ASCII.hs
@@ -0,0 +1,267 @@
+-- |
+-- Module      : Foundation.String.ASCII
+-- License     : BSD-style
+-- Maintainer  : Haskell Foundation
+-- Stability   : experimental
+-- Portability : portable
+--
+-- A AsciiString type backed by a `ASCII` encoded byte array and all the necessary
+-- functions to manipulate the string.
+--
+-- The recommended type is `AsciiString` from `Foundation.AsciiString.UTF8`
+--
+{-# LANGUAGE BangPatterns               #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MagicHash                  #-}
+{-# LANGUAGE NoImplicitPrelude          #-}
+{-# LANGUAGE TypeFamilies               #-}
+{-# LANGUAGE UnboxedTuples              #-}
+{-# LANGUAGE FlexibleContexts           #-}
+module Foundation.String.ASCII
+    ( AsciiString
+    --, Buffer
+    , create
+    , replicate
+    -- * Binary conversion
+    , fromBytesUnsafe
+    , toBytes
+    , copy
+
+    -- * Legacy utility
+    , lines
+    , words
+    ) where
+
+import           Foundation.Array.Unboxed           (UArray)
+import qualified Foundation.Array.Unboxed           as Vec
+import qualified Foundation.Array.Unboxed.Mutable   as MVec
+import qualified Foundation.Collection              as C
+import           Foundation.Internal.Base
+import           Foundation.Internal.Types
+import           Foundation.Number
+import           Foundation.Primitive.Monad
+import           Foundation.Foreign
+
+import GHC.Int
+import GHC.Types
+import GHC.Prim
+
+ -- temporary
+import qualified Data.List
+import qualified Prelude
+import Foundation.Class.Bifunctor
+
+ccharToChar :: CChar -> Char
+ccharToChar (CChar (I8# i)) = C# (chr# i)
+charToCChar :: Char -> CChar
+charToCChar (C# i) = CChar (I8# (ord# i))
+
+-- | Opaque packed array of characters in the ASCII encoding
+newtype AsciiString = AsciiString { toBytes :: UArray CChar }
+    deriving (Typeable, Monoid, Eq, Ord)
+
+newtype MutableAsciiString st = MutableAsciiString (MVec.MUArray CChar st)
+    deriving (Typeable)
+
+instance Show AsciiString where
+    show = fmap ccharToChar . toList
+instance IsString AsciiString where
+    fromString = fromList . fmap charToCChar
+instance IsList AsciiString where
+    type Item AsciiString = CChar
+    fromList = sFromList
+    toList = sToList
+
+type instance C.Element AsciiString = CChar
+
+instance C.InnerFunctor AsciiString where
+    imap = ccharMap
+instance C.Collection AsciiString where
+    null = null
+    length = length
+    minimum = Data.List.minimum . toList . C.getNonEmpty -- TODO faster implementation
+    maximum = Data.List.maximum . toList . C.getNonEmpty -- TODO faster implementation
+instance C.Sequential AsciiString where
+    take = take
+    drop = drop
+    splitAt = splitAt
+    revTake = revTake
+    revDrop = revDrop
+    revSplitAt = revSplitAt
+    splitOn = splitOn
+    break = break
+    breakElem = breakElem
+    intersperse = intersperse
+    span = span
+    filter = filter
+    reverse = reverse
+    unsnoc = unsnoc
+    uncons = uncons
+    snoc = snoc
+    cons = cons
+    find = find
+    sortBy = sortBy
+    singleton = fromList . (:[])
+
+instance C.Zippable AsciiString where
+  -- TODO Use a string builder once available
+  zipWith f a b = sFromList (C.zipWith f a b)
+
+next :: AsciiString -> Offset CChar -> (# CChar, Offset CChar #)
+next (AsciiString ba) (Offset n) = (# h, Offset (n + 1) #)
+  where
+    !h = Vec.unsafeIndex ba n
+
+freeze :: PrimMonad prim => MutableAsciiString (PrimState prim) -> prim AsciiString
+freeze (MutableAsciiString mba) = AsciiString `fmap` C.unsafeFreeze mba
+{-# INLINE freeze #-}
+
+------------------------------------------------------------------------
+-- real functions
+
+sToList :: AsciiString -> [CChar]
+sToList s = loop azero
+  where
+    nbBytes :: Size CChar
+    !nbBytes = size s
+    !end = azero `offsetPlusE` nbBytes
+    loop idx
+        | idx == end = []
+        | otherwise  =
+            let (# c , idx' #) = next s idx in c : loop idx'
+
+sFromList :: [CChar] -> AsciiString
+sFromList = AsciiString . fromList
+{-# INLINE [0] sFromList #-}
+
+null :: AsciiString -> Bool
+null  = Vec.null . toBytes
+{-# INLINE null #-}
+
+-- | Create a string composed of a number @n of Chars (Unicode code points).
+--
+-- if the input @s contains less characters than required, then
+take :: Int -> AsciiString -> AsciiString
+take n s = fst $ splitAt n s -- TODO specialize
+{-# INLINE take #-}
+
+-- | Create a string with the remaining Chars after dropping @n Chars from the beginning
+drop :: Int -> AsciiString -> AsciiString
+drop n = AsciiString . Vec.drop n . toBytes
+{-# INLINE drop #-}
+
+splitAt :: Int -> AsciiString -> (AsciiString, AsciiString)
+splitAt n = bimap AsciiString AsciiString . Vec.splitAt n . toBytes
+{-# INLINE splitAt #-}
+
+-- rev{Take,Drop,SplitAt} TODO optimise:
+-- we can process the string from the end using a skipPrev instead of getting the length
+
+revTake :: Int -> AsciiString -> AsciiString
+revTake nbElems v = drop (length v - nbElems) v
+
+revDrop :: Int -> AsciiString -> AsciiString
+revDrop nbElems v = take (length v - nbElems) v
+
+revSplitAt :: Int -> AsciiString -> (AsciiString, AsciiString)
+revSplitAt n v = (drop idx v, take idx v)
+  where idx = length v - n
+
+-- | Split on the input string using the predicate as separator
+--
+-- e.g.
+--
+-- > splitOn (== ',') ","          == ["",""]
+-- > splitOn (== ',') ",abc,"      == ["","abc",""]
+-- > splitOn (== ':') "abc"        == ["abc"]
+-- > splitOn (== ':') "abc::def"   == ["abc","","def"]
+-- > splitOn (== ':') "::abc::def" == ["","","abc","","def"]
+--
+splitOn :: (CChar -> Bool) -> AsciiString -> [AsciiString]
+splitOn predicate = fmap AsciiString . Vec.splitOn predicate . toBytes
+
+break :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
+break predicate = bimap AsciiString AsciiString . Vec.break predicate . toBytes
+{-# INLINE[0] break #-}
+
+{-# RULES "break (== 'c')" [3] forall c . break (== c) = breakElem c #-}
+
+breakElem :: CChar -> AsciiString -> (AsciiString, AsciiString)
+breakElem !el (AsciiString ba) =
+    let (# v1,v2 #) = Vec.splitElem el ba in (AsciiString v1, AsciiString v2)
+{-# INLINE breakElem #-}
+
+intersperse :: CChar -> AsciiString -> AsciiString
+intersperse sep = AsciiString . Vec.intersperse sep . toBytes
+
+span :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
+span predicate = break (not . predicate)
+
+-- | size in bytes
+size :: AsciiString -> Size CChar
+size = Size . C.length . toBytes
+
+length :: AsciiString -> Int
+length s = let (Size l) = size s in l
+
+replicate :: Int -> CChar -> AsciiString
+replicate n c = AsciiString $ Vec.create n (const c)
+
+-- | Copy the AsciiString
+copy :: AsciiString -> AsciiString
+copy (AsciiString s) = AsciiString (Vec.copy s)
+
+-- | Allocate a MutableAsciiString of a specific size in bytes.
+new :: PrimMonad prim
+    => Size CChar -- ^ in number of bytes, not of elements.
+    -> prim (MutableAsciiString (PrimState prim))
+new n = MutableAsciiString `fmap` MVec.new n
+
+create :: PrimMonad prim => Int -> (MutableAsciiString (PrimState prim) -> prim Int) -> prim AsciiString
+create sz f = do
+    ms     <- new (Size sz)
+    filled <- f ms
+    if filled == sz
+        then freeze ms
+        else C.take filled `fmap` freeze ms
+
+ccharMap :: (CChar -> CChar) -> AsciiString -> AsciiString
+ccharMap f = AsciiString . Vec.map f . toBytes
+
+snoc :: AsciiString -> CChar -> AsciiString
+snoc (AsciiString ba) = AsciiString . Vec.snoc ba
+
+cons :: CChar -> AsciiString -> AsciiString
+cons c = AsciiString . Vec.cons c . toBytes
+
+unsnoc :: AsciiString -> Maybe (AsciiString, CChar)
+unsnoc str = first AsciiString <$> Vec.unsnoc (toBytes str)
+
+uncons :: AsciiString -> Maybe (CChar, AsciiString)
+uncons str = second AsciiString <$> Vec.uncons (toBytes str)
+
+find :: (CChar -> Bool) -> AsciiString -> Maybe CChar
+find predicate = Vec.find predicate . toBytes
+
+sortBy :: (CChar -> CChar -> Ordering) -> AsciiString -> AsciiString
+sortBy sortF = AsciiString . Vec.sortBy sortF . toBytes
+
+filter :: (CChar -> Bool) -> AsciiString -> AsciiString
+filter p s = fromList $ Data.List.filter p $ toList s
+
+reverse :: AsciiString -> AsciiString
+reverse (AsciiString ba) = AsciiString $ Vec.reverse ba
+
+-- | Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity
+--
+-- If the input contains invalid sequences, it will trigger runtime async errors when processing data.
+--
+-- In doubt, use 'fromBytes'
+fromBytesUnsafe :: UArray CChar -> AsciiString
+fromBytesUnsafe = AsciiString
+
+lines :: AsciiString -> [AsciiString]
+lines = fmap fromString . Prelude.lines . show
+
+words :: AsciiString -> [AsciiString]
+words = fmap fromString . Prelude.words . show
diff --git a/foundation.cabal b/foundation.cabal
@@ -50,6 +50,7 @@ Library
                      Foundation.Class.Bifunctor
                      Foundation.Convertible
                      Foundation.String
+                     Foundation.String.ASCII
                      Foundation.IO
                      Foundation.IO.FileMap
                      Foundation.VFS
@@ -140,11 +141,12 @@ Test-Suite test-foundation
   Other-modules:     Test.Utils.Foreign
                      Test.Data.List
                      Test.Data.Unicode
-                     Test.Foundation.Array
+                     Test.Data.ASCII
                      Test.Foundation.Collection
                      Test.Foundation.Number
                      Test.Foundation.Encoding
                      Test.Foundation.Parser
+                     Test.Foundation.Array
                      Test.Foundation.String
   Build-Depends:     base >= 3 && < 5
                    , mtl

diff --git a/tests/Test/Data/ASCII.hs b/tests/Test/Data/ASCII.hs
@@ -0,0 +1,17 @@
+-- |
+-- Module: Test.Data.ASCII
+--
+
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Test.Data.ASCII
+    ( genAsciiChar
+    ) where
+
+import Foundation
+import Foundation.Foreign
+import Test.Tasty.QuickCheck
+
+-- | a better generator for unicode Character
+genAsciiChar :: Gen CChar
+genAsciiChar = toEnum <$> choose (1, 127)
diff --git a/tests/Test/Foundation/Array.hs b/tests/Test/Foundation/Array.hs
@@ -33,6 +33,8 @@ testArrayRefs = testGroup "Array"
         , testCollection "UArray(I64)" (Proxy :: Proxy (UArray Int64))  arbitrary
         , testCollection "UArray(F32)" (Proxy :: Proxy (UArray Float))  arbitrary
         , testCollection "UArray(F64)" (Proxy :: Proxy (UArray Double)) arbitrary
+        , testCollection "UArray(CChar)"  (Proxy :: Proxy (UArray CChar))  (CChar <$> arbitrary)
+        , testCollection "UArray(CUChar)" (Proxy :: Proxy (UArray CUChar)) (CUChar <$> arbitrary)
         ]
     , testGroup "Unboxed-Foreign"
         [ testGroup "UArray(W8)"  (testUnboxedForeign (Proxy :: Proxy (UArray Word8))  arbitrary)
@@ -45,6 +47,8 @@ testArrayRefs = testGroup "Array"
         , testGroup "UArray(I64)" (testUnboxedForeign (Proxy :: Proxy (UArray Int64))  arbitrary)
         , testGroup "UArray(F32)" (testUnboxedForeign (Proxy :: Proxy (UArray Float))  arbitrary)
         , testGroup "UArray(F64)" (testUnboxedForeign (Proxy :: Proxy (UArray Double)) arbitrary)
+        , testGroup "UArray(CChar)"  (testUnboxedForeign (Proxy :: Proxy (UArray CChar))  (CChar <$> arbitrary))
+        , testGroup "UArray(CUChar)" (testUnboxedForeign (Proxy :: Proxy (UArray CUChar)) (CUChar <$> arbitrary))
         ]
     , testGroup "Boxed"
         [ testCollection "Array(W8)"  (Proxy :: Proxy (Array Word8))  arbitrary
@@ -60,6 +64,8 @@ testArrayRefs = testGroup "Array"
         , testCollection "Array(Int)" (Proxy :: Proxy (Array Int))  arbitrary
         , testCollection "Array(Int,Int)" (Proxy :: Proxy (Array (Int,Int)))  arbitrary
         , testCollection "Array(Integer)" (Proxy :: Proxy (Array Integer)) arbitrary
+        , testCollection "Array(CChar)"   (Proxy :: Proxy (Array CChar))  (CChar <$> arbitrary)
+        , testCollection "Array(CUChar)"  (Proxy :: Proxy (Array CUChar)) (CUChar <$> arbitrary)
         ]
     ]