Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.

ASCII String type #110

Merged
merged 6 commits into from
Sep 17, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Foundation/Foreign.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ module Foundation.Foreign
( module Foundation.Primitive.FinalPtr
, V.foreignMem
, V.mutableForeignMem
, module Foreign.C.Types
) where

import Foundation.Primitive.FinalPtr
import qualified Foundation.Array.Unboxed as V
import qualified Foundation.Array.Unboxed.Mutable as V

import Foreign.C.Types
33 changes: 33 additions & 0 deletions Foundation/Primitive/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import GHC.Prim
import GHC.Int
import GHC.Types
import GHC.Word
import Foreign.C.Types
import Foundation.Internal.Proxy
import Foundation.Internal.Base
import Foundation.Internal.Types
Expand Down Expand Up @@ -248,6 +249,38 @@ instance PrimType Char where
primAddrWrite addr (Offset (I# n)) (C# w) = primitive $ \s1 -> (# writeWideCharOffAddr# addr n w s1, () #)
{-# INLINE primAddrWrite #-}

instance PrimType CChar where
primSizeInBytes _ = Size 1
{-# INLINE primSizeInBytes #-}
primBaIndex ba (Offset n) = CChar (primBaIndex ba (Offset n :: Offset Int8))
{-# INLINE primBaIndex #-}
primMbaRead mba (Offset n) = CChar <$> primMbaRead mba (Offset n :: Offset Int8)
{-# INLINE primMbaRead #-}
primMbaWrite mba (Offset n) (CChar int8) = primMbaWrite mba (Offset n) int8
{-# INLINE primMbaWrite #-}
primAddrIndex addr (Offset n) = CChar $ primAddrIndex addr (Offset n :: Offset Int8)
{-# INLINE primAddrIndex #-}
primAddrRead addr (Offset n) = CChar <$> primAddrRead addr (Offset n :: Offset Int8)
{-# INLINE primAddrRead #-}
primAddrWrite addr (Offset n) (CChar int8) = primAddrWrite addr (Offset n) int8
{-# INLINE primAddrWrite #-}
instance PrimType CUChar where
primSizeInBytes _ = Size 1
{-# INLINE primSizeInBytes #-}
primBaIndex ba (Offset n) = CUChar (primBaIndex ba (Offset n :: Offset Word8))
{-# INLINE primBaIndex #-}
primMbaRead mba (Offset n) = CUChar <$> primMbaRead mba (Offset n :: Offset Word8)
{-# INLINE primMbaRead #-}
primMbaWrite mba (Offset n) (CUChar w8) = primMbaWrite mba (Offset n) w8
{-# INLINE primMbaWrite #-}
primAddrIndex addr (Offset n) = CUChar $ primAddrIndex addr (Offset n :: Offset Word8)
{-# INLINE primAddrIndex #-}
primAddrRead addr (Offset n) = CUChar <$> primAddrRead addr (Offset n :: Offset Word8)
{-# INLINE primAddrRead #-}
primAddrWrite addr (Offset n) (CUChar w8) = primAddrWrite addr (Offset n) w8
{-# INLINE primAddrWrite #-}


-- | Cast a Size linked to type A (Size A) to a Size linked to type B (Size B)
sizeRecast :: (PrimType a, PrimType b) => Size a -> Size b
sizeRecast = doRecast Proxy Proxy
Expand Down
267 changes: 267 additions & 0 deletions Foundation/String/ASCII.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
-- |
-- Module : Foundation.String.ASCII
-- License : BSD-style
-- Maintainer : Haskell Foundation
-- Stability : experimental
-- Portability : portable
--
-- A AsciiString type backed by a `ASCII` encoded byte array and all the necessary
-- functions to manipulate the string.
--
-- The recommended type is `AsciiString` from `Foundation.AsciiString.UTF8`
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE FlexibleContexts #-}
module Foundation.String.ASCII
( AsciiString
--, Buffer
, create
, replicate
-- * Binary conversion
, fromBytesUnsafe
, toBytes
, copy

-- * Legacy utility
, lines
, words
) where

import Foundation.Array.Unboxed (UArray)
import qualified Foundation.Array.Unboxed as Vec
import qualified Foundation.Array.Unboxed.Mutable as MVec
import qualified Foundation.Collection as C
import Foundation.Internal.Base
import Foundation.Internal.Types
import Foundation.Number
import Foundation.Primitive.Monad
import Foundation.Foreign

import GHC.Int
import GHC.Types
import GHC.Prim

-- temporary
import qualified Data.List
import qualified Prelude
import Foundation.Class.Bifunctor

ccharToChar :: CChar -> Char
ccharToChar (CChar (I8# i)) = C# (chr# i)
charToCChar :: Char -> CChar
charToCChar (C# i) = CChar (I8# (ord# i))

-- | Opaque packed array of characters in the ASCII encoding
newtype AsciiString = AsciiString { toBytes :: UArray CChar }
deriving (Typeable, Monoid, Eq, Ord)

newtype MutableAsciiString st = MutableAsciiString (MVec.MUArray CChar st)
deriving (Typeable)

instance Show AsciiString where
show = fmap ccharToChar . toList
instance IsString AsciiString where
fromString = fromList . fmap charToCChar
instance IsList AsciiString where
type Item AsciiString = CChar
fromList = sFromList
toList = sToList

type instance C.Element AsciiString = CChar

instance C.InnerFunctor AsciiString where
imap = ccharMap
instance C.Collection AsciiString where
null = null
length = length
minimum = Data.List.minimum . toList . C.getNonEmpty -- TODO faster implementation
maximum = Data.List.maximum . toList . C.getNonEmpty -- TODO faster implementation
instance C.Sequential AsciiString where
take = take
drop = drop
splitAt = splitAt
revTake = revTake
revDrop = revDrop
revSplitAt = revSplitAt
splitOn = splitOn
break = break
breakElem = breakElem
intersperse = intersperse
span = span
filter = filter
reverse = reverse
unsnoc = unsnoc
uncons = uncons
snoc = snoc
cons = cons
find = find
sortBy = sortBy
singleton = fromList . (:[])

instance C.Zippable AsciiString where
-- TODO Use a string builder once available
zipWith f a b = sFromList (C.zipWith f a b)

next :: AsciiString -> Offset CChar -> (# CChar, Offset CChar #)
next (AsciiString ba) (Offset n) = (# h, Offset (n + 1) #)
where
!h = Vec.unsafeIndex ba n

freeze :: PrimMonad prim => MutableAsciiString (PrimState prim) -> prim AsciiString
freeze (MutableAsciiString mba) = AsciiString `fmap` C.unsafeFreeze mba
{-# INLINE freeze #-}

------------------------------------------------------------------------
-- real functions

sToList :: AsciiString -> [CChar]
sToList s = loop azero
where
nbBytes :: Size CChar
!nbBytes = size s
!end = azero `offsetPlusE` nbBytes
loop idx
| idx == end = []
| otherwise =
let (# c , idx' #) = next s idx in c : loop idx'

sFromList :: [CChar] -> AsciiString
sFromList = AsciiString . fromList
{-# INLINE [0] sFromList #-}

null :: AsciiString -> Bool
null = Vec.null . toBytes
{-# INLINE null #-}

-- | Create a string composed of a number @n of Chars (Unicode code points).
--
-- if the input @s contains less characters than required, then
take :: Int -> AsciiString -> AsciiString
take n s = fst $ splitAt n s -- TODO specialize
{-# INLINE take #-}

-- | Create a string with the remaining Chars after dropping @n Chars from the beginning
drop :: Int -> AsciiString -> AsciiString
drop n = AsciiString . Vec.drop n . toBytes
{-# INLINE drop #-}

splitAt :: Int -> AsciiString -> (AsciiString, AsciiString)
splitAt n = bimap AsciiString AsciiString . Vec.splitAt n . toBytes
{-# INLINE splitAt #-}

-- rev{Take,Drop,SplitAt} TODO optimise:
-- we can process the string from the end using a skipPrev instead of getting the length

revTake :: Int -> AsciiString -> AsciiString
revTake nbElems v = drop (length v - nbElems) v

revDrop :: Int -> AsciiString -> AsciiString
revDrop nbElems v = take (length v - nbElems) v

revSplitAt :: Int -> AsciiString -> (AsciiString, AsciiString)
revSplitAt n v = (drop idx v, take idx v)
where idx = length v - n

-- | Split on the input string using the predicate as separator
--
-- e.g.
--
-- > splitOn (== ',') "," == ["",""]
-- > splitOn (== ',') ",abc," == ["","abc",""]
-- > splitOn (== ':') "abc" == ["abc"]
-- > splitOn (== ':') "abc::def" == ["abc","","def"]
-- > splitOn (== ':') "::abc::def" == ["","","abc","","def"]
--
splitOn :: (CChar -> Bool) -> AsciiString -> [AsciiString]
splitOn predicate = fmap AsciiString . Vec.splitOn predicate . toBytes

break :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
break predicate = bimap AsciiString AsciiString . Vec.break predicate . toBytes
{-# INLINE[0] break #-}

{-# RULES "break (== 'c')" [3] forall c . break (== c) = breakElem c #-}

breakElem :: CChar -> AsciiString -> (AsciiString, AsciiString)
breakElem !el (AsciiString ba) =
let (# v1,v2 #) = Vec.splitElem el ba in (AsciiString v1, AsciiString v2)
{-# INLINE breakElem #-}

intersperse :: CChar -> AsciiString -> AsciiString
intersperse sep = AsciiString . Vec.intersperse sep . toBytes

span :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
span predicate = break (not . predicate)

-- | size in bytes
size :: AsciiString -> Size CChar
size = Size . C.length . toBytes

length :: AsciiString -> Int
length s = let (Size l) = size s in l

replicate :: Int -> CChar -> AsciiString
replicate n c = AsciiString $ Vec.create n (const c)

-- | Copy the AsciiString
copy :: AsciiString -> AsciiString
copy (AsciiString s) = AsciiString (Vec.copy s)

-- | Allocate a MutableAsciiString of a specific size in bytes.
new :: PrimMonad prim
=> Size CChar -- ^ in number of bytes, not of elements.
-> prim (MutableAsciiString (PrimState prim))
new n = MutableAsciiString `fmap` MVec.new n

create :: PrimMonad prim => Int -> (MutableAsciiString (PrimState prim) -> prim Int) -> prim AsciiString
create sz f = do
ms <- new (Size sz)
filled <- f ms
if filled == sz
then freeze ms
else C.take filled `fmap` freeze ms

ccharMap :: (CChar -> CChar) -> AsciiString -> AsciiString
ccharMap f = AsciiString . Vec.map f . toBytes

snoc :: AsciiString -> CChar -> AsciiString
snoc (AsciiString ba) = AsciiString . Vec.snoc ba

cons :: CChar -> AsciiString -> AsciiString
cons c = AsciiString . Vec.cons c . toBytes

unsnoc :: AsciiString -> Maybe (AsciiString, CChar)
unsnoc str = first AsciiString <$> Vec.unsnoc (toBytes str)

uncons :: AsciiString -> Maybe (CChar, AsciiString)
uncons str = second AsciiString <$> Vec.uncons (toBytes str)

find :: (CChar -> Bool) -> AsciiString -> Maybe CChar
find predicate = Vec.find predicate . toBytes

sortBy :: (CChar -> CChar -> Ordering) -> AsciiString -> AsciiString
sortBy sortF = AsciiString . Vec.sortBy sortF . toBytes

filter :: (CChar -> Bool) -> AsciiString -> AsciiString
filter p s = fromList $ Data.List.filter p $ toList s

reverse :: AsciiString -> AsciiString
reverse (AsciiString ba) = AsciiString $ Vec.reverse ba

-- | Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity
--
-- If the input contains invalid sequences, it will trigger runtime async errors when processing data.
--
-- In doubt, use 'fromBytes'
fromBytesUnsafe :: UArray CChar -> AsciiString
fromBytesUnsafe = AsciiString

lines :: AsciiString -> [AsciiString]
lines = fmap fromString . Prelude.lines . show

words :: AsciiString -> [AsciiString]
words = fmap fromString . Prelude.words . show
4 changes: 3 additions & 1 deletion foundation.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Library
Foundation.Class.Bifunctor
Foundation.Convertible
Foundation.String
Foundation.String.ASCII
Foundation.IO
Foundation.IO.FileMap
Foundation.VFS
Expand Down Expand Up @@ -140,11 +141,12 @@ Test-Suite test-foundation
Other-modules: Test.Utils.Foreign
Test.Data.List
Test.Data.Unicode
Test.Foundation.Array
Test.Data.ASCII
Test.Foundation.Collection
Test.Foundation.Number
Test.Foundation.Encoding
Test.Foundation.Parser
Test.Foundation.Array
Test.Foundation.String
Build-Depends: base >= 3 && < 5
, mtl
Expand Down
17 changes: 17 additions & 0 deletions tests/Test/Data/ASCII.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- |
-- Module: Test.Data.ASCII
--

{-# LANGUAGE NoImplicitPrelude #-}

module Test.Data.ASCII
( genAsciiChar
) where

import Foundation
import Foundation.Foreign
import Test.Tasty.QuickCheck

-- | a better generator for unicode Character
genAsciiChar :: Gen CChar
genAsciiChar = toEnum <$> choose (1, 127)
6 changes: 6 additions & 0 deletions tests/Test/Foundation/Array.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ testArrayRefs = testGroup "Array"
, testCollection "UArray(I64)" (Proxy :: Proxy (UArray Int64)) arbitrary
, testCollection "UArray(F32)" (Proxy :: Proxy (UArray Float)) arbitrary
, testCollection "UArray(F64)" (Proxy :: Proxy (UArray Double)) arbitrary
, testCollection "UArray(CChar)" (Proxy :: Proxy (UArray CChar)) (CChar <$> arbitrary)
, testCollection "UArray(CUChar)" (Proxy :: Proxy (UArray CUChar)) (CUChar <$> arbitrary)
]
, testGroup "Unboxed-Foreign"
[ testGroup "UArray(W8)" (testUnboxedForeign (Proxy :: Proxy (UArray Word8)) arbitrary)
Expand All @@ -45,6 +47,8 @@ testArrayRefs = testGroup "Array"
, testGroup "UArray(I64)" (testUnboxedForeign (Proxy :: Proxy (UArray Int64)) arbitrary)
, testGroup "UArray(F32)" (testUnboxedForeign (Proxy :: Proxy (UArray Float)) arbitrary)
, testGroup "UArray(F64)" (testUnboxedForeign (Proxy :: Proxy (UArray Double)) arbitrary)
, testGroup "UArray(CChar)" (testUnboxedForeign (Proxy :: Proxy (UArray CChar)) (CChar <$> arbitrary))
, testGroup "UArray(CUChar)" (testUnboxedForeign (Proxy :: Proxy (UArray CUChar)) (CUChar <$> arbitrary))
]
, testGroup "Boxed"
[ testCollection "Array(W8)" (Proxy :: Proxy (Array Word8)) arbitrary
Expand All @@ -60,6 +64,8 @@ testArrayRefs = testGroup "Array"
, testCollection "Array(Int)" (Proxy :: Proxy (Array Int)) arbitrary
, testCollection "Array(Int,Int)" (Proxy :: Proxy (Array (Int,Int))) arbitrary
, testCollection "Array(Integer)" (Proxy :: Proxy (Array Integer)) arbitrary
, testCollection "Array(CChar)" (Proxy :: Proxy (Array CChar)) (CChar <$> arbitrary)
, testCollection "Array(CUChar)" (Proxy :: Proxy (Array CUChar)) (CUChar <$> arbitrary)
]
]

Expand Down
Loading