Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.

Benchs infrastructure and String optimisations #112

Merged
merged 11 commits into from
Sep 5, 2016
10 changes: 9 additions & 1 deletion Foundation/Array/Unboxed.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ module Foundation.Array.Unboxed
, unsafeUpdate
, unsafeIndex
, unsafeIndexer
, unsafeDewrap
, unsafeRead
, unsafeWrite
-- * Functions
Expand Down Expand Up @@ -180,7 +181,14 @@ unsafeIndexer (UVecAddr start _ fptr) f = withFinalPtr fptr (\ptr -> f (primAddr
{-# INLINE primAddrIndex' #-}
{-# NOINLINE unsafeIndexer #-}

{-# SPECIALIZE [3] unsafeIndexer :: UArray Word8 -> ((Offset Word8 -> Word8) -> ST s a) -> ST s a #-}
unsafeDewrap :: PrimType ty
=> (ByteArray# -> Offset ty -> a)
-> (Ptr ty -> Offset ty -> ST s a)
-> UArray ty
-> a
unsafeDewrap _ g (UVecAddr start _ fptr) = withUnsafeFinalPtr fptr $ \ptr -> g ptr start
unsafeDewrap f _ (UVecBA start _ _ ba) = f ba start
{-# INLINE unsafeDewrap #-}

foreignMem :: PrimType ty
=> FinalPtr ty -- ^ the start pointer with a finalizer
Expand Down
2 changes: 2 additions & 0 deletions Foundation/Internal/Base.hs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ module Foundation.Internal.Base
, Control.Exception.Exception
, Control.Exception.throw
, Control.Exception.throwIO
, GHC.Ptr.Ptr(..)
-- * Errors
, internalError
) where
Expand All @@ -68,6 +69,7 @@ import qualified Data.Int
import qualified Foundation.Internal.IsList
import qualified GHC.Exts
import qualified GHC.Generics
import qualified GHC.Ptr

-- | Only to use internally for internal error cases
internalError :: [Prelude.Char] -> a
Expand Down
1 change: 0 additions & 1 deletion Foundation/String/ModifiedUTF8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ module Foundation.String.ModifiedUTF8

import GHC.ST (runST, ST)
import GHC.Prim (Addr#)
import GHC.Ptr (Ptr(..))
import qualified Control.Monad (mapM)

import Foundation.Internal.Base
Expand Down
120 changes: 74 additions & 46 deletions Foundation/String/UTF8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,13 @@ mutableValidate mba ofsStart sz = do
else return (pos, Just InvalidContinuation)
_ -> error "internal error"

skipNext :: String -> Int -> Int
skipNext (String ba) n = n + 1 + getNbBytes h
where
!h = Vec.unsafeIndex ba n
skipNextHeaderValue :: Word8 -> Size Word8
skipNextHeaderValue !x
| x < 0xC0 = Size 1 -- 0b11000000
| x < 0xE0 = Size 2 -- 0b11100000
| x < 0xF0 = Size 3 -- 0b11110000
| otherwise = Size 4
{-# INLINE skipNextHeaderValue #-}

nextWithIndexer :: (Offset Word8 -> Word8)
-> Offset Word8
Expand Down Expand Up @@ -365,7 +368,7 @@ data UTF8Char =
| UTF8_4 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8

writeBytes :: Char -> UTF8Char
writeBytes c =
writeBytes !c =
if bool# (ltWord# x 0x80## ) then encode1
else if bool# (ltWord# x 0x800## ) then encode2
else if bool# (ltWord# x 0x10000##) then encode3
Expand Down Expand Up @@ -473,8 +476,6 @@ sFromList l = runST (new bytes >>= startCopy)
where
loop _ [] = freeze ms
loop idx (c:xs) = write ms idx c >>= \idx' -> loop idx' xs
-- write those bytes
--loop :: MutableByteArray# st -> Int# -> State# st -> [Char] -> (# State# st, String #)
{-# INLINE [0] sFromList #-}

null :: String -> Bool
Expand All @@ -484,30 +485,57 @@ null (String ba) = C.length ba == 0
--
-- if the input @s contains less characters than required, then
take :: Int -> String -> String
take n s = fst $ splitAt n s -- TODO specialize
take n s@(String ba)
| n <= 0 = mempty
| n >= C.length ba = s
| otherwise = let (Offset o) = indexN n s in String $ Vec.take o ba

-- | Create a string with the remaining Chars after dropping @n Chars from the beginning
drop :: Int -> String -> String
drop n s@(String ba)
| n <= 0 = s
| otherwise = loop 0 0
where
!sz = C.length ba
loop idx i
| idx >= sz = mempty
| i == n = String $ C.drop idx ba
| otherwise = loop (skipNext s idx) (i + 1)
| n <= 0 = s
| n >= C.length ba = mempty
| otherwise = let (Offset o) = indexN n s in String $ Vec.drop o ba

splitAt :: Int -> String -> (String, String)
splitAt n s@(String ba)
| n <= 0 = (mempty, s)
| otherwise = loop 0 0
splitAt nI s@(String ba)
| nI <= 0 = (mempty, s)
| nI >= C.length ba = (s, mempty)
| otherwise =
let (Offset k) = indexN nI s
(v1,v2) = C.splitAt k ba
in (String v1, String v2)

-- | Return the offset (in bytes) of the N'th sequence in an UTF8 String
indexN :: Int -> String -> Offset Word8
indexN nI (String ba) = Vec.unsafeDewrap goVec goAddr ba
where
!sz = C.length ba
loop idx i
| idx >= sz = (s, mempty)
| i == n = let (v1,v2) = C.splitAt idx ba in (String v1, String v2)
| otherwise = loop (skipNext s idx) (i + 1)
!n = Size nI
end :: Offset Char
!end = Offset 0 `offsetPlusE` n

goVec :: ByteArray# -> Offset Word8 -> Offset Word8
goVec !ma !start = loop start (Offset 0)
where
!len = start `offsetPlusE` Vec.lengthSize ba
loop :: Offset Word8 -> Offset Char -> Offset Word8
loop !idx !i
| idx >= len || i >= end = sizeAsOffset (idx - start)
| otherwise = loop (idx `offsetPlusE` d) (i + Offset 1)
where d = skipNextHeaderValue (primBaIndex ma idx)
{-# INLINE goVec #-}

goAddr :: Ptr Word8 -> Offset Word8 -> ST s (Offset Word8)
goAddr !(Ptr ptr) !start = return $ loop start (Offset 0)
where
!len = start `offsetPlusE` Vec.lengthSize ba
loop :: Offset Word8 -> Offset Char -> Offset Word8
loop !idx !i
| idx >= len || i >= end = sizeAsOffset (idx - start)
| otherwise = loop (idx `offsetPlusE` d) (i + Offset 1)
where d = skipNextHeaderValue (primAddrIndex ptr idx)
{-# INLINE goAddr #-}
{-# INLINE indexN #-}

-- rev{Take,Drop,SplitAt} TODO optimise:
-- we can process the string from the end using a skipPrev instead of getting the length
Expand Down Expand Up @@ -645,15 +673,29 @@ span predicate s = break (not . predicate) s
size :: String -> Size8
size (String ba) = Size $ C.length ba

length :: String -> Int
length s@(String ba) = loop 0 0
lengthSize :: String -> Size Word8
lengthSize (String ba)
| C.null ba = Size 0
| otherwise = Vec.unsafeDewrap goVec goAddr ba
where
!sz = C.length ba
loop idx !i
| idx == sz = i
| otherwise =
let idx' = skipNext s idx
in loop idx' (i + 1)
goVec ma start = loop start (Size 0)
where
!end = start `offsetPlusE` Vec.lengthSize ba
loop !idx !i
| idx >= end = i
| otherwise = loop (idx `offsetPlusE` d) (i + Size 1)
where d = skipNextHeaderValue (primBaIndex ma idx)

goAddr (Ptr ptr) start = return $ loop start (Size 0)
where
!end = start `offsetPlusE` Vec.lengthSize ba
loop !idx !i
| idx >= end = i
| otherwise = loop (idx `offsetPlusE` d) (i + Size 1)
where d = skipNextHeaderValue (primAddrIndex ptr idx)

length :: String -> Int
length s = let (Size sz) = lengthSize s in sz

replicate :: Int -> Char -> String
replicate n c = runST (new nbBytes >>= fill)
Expand All @@ -668,11 +710,6 @@ replicate n c = runST (new nbBytes >>= fill)
| idx == end = freeze ms
| otherwise = write ms idx c >>= loop

{-
sizeBytes :: String -> Int
sizeBytes (String ba) = I# (sizeofByteArray# ba)
-}

-- | Copy the String
copy :: String -> String
copy (String s) = String (Vec.copy s)
Expand Down Expand Up @@ -850,15 +887,6 @@ reverse s@(String ba) = runST $ do
_ -> return () -- impossible
loop ms (sidx `offsetPlusE` nb) didx'

{-
-- | Convert a Byte Array to a string and check UTF8 validity
fromBytes :: Encoding -> UArray Word8 -> Maybe String
fromBytes UTF8 bytes =
case validate bytes 0 (C.length bytes) of
(_, Nothing) -> Just $ fromBytesUnsafe bytes
(_, Just _) -> Nothing
-}

data Encoding
= ASCII7
| UTF8
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,28 @@ Each modules that get compiled will create an equivalent file in the build direc
* ModuleName.dump-simpl
* ModuleName.dump-asm

For profiling individual programs, the following command is useful:

stack ghc -- -O --make X.hs -prof -auto-all -caf-all -fforce-recomp

Benchmarking
============

To get the list of benchmark:

stack bench --benchmark-arguments -l

To compare against other libraries, you need to set the `bench-all` flag

stack bench --flag foundation:bench-all --benchmark-arguments -l

To run a specific or set of benchmarks :

stack bench --flag foundation:bench-all --benchmark-arguments 'types/String/SplitAt/mascii-10/Text'
stack bench --flag foundation:bench-all --benchmark-arguments '-m prefix types/String/SplitAt'
stack bench --flag foundation:bench-all --benchmark-arguments '-m glob types/String/SplitAt'


Design
======

Expand Down
1 change: 1 addition & 0 deletions benchs/BenchUtil/Common.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module BenchUtil.Common
( defaultMain
, Benchmark
, bgroup
, bench
, fbench
Expand Down
28 changes: 28 additions & 0 deletions benchs/BenchUtil/RefData.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
module BenchUtil.RefData
( rdLoremIpsum1
, rdLoremIpsum5
, rdFoundationEn
, rdFoundationZh
, rdFoundationJap
, rdFoundationHun
) where

import Prelude (Char)

rdLoremIpsum1 :: [Char]
rdLoremIpsum1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam ornare dui vitae porta varius. In quis diam sed felis elementum ultricies non sit amet lorem. Nullam ut erat varius lectus scelerisque iaculis sed eu leo. Vivamus gravida interdum elit suscipit tempus. Quisque at mauris ac sapien consequat feugiat. In varius interdum rhoncus. Etiam hendrerit pharetra consectetur. Pellentesque laoreet, nisi quis feugiat rhoncus, nisi ipsum tincidunt nulla, vel fermentum mauris nisl sed felis. Sed ac convallis nibh. Donec rutrum finibus odio et rhoncus. Suspendisse pulvinar ex ac fermentum fermentum. Nam dui dui, lobortis sit amet sapien sed, gravida sagittis magna. Vestibulum nec egestas dui, non efficitur lectus. Fusce vitae mattis sem, nec dignissim nibh. Sed ac tincidunt metus."

rdLoremIpsum5 :: [Char]
rdLoremIpsum5 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam ornare dui vitae porta varius. In quis diam sed felis elementum ultricies non sit amet lorem. Nullam ut erat varius lectus scelerisque iaculis sed eu leo. Vivamus gravida interdum elit suscipit tempus. Quisque at mauris ac sapien consequat feugiat. In varius interdum rhoncus. Etiam hendrerit pharetra consectetur. Pellentesque laoreet, nisi quis feugiat rhoncus, nisi ipsum tincidunt nulla, vel fermentum mauris nisl sed felis. Sed ac convallis nibh. Donec rutrum finibus odio et rhoncus. Suspendisse pulvinar ex ac fermentum fermentum. Nam dui dui, lobortis sit amet sapien sed, gravida sagittis magna. Vestibulum nec egestas dui, non efficitur lectus. Fusce vitae mattis sem, nec dignissim nibh. Sed ac tincidunt metus. Vestibulum ac bibendum ex. In vulputate pellentesque elementum. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Maecenas elit libero, vehicula eget hendrerit non, convallis vel metus. Maecenas faucibus nulla id quam vestibulum, eget commodo tellus interdum. Mauris eu odio id lacus gravida sollicitudin. Aenean vel velit enim. Phasellus vitae urna nisl. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc volutpat convallis elementum. Curabitur suscipit congue ligula non maximus. Fusce tristique lacinia sem sed condimentum. Sed non eleifend mi, fringilla congue tortor. Nunc rhoncus sit amet nisl ac tempor. Fusce sed consectetur purus, et aliquam sem. Vestibulum finibus lectus et vehicula euismod. Aliquam sed neque mattis, sollicitudin enim sed, vestibulum est. Quisque varius pharetra risus id tempor. In hac habitasse platea dictumst. Donec cursus nisi sed magna bibendum aliquet. Mauris a elit id erat imperdiet consequat. Phasellus at condimentum ipsum. Pellentesque vehicula pulvinar ipsum et porta. Nullam quis quam mauris. Sed scelerisque porta nibh eu tempor. Morbi sollicitudin fringilla sollicitudin. Cras nec velit quis velit sollicitudin pellentesque. Phasellus quis ullamcorper nisi. Curabitur fringilla sed turpis sit amet pharetra. Cras euismod eget massa eu posuere. Suspendisse id aliquam enim. Nullam sollicitudin aliquet elementum. Nulla sit amet ligula vitae lorem finibus laoreet sed ac velit. Nulla facilisi. Aenean vel pretium lectus. Nunc augue lorem, viverra et felis vel, vestibulum feugiat nisl. Vestibulum imperdiet laoreet posuere. Maecenas vestibulum consequat felis eu aliquam. Nullam ac efficitur ante, eget egestas mauris. Cras id tincidunt nisi. Cras tincidunt molestie lorem et bibendum. Donec commodo porttitor faucibus. Aenean aliquam suscipit iaculis. Cras eu purus sit amet elit rhoncus laoreet. Vestibulum fringilla nulla ut neque vestibulum porttitor. Pellentesque vitae risus elit. Quisque et sapien eu diam tincidunt luctus ac quis nunc. Proin nec nisl eget diam faucibus tempus id sed quam. Ut scelerisque enim lacus, at mollis diam sagittis et. Nam lobortis convallis maximus. Donec maximus tortor id consequat venenatis."

rdFoundationEn :: [Char]
rdFoundationEn = "Set in the year 0 F.E. (\"Foundation Era\"), The Psychohistorians opens on Trantor, the capital of the 12,000-year-old Galactic Empire. Though the empire appears stable and powerful, it is slowly decaying in ways that parallel the decline of the Western Roman Empire. Hari Seldon, a mathematician and psychologist, has developed psychohistory, a new field of science and psychology that equates all possibilities in large societies to mathematics, allowing for the prediction of future events."

rdFoundationZh :: [Char]
rdFoundationZh = "故事發生在〈心理史學家〉五十年後,端點星面臨首度的「謝頓危機」(Seldon Crisis)銀河帝國邊緣的星群紛紛獨立起來,端點星處於四個王國之間,備受威脅。此時,謝頓早前錄下影像突然播放,告知他的後人端點星「銀河百科全書第一號基地」的真正目的──在千年後建立一個新的銀河帝國。同時,在這一千年間,基地會遇到各種不同的危機,令基地可以急速成長。端點星市長塞佛·哈定(Salvor Hardin)趁機發動政變,從心神未定的百科全書理事會手中奪權,以他靈活的手腕帶領端點星走出危機。"

rdFoundationHun :: [Char]
rdFoundationHun = "A történet G.K. 12 067-ben (A.K. 1) játszódik. A fiatal és tehetséges matematikus, Gaal Dornick a Trantorra, a Galaktikus Birodalom központi bolygójára tart, hogy csatlakozzon egy tekintélyes matematikus, I. Cleon császár egykori első minisztere, Hari Seldon tervezetéhez, a Seldon-tervhez. Gaal nem sokat tud a terv mibenlétéről, ám Seldon személyes meghívásának hatására a Trantorra indul. Megérkezése után nem sokkal találkozik Seldonnal, aki elmondja neki, hogy a tervet és az azzal kapcsolatba hozható személyeket – így őt is – a Közbiztonsági Bizottság – a Birodalomban a császárral szemben a tényleges hatalmat gyakorló testület –, szigorú megfigyelés alatt tart. Seldon beszél Gaal-nak a terv néhány részletéről, és megemlíti, hogy Trantor a pszichohistóriai számítások szerint 500 éven belül elpusztul. Találkozásuk másnapján Gaal-t és Seldon-t is letartóztatják."

rdFoundationJap :: [Char]
rdFoundationJap = "数学者ハリ・セルダンは、膨大な集団の行動を予測する心理歴史学を作りあげ発展させることで、銀河帝国が近いうちに崩壊することを予言する[1]。セルダンは、帝国崩壊後に3万年続くはずの暗黒時代を、あらゆる知識を保存することで千年に縮めようとし、知識の集大成となる銀河百科事典 (Encyclopedia Galactica) を編纂するグループ「ファウンデーション」をつくったが、帝国崩壊を公言し平和を乱したという罪で裁判にかけられ、グループは銀河系辺縁部にある資源の乏しい無人惑星ターミナスへ追放されることになった。しかし、この追放劇すらもセルダンの計画に予定されていた事柄であった。病で死期をさとっていたセルダンは、己の仕事が終わったことを確信する。"
81 changes: 81 additions & 0 deletions benchs/Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Main where

import Foundation
import Foundation.Collection
import BenchUtil.Common
import BenchUtil.RefData

#ifdef BENCH_ALL
import qualified Data.Text as Text
type TextText = Text.Text

textPack = Text.pack
textLength = Text.length
textSplitAt = Text.splitAt
textTake = Text.take
#else
data TextText = Text

textPack _ = Text
textLength = undefined
textSplitAt _ _ = (undefined, undefined)
textTake = undefined
#endif

--------------------------------------------------------------------------

benchsString = bgroup "String"
[ benchLength
, benchTake
, benchSplitAt
-- , bgroup "SplitAt"
]
where
diffTextString :: (String -> a)
-> (TextText -> b)
-> [Char]
-> [Benchmark]
diffTextString foundationBench textBench dat =
[ bench "String" $ whnf foundationBench s
#ifdef BENCH_ALL
, bench "Text" $ whnf textBench t
#endif
]
where
s = fromList dat
t = textPack dat

benchLength = bgroup "Length" $
fmap (\(n, dat) -> bgroup n $ diffTextString length textLength dat)
[ ("ascii", rdFoundationEn)
, ("mascii", rdFoundationHun)
, ("uni1" ,rdFoundationJap)
, ("uni2" ,rdFoundationZh)
]
benchTake = bgroup "Take" $
mconcat $ fmap (\p ->
fmap (\(n, dat) -> bgroup n $ diffTextString (take p) (textTake p) dat)
[ ("ascii-" <> show p, rdFoundationEn)
, ("mascii-" <> show p, rdFoundationHun)
, ("uni1-" <> show p,rdFoundationJap)
, ("uni2-" <> show p,rdFoundationZh)
]) [ 10, 100, 800 ]
benchSplitAt = bgroup "SplitAt" $
mconcat $ fmap (\p ->
fmap (\(n, dat) -> bgroup n $ diffTextString (fst . splitAt p) (fst . textSplitAt p) dat)
[ ("ascii-" <> show p, rdFoundationEn)
, ("mascii-" <> show p, rdFoundationHun)
, ("uni1-" <> show p,rdFoundationJap)
, ("uni2-" <> show p,rdFoundationZh)
]) [ 10, 100, 800 ]

--------------------------------------------------------------------------

benchsTypes = bgroup "types"
[ benchsString
]

main = defaultMain
[ benchsTypes ]
Loading