Skip to content

Fix micro benchmarks #538

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 36 additions & 36 deletions bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

module Bench.Database.LSMTree.Internal.Lookup (benchmarks) where

import Control.Exception (assert)
import Control.Monad
import Control.Monad.ST.Strict (stToIO)
import Control.RefCount
Expand All @@ -11,16 +10,16 @@ import Criterion.Main (Benchmark, bench, bgroup, env, envWithCleanup,
import Data.Arena (ArenaManager, closeArena, newArena,
newArenaManager, withArena)
import Data.Bifunctor (Bifunctor (..))
import Data.ByteString (ByteString)
import qualified Data.List as List
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe (fromMaybe)
import qualified Data.Vector as V
import Database.LSMTree.Extras.Orphans ()
import Database.LSMTree.Extras.Random (frequency,
import Database.LSMTree.Extras.Random (frequency, randomByteStringR,
sampleUniformWithReplacement, uniformWithoutReplacement)
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.BlobRef (BlobSpan (..))
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
intraPageLookups, lookupsIO, prepLookups)
Expand All @@ -34,6 +33,7 @@ import Database.LSMTree.Internal.Serialise
import qualified Database.LSMTree.Internal.WriteBuffer as WB
import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB
import GHC.Exts (RealWorld)
import GHC.Stack (HasCallStack)
import Prelude hiding (getContents)
import System.Directory (removeDirectoryRecursive)
import qualified System.FS.API as FS
Expand Down Expand Up @@ -83,7 +83,7 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Lookup" [

benchLookups :: Config -> Benchmark
benchLookups conf@Config{name} =
withEnv $ \ ~(_dir, arenaManager, hasFS, hasBlockIO, rs, ks) ->
withEnv $ \ ~(_dir, arenaManager, _hasFS, hasBlockIO, wbblobs, rs, ks) ->
env ( pure ( V.map (\(DeRef r) -> Run.runFilter r) rs
, V.map (\(DeRef r) -> Run.runIndex r) rs
, V.map (\(DeRef r) -> Run.runKOpsFile r) rs
Expand Down Expand Up @@ -126,28 +126,24 @@ benchLookups conf@Config{name} =
( do arena <- newArena arenaManager
(rkixs, ioops) <- stToIO (prepLookups arena blooms indexes kopsFiles ks)
ioress <- FS.submitIO hasBlockIO ioops
wbblobs <- WBB.new hasFS (FS.mkFsPath [])
pure (rkixs, ioops, ioress, arena, wbblobs)
pure (rkixs, ioops, ioress, arena)
)
(\(_, _, _, arena, wbblobs) -> do
closeArena arenaManager arena
releaseRef wbblobs)
(\ ~(rkixs, ioops, ioress, _, wbblobs_unused) -> do
!_ <- intraPageLookups resolveV WB.empty wbblobs_unused
(\(_, _, _, arena) -> closeArena arenaManager arena)
(\ ~(rkixs, ioops, ioress, _) -> do
!_ <- intraPageLookups resolveV WB.empty wbblobs
rs ks rkixs ioops ioress
pure ())
-- The whole shebang: lookup preparation, doing the IO, and then
-- performing intra-page-lookups. Again, we evaluate the result to
-- WHNF because it is the same result that intraPageLookups produces
-- (see above).
, let wb_unused = WB.empty in
env (WBB.new hasFS (FS.mkFsPath [])) $ \wbblobs_unused ->
bench "Lookups in IO" $
, bench "Lookups in IO" $
whnfAppIO (\ks' -> lookupsIO hasBlockIO arenaManager resolveV
wb_unused wbblobs_unused
WB.empty wbblobs
rs blooms indexes kopsFiles ks') ks
]
--TODO: consider adding benchmarks that also use the write buffer
-- TODO: consider adding benchmarks that also use the write buffer
-- (then we can't just use 'WB.empty', but must take it from the env)
where
withEnv = envWithCleanup
(lookupsInBatchesEnv conf)
Expand Down Expand Up @@ -181,6 +177,7 @@ lookupsInBatchesEnv ::
, ArenaManager RealWorld
, FS.HasFS IO FS.HandleIO
, FS.HasBlockIO IO FS.HandleIO
, Ref (WBB.WriteBufferBlobs IO FS.HandleIO)
, V.Vector (Ref (Run IO FS.HandleIO))
, V.Vector SerialisedKey
)
Expand All @@ -191,19 +188,19 @@ lookupsInBatchesEnv Config {..} = do
(storedKeys, lookupKeys) <- lookupsEnv (mkStdGen 17) nentries npos nneg
let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir)
hasBlockIO <- FS.ioHasBlockIO hasFS (fromMaybe FS.defaultIOCtxParams ioctxps)
let wb = WB.fromMap storedKeys
fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
wbblobs <- WBB.new hasFS (FS.mkFsPath [])
wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"])
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys
let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) fsps wb wbblobs
let NumEntries nentriesReal = Run.size r
assert (nentriesReal == nentries) $ pure ()
let npagesReal = Run.sizeInPages r
assert (getNumPages npagesReal * 42 <= nentriesReal) $ pure ()
assert (getNumPages npagesReal * 43 >= nentriesReal) $ pure ()
assertEqual nentriesReal nentries $ pure ()
-- 42 to 43 entries per page
assertEqual (nentriesReal `div` getNumPages (Run.sizeInPages r)) 42 $ pure ()
pure ( benchTmpDir
, arenaManager
, hasFS
, hasBlockIO
, wbblobs
, V.singleton r
, lookupKeys
)
Expand All @@ -213,13 +210,15 @@ lookupsInBatchesCleanup ::
, ArenaManager RealWorld
, FS.HasFS IO FS.HandleIO
, FS.HasBlockIO IO FS.HandleIO
, Ref (WBB.WriteBufferBlobs IO FS.HandleIO)
, V.Vector (Ref (Run IO FS.HandleIO))
, V.Vector SerialisedKey
)
-> IO ()
lookupsInBatchesCleanup (tmpDir, _arenaManager, _hasFS, hasBlockIO, rs, _) = do
lookupsInBatchesCleanup (tmpDir, _arenaManager, _hasFS, hasBlockIO, wbblobs, rs, _) = do
FS.close hasBlockIO
forM_ rs releaseRef
releaseRef wbblobs
removeDirectoryRecursive tmpDir

-- | Generate keys to store and keys to lookup
Expand All @@ -228,8 +227,8 @@ lookupsEnv ::
-> Int -- ^ Number of stored key\/operation pairs
-> Int -- ^ Number of positive lookups
-> Int -- ^ Number of negative lookups
-> IO ( Map SerialisedKey (Entry SerialisedValue BlobSpan)
, V.Vector (SerialisedKey)
-> IO ( Map SerialisedKey (Entry SerialisedValue SerialisedBlob)
, V.Vector SerialisedKey
)
lookupsEnv g nentries npos nneg = do
let (g1, g') = R.split g
Expand All @@ -242,25 +241,26 @@ lookupsEnv g nentries npos nneg = do
lookups <- generate $ shuffle (negLookups ++ posLookups)

let entries' = Map.mapKeys serialiseKey
$ Map.map (bimap serialiseValue id) entries
$ Map.map (bimap serialiseValue serialiseBlob) entries
lookups' = V.fromList $ fmap serialiseKey lookups
assert (Map.size entries' == nentries) $ pure ()
assert (length lookups' == npos + nneg) $ pure ()
assertEqual (Map.size entries') (nentries) $ pure ()
assertEqual (length lookups') (npos + nneg) $ pure ()
pure (entries', lookups')

-- TODO: tweak distribution
randomEntry :: StdGen -> (Entry UTxOValue BlobSpan, StdGen)
randomEntry :: StdGen -> (Entry UTxOValue ByteString, StdGen)
randomEntry g = frequency [
(20, \g' -> let (!v, !g'') = uniform g' in (Insert v, g''))
, (1, \g' -> let (!v, !g'') = uniform g'
(!b, !g''') = genBlobSpan g''
(!b, !g''') = randomByteStringR (0, 2000) g'' -- < 2kB
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can generate much smaller blobs. It would speed up the benchmark setup, and the performance of the lookups code does not depend on the blob size. But it's maybe also not super important because inserts with blobs are generated only rarely

in (InsertWithBlob v b, g'''))
, (2, \g' -> let (!v, !g'') = uniform g' in (Mupdate v, g''))
, (2, \g' -> (Delete, g'))
] g

genBlobSpan :: RandomGen g => g -> (BlobSpan, g)
genBlobSpan !g =
let (off, !g') = uniform g
(len, !g'') = uniform g'
in (BlobSpan off len, g'')
-- | Assertions on the generated environment should also be checked for release
-- builds, so don't use 'Control.Exception.assert'.
assertEqual :: (HasCallStack, Eq a, Show a) => a -> a -> b -> b
assertEqual x y
| x == y = id
| otherwise = error $ show x ++ " /= " ++ show y
36 changes: 18 additions & 18 deletions bench/micro/Bench/Database/LSMTree/Internal/Merge.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module Bench.Database.LSMTree.Internal.Merge (benchmarks) where

import Control.Monad (when, zipWithM)
import Control.Monad (zipWithM)
import Control.RefCount
import Criterion.Main (Benchmark, bench, bgroup)
import qualified Criterion.Main as Cr
import Data.Bifunctor (first)
import qualified Data.BloomFilter.Hash as Hash
import Data.Foldable (traverse_)
import Data.IORef
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import Data.Maybe (fromMaybe)
Expand All @@ -19,8 +20,7 @@ import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.Merge (MergeType (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.Paths (RunFsPaths (..),
pathsForRunFiles, runChecksumsPath)
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
Expand Down Expand Up @@ -114,7 +114,7 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Merge" [
{ name = "insert-large-keys-x4" -- potentially long keys
, nentries = (totalEntries `div` 10) `splitInto` 4
, finserts = 1
, randomKey = first serialiseKey . R.randomByteStringR (6, 4000)
, randomKey = first serialiseKey . R.randomByteStringR (8, 4000)
}
, benchMerge configWord64
{ name = "insert-mixed-vals-x4" -- potentially long values
Expand Down Expand Up @@ -201,28 +201,28 @@ benchMerge conf@Config{name} =
-- thread `runs` through the environment, too.
-- 2. It forces the result to normal form, which would traverse the
-- whole run, so we force to WHNF ourselves and just return `()`.

-- We make sure to immediately close resulting runs so we don't run
-- out of file handles or disk space. However, we don't want it to
-- be part of the measurement, as it includes deleting files.
-- Therefore, ... TODO
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a dangling TODO here

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, that slipped through. Thanks for pointing it out. #547

Cr.perRunEnvWithCleanup
(pure (runs, outputRunPaths))
(const (removeOutputRunFiles hasFS)) $ \(runs', p) -> do
!run <- merge hasFS hasBlockIO conf p runs'
-- Make sure to immediately close resulting runs so we don't run
-- out of file handles. Ideally this would not be measured, but at
-- least it's pretty cheap.
releaseRef run
((runs,) <$> newIORef Nothing)
(releaseRun . snd) $ \(runs', ref) -> do
!run <- merge hasFS hasBlockIO conf outputRunPaths runs'
writeIORef ref $ Just $ releaseRef run
]
where
withEnv =
Cr.envWithCleanup
(mergeEnv conf)
mergeEnvCleanup

-- We need to keep the input runs, but remove the freshly created one.
removeOutputRunFiles :: FS.HasFS IO FS.HandleIO -> IO ()
removeOutputRunFiles hasFS = do
traverse_ (FS.removeFile hasFS) (pathsForRunFiles outputRunPaths)
exists <- FS.doesFileExist hasFS (runChecksumsPath outputRunPaths)
when exists $
FS.removeFile hasFS (runChecksumsPath outputRunPaths)
releaseRun :: IORef (Maybe (IO ())) -> IO ()
releaseRun ref =
readIORef ref >>= \case
Nothing -> pure ()
Just release -> release

merge ::
FS.HasFS IO FS.HandleIO
Expand Down
Loading