Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More granular micro-benchmark for the IO part of lookups #214

Merged
merged 1 commit into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 49 additions & 20 deletions bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module Bench.Database.LSMTree.Internal.Lookup (benchmarks) where
import Control.Exception (assert)
import Control.Monad
import Criterion.Main (Benchmark, bench, bgroup, env, envWithCleanup,
nfAppIO, whnf, whnfAppIO)
perRunEnv, whnf, whnfAppIO)
import Data.Bifunctor (Bifunctor (..))
import qualified Data.List as List
import Data.Map.Strict (Map)
Expand All @@ -30,8 +30,8 @@ import Database.LSMTree.Extras.Random (frequency,
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
import Database.LSMTree.Internal.Lookup (BatchSize (..),
bloomQueriesDefault, indexSearches, lookupsInBatches,
prepLookups)
bloomQueriesDefault, indexSearches, intraPageLookups,
lookupsInBatches, prepLookups, submitInBatches)
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import Database.LSMTree.Internal.RunFsPaths (RunFsPaths (..))
Expand All @@ -50,7 +50,20 @@ import Test.QuickCheck (generate, shuffle)
-- | TODO: add a separate micro-benchmark that includes multi-pages.
benchmarks :: Benchmark
benchmarks = bgroup "Bench.Database.LSMTree.Internal.Lookup" [
benchLookups defaultConfig
benchLookups Config {
name = "2_000_000 entries, 256 positive lookups"
, nentries = 2_000_000
, npos = 256
, nneg = 0
, ioctxps = Nothing
}
, benchLookups Config {
name = "2_000_000 entries, 256 negative lookups"
, nentries = 2_000_000
, npos = 0
, nneg = 256
, ioctxps = Nothing
}
]

benchLookups :: Config -> Benchmark
Expand All @@ -63,22 +76,47 @@ benchLookups conf@Config{name} =
) $ \ ~(blooms, indexes, kopsFiles) ->
bgroup name [
-- The bloomfilter is queried for all lookup keys. The result is an
-- unboxed vector, so only use whnf.
-- unboxed vector, so only use @whnf@.
bench "Bloomfilter query" $
whnf (\ks' -> bloomQueriesDefault blooms ks') ks
-- The compact index is only searched for (true and false) positive
-- lookup keys. We use whnf here because the result is an unboxed
-- vector.
-- lookup keys. We use whnf here because the result is
, env (pure $ bloomQueriesDefault blooms ks) $ \rkixs ->
bench "Compact index search" $
whnfAppIO (\ks' -> indexSearches indexes kopsFiles ks' rkixs) ks
-- All prepped lookups are going to be used eventually so we use
-- @nf@ on the vector of 'IOOp's. We only evaluate the vector of
-- indexes to WHNF, because it is an unboxed vector.
-- prepLookups combines bloom filter querying and index searching.
-- The implementation forces the results to WHNF, so we use
-- whnfAppIO here instead of nfAppIO.
, bench "Lookup preparation in memory" $
whnfAppIO (\ks' -> prepLookups blooms indexes kopsFiles ks') ks
-- Submit the IOOps we get from prepLookups to HasBlockIO. We use
-- perRunEnv because IOOps contain mutable buffers, so we want fresh
-- ones for each run of the benchmark. We manually evaluate the
-- result to WHNF since it is unboxed vector.
, bench "Submit IOOps in batches" $
perRunEnv (prepLookups blooms indexes kopsFiles ks) $ \ ~(_rkixs, ioops) -> do
!_ioress <- submitInBatches hasBlockIO bsize ioops
pure ()
-- When IO result have been collected, intra-page lookups searches
-- through the raw bytes (representing a disk page) for the lookup
-- key. Again, we use perRunEnv here because IOOps contain mutable
-- buffers, so we want fresh ones for each run of the benchmark. The
-- result is a boxed vector of Maybe Entry, but since the
-- implementation takes care to evaluate each of the elements, we
-- only compute WHNF.
, bench "Perform intra-page lookups" $
perRunEnv ( prepLookups blooms indexes kopsFiles ks >>= \(rkixs, ioops) ->
submitInBatches hasBlockIO bsize ioops >>= \ioress ->
pure (rkixs, ioops, ioress)
) $ \ ~(rkixs, ioops, ioress) -> do
!_ <- intraPageLookups resolveV rs ks rkixs ioops ioress
pure ()
-- The whole shebang: lookup preparation, doing the IO in batches,
-- and then performing intra-page-lookups. Again, we evaluate the
-- result to WHNF because it is the same result that
-- intraPageLookups produces (see above).
, bench "Batched lookups in IO" $
nfAppIO (\ks' -> lookupsInBatches hasBlockIO bsize resolveV rs blooms indexes kopsFiles ks') ks
whnfAppIO (\ks' -> lookupsInBatches hasBlockIO bsize resolveV rs blooms indexes kopsFiles ks') ks
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did this make a noticeable difference?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference is not very noticeable, or at least small enough that I can't say for certain whether there is a significant difference. We're talking about a ~10 micro-seconds difference in the positive lookup scenario, and no real difference in the negative lookup scenario

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conceptually whnfAppIO is the right thing to do: the data should already be fully evaluated

]
where
withEnv = envWithCleanup
Expand All @@ -105,15 +143,6 @@ data Config = Config {
, ioctxps :: !(Maybe FS.IOCtxParams)
}

defaultConfig :: Config
defaultConfig = Config {
name = "2_000_000 entries, 256 positive lookups"
, nentries = 2_000_000
, npos = 256
, nneg = 0
, ioctxps = Nothing
}

lookupsInBatchesEnv ::
Config
-> IO ( FilePath -- ^ Temporary directory
Expand Down
29 changes: 23 additions & 6 deletions src/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module Database.LSMTree.Internal.Lookup (
-- * Lookups in IO
, BatchSize (..)
, lookupsInBatches
, submitInBatches
, intraPageLookups
) where

Expand Down Expand Up @@ -77,8 +78,8 @@ prepLookups ::
-> V.Vector SerialisedKey
-> m (VU.Vector (RunIx, KeyIx), V.Vector (IOOp (PrimState m) h))
prepLookups blooms indexes kopsFiles ks = do
let rkixs = bloomQueriesDefault blooms ks
ioops <- indexSearches indexes kopsFiles ks rkixs
let !rkixs = bloomQueriesDefault blooms ks
!ioops <- indexSearches indexes kopsFiles ks rkixs
pure (rkixs, ioops)

type KeyIx = Int
Expand Down Expand Up @@ -286,17 +287,33 @@ lookupsInBatches ::
-> V.Vector SerialisedKey
-> m (V.Vector (Maybe (Entry SerialisedValue (BlobRef (Run (Handle h))))))
lookupsInBatches !hbio !n !resolveV !rs !blooms !indexes !kopsFiles !ks = assert precondition $ do
(rkixs0, ioops0) <- prepLookups blooms indexes kopsFiles ks
let batches = batchesOfN (unBatchSize n) ioops0
ioress <- forConcurrently batches (submitIO hbio)
intraPageLookups resolveV rs ks rkixs0 ioops0 (VU.concat ioress)
(rkixs, ioops) <- prepLookups blooms indexes kopsFiles ks
ioress <- submitInBatches hbio n ioops
intraPageLookups resolveV rs ks rkixs ioops ioress
where
precondition = and [
V.map Run.runFilter rs == blooms
, V.map Run.runIndex rs == indexes
, V.length rs == V.length kopsFiles
]

{-# SPECIALIZE submitInBatches ::
HasBlockIO IO HandleIO
-> BatchSize
-> V.Vector (IOOp RealWorld HandleIO)
-> IO (VU.Vector IOResult)
#-}
-- | Submit I\/O operation to the 'HasBlockIO' interface in batches.
submitInBatches ::
MonadAsync m
=> HasBlockIO m h
-> BatchSize
-> V.Vector (IOOp (PrimState m) h)
-> m (VU.Vector IOResult)
submitInBatches !hbio !n !ioops = do
let batches = batchesOfN (unBatchSize n) ioops
VU.concat <$> forConcurrently batches (submitIO hbio)

{-# SPECIALIZE intraPageLookups ::
ResolveSerialisedValue
-> V.Vector (Run (Handle HandleIO))
Expand Down