From b6667ec80d253e3b79b2356176b5e65d780d727b Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Tue, 23 Apr 2024 13:24:11 +0200 Subject: [PATCH 1/3] Make `ioHasBufFS` more general The function is now no longer constrained to `PrimBase m`, but any `m` for which `PrimState m ~ PrimState IO`. --- fs-api/src/System/FS/IO.hs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs-api/src/System/FS/IO.hs b/fs-api/src/System/FS/IO.hs index 803d4dc..32fb7c1 100644 --- a/fs-api/src/System/FS/IO.hs +++ b/fs-api/src/System/FS/IO.hs @@ -1,3 +1,6 @@ +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} + -- | IO implementation of the 'HasFS' class module System.FS.IO ( -- * IO implementation & monad @@ -9,7 +12,7 @@ module System.FS.IO ( import Control.Concurrent.MVar import qualified Control.Exception as E import Control.Monad.IO.Class (MonadIO (..)) -import Control.Monad.Primitive (PrimBase) +import Control.Monad.Primitive (PrimMonad (..)) import qualified Data.ByteString.Unsafe as BS import Data.Primitive (withMutableByteArrayContents) import qualified Data.Set as Set @@ -103,19 +106,22 @@ _rethrowFsError mount fp action = do HasBufFS -------------------------------------------------------------------------------} -ioHasBufFS :: (MonadIO m, PrimBase m) => MountPoint -> HasBufFS m HandleIO +ioHasBufFS :: + (MonadIO m, PrimState IO ~ PrimState m) + => MountPoint + -> HasBufFS m HandleIO ioHasBufFS mount = HasBufFS { - hGetBufSome = \(Handle h fp) buf bufOff c -> - withMutableByteArrayContents buf $ \ptr -> liftIO $ rethrowFsError fp $ + hGetBufSome = \(Handle h fp) buf bufOff c -> liftIO $ rethrowFsError fp $ + withMutableByteArrayContents buf $ \ptr -> F.readBuf h (ptr `Foreign.plusPtr` unBufferOffset bufOff) c - , hGetBufSomeAt = \(Handle h fp) buf bufOff c off -> - withMutableByteArrayContents buf $ \ptr -> liftIO $ rethrowFsError fp $ + , hGetBufSomeAt = \(Handle h fp) buf bufOff c off -> liftIO $ rethrowFsError fp $ + withMutableByteArrayContents buf $ \ptr -> F.preadBuf h (ptr `Foreign.plusPtr` unBufferOffset bufOff) c (fromIntegral $ unAbsOffset off) - , hPutBufSome = \(Handle h fp) buf bufOff c -> - withMutableByteArrayContents buf $ \ptr -> liftIO $ rethrowFsError fp $ + , hPutBufSome = \(Handle h fp) buf bufOff c -> liftIO $ rethrowFsError fp $ + withMutableByteArrayContents buf $ \ptr -> F.writeBuf h (ptr `Foreign.plusPtr` unBufferOffset bufOff) c - , hPutBufSomeAt = \(Handle h fp) buf bufOff c off -> - withMutableByteArrayContents buf $ \ptr -> liftIO $ rethrowFsError fp $ + , hPutBufSomeAt = \(Handle h fp) buf bufOff c off -> liftIO $ rethrowFsError fp $ + withMutableByteArrayContents buf $ \ptr -> F.pwriteBuf h (ptr `Foreign.plusPtr` unBufferOffset bufOff) c (fromIntegral $ unAbsOffset off) } where From b9fe5249ac83f222f7924454441ce3dd84a7e8a1 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Tue, 23 Apr 2024 12:50:50 +0200 Subject: [PATCH 2/3] Comparative benchmarks for `hGetSome(at)` and their replacement functions. `hGetSome'` is a new function that provides the same functionality as `hGetSome`, but uses the `hGetSomeBuf` primitive. Similary, we implement a new `hGetSomeAt'` function that provides the same functionality as `hGetSomeAt`. These comparative benchmarks should show whether we can replace the `hGetSome(At)` primitives with `hGetBufSome(At)` primtives and the new compound functions. --- fs-api/bench/Main.hs | 198 +++++++++++++++++++++++ fs-api/fs-api.cabal | 38 +++-- fs-api/src-unix/System/FS/IO/Internal.hs | 3 +- 3 files changed, 227 insertions(+), 12 deletions(-) create mode 100644 fs-api/bench/Main.hs diff --git a/fs-api/bench/Main.hs b/fs-api/bench/Main.hs new file mode 100644 index 0000000..3eb716e --- /dev/null +++ b/fs-api/bench/Main.hs @@ -0,0 +1,198 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE StandaloneDeriving #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + +module Main (main) where + +import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad) +import Criterion.Main +import qualified Data.ByteString as BS +import qualified Data.ByteString.Internal as BS +import qualified Data.ByteString.Lazy as LBS +import Data.Int (Int64) +import Data.List (unfoldr) +import Data.Primitive.ByteArray +import Data.Word (Word64) +import Foreign (withForeignPtr) +import qualified GHC.ForeignPtr as GHC +import GHC.Generics (Generic) +import qualified GHC.IO as GHC +import qualified GHC.Ptr as GHC +import GHC.Stack (HasCallStack) +import qualified System.Directory as Dir +import qualified System.FS.API as FS +import qualified System.FS.API.Lazy as FS +import System.FS.IO (HandleIO, ioHasBufFS, ioHasFS) +import System.FS.IO.Internal.Handle (HandleOS (..)) +import System.IO.Temp (createTempDirectory, + getCanonicalTemporaryDirectory) +import System.Random (mkStdGen, uniform) + +main :: IO () +main = do + putStrLn "WARNING: it is recommended to run each benchmark in isolation \ + \with short cooldown pauses in between benchmark executable \ + \invocations. This prevents noise coming from one benchmark \ + \from influencing another benchmark. Example incantion: \ + \cabal run fs-api-bench -- -m glob \"System.FS.API/hGetSome\"" + defaultMain [benchmarks] + +benchmarks :: Benchmark +benchmarks = bgroup "System.FS.API" [ + envWithCleanup (mkFileEnv (4096 * 64) "hGetSome") cleanupFileEnv $ \ ~(hfs, _, _, fsp) -> + bench "hGetSome" $ + perRunEnvWithCleanup (mkHandleEnv hfs fsp 0) (cleanupHandleEnv hfs) $ \h -> do + FS.hGetSome hfs h (4096 * 64) + , envWithCleanup (mkFileEnv (4096 * 64) "hGetSome'") cleanupFileEnv $ \ ~(hfs, hbfs, _, fsp) -> + bench "hGetSome'" $ + perRunEnvWithCleanup (mkHandleEnv hfs fsp 0) (cleanupHandleEnv hfs) $ \h -> do + hGetSome' hbfs h (4096 * 64) + , envWithCleanup (mkFileEnv (4096 * 64) "hGetSomeAt") cleanupFileEnv $ \ ~(hfs, _, _, fsp) -> + bench "hGetSomeAt" $ + perRunEnvWithCleanup (mkHandleEnv hfs fsp 0) (cleanupHandleEnv hfs) $ \h -> do + FS.hGetSomeAt hfs h (4096 * 64) 0 + , envWithCleanup (mkFileEnv (4096 * 64) "hGetSomeAt'") cleanupFileEnv $ \ ~(hfs, hbfs, _, fsp) -> + bench "hGetSomeAt'" $ + perRunEnvWithCleanup (mkHandleEnv hfs fsp 0) (cleanupHandleEnv hfs) $ \h -> do + hGetSomeAt' hbfs h (4096 * 64) 0 + ] + +{------------------------------------------------------------------------------- + Benchmarkable functions +-------------------------------------------------------------------------------} + +hGetSome' :: + (HasCallStack, PrimMonad m) + => FS.HasBufFS m h + -> FS.Handle h + -> Word64 + -> m BS.ByteString +hGetSome' hbfs !h !c = do + !buf <- newPinnedByteArray (fromIntegral c) + !c' <- FS.hGetBufSome hbfs h buf 0 (fromIntegral c) + ba <- unsafeFreezeByteArray buf + -- pure $ copyByteArrayToByteString ba 0 (fromIntegral c') + pure $! unsafeByteArrayToByteString ba (fromIntegral c') + +hGetSomeAt' :: + (HasCallStack, PrimMonad m) + => FS.HasBufFS m h + -> FS.Handle h + -> Word64 + -> FS.AbsOffset + -> m BS.ByteString +hGetSomeAt' hbfs !h !c !off = do + !buf <- newPinnedByteArray (fromIntegral c) + !c' <- FS.hGetBufSomeAt hbfs h buf 0 (fromIntegral c) off + ba <- unsafeFreezeByteArray buf + -- pure $ copyByteArrayToByteString ba 0 (fromIntegral c') + pure $! unsafeByteArrayToByteString ba (fromIntegral c') + +{-# INLINE unsafeByteArrayToByteString #-} +unsafeByteArrayToByteString :: ByteArray -> Int -> BS.ByteString +unsafeByteArrayToByteString !ba !len = + GHC.unsafeDupablePerformIO $ do + let !(GHC.Ptr addr#) = byteArrayContents ba + (MutableByteArray mba#) <- unsafeThawByteArray ba + let fp = GHC.ForeignPtr addr# (GHC.PlainPtr mba#) + BS.mkDeferredByteString fp len + +-- | Copy a 'Prim.ByteArray' at a certain offset and length into a +-- 'BS.ByteString'. +-- +-- This is a copy of a function from @cborg@. +_copyByteArrayToByteString :: + ByteArray -- ^ 'ByteArray' to copy from. + -> Int -- ^ Offset into the 'ByteArray' to start with. + -> Int -- ^ Length of the data to copy. + -> BS.ByteString +_copyByteArrayToByteString ba off len = + GHC.unsafeDupablePerformIO $ do + fp <- BS.mallocByteString len + withForeignPtr fp $ \ptr -> do + copyByteArrayToPtr ptr ba off len + return (BS.PS fp 0 len) + +{------------------------------------------------------------------------------- + Orphan instances +-------------------------------------------------------------------------------} + +deriving stock instance Generic (HandleOS h) +deriving anyclass instance NFData (HandleOS h) +deriving anyclass instance NFData FS.FsPath +deriving anyclass instance NFData h => NFData (FS.Handle h) +instance NFData (FS.HasFS m h) where + rnf hfs = + dumpState `seq` hOpen `seq` hClose `seq` hIsOpen `seq` hSeek `seq` + hGetSome `seq`hGetSomeAt `seq` hPutSome `seq` hTruncate `seq` + hGetSize `seq` createDirectory `seq` createDirectoryIfMissing `seq` + listDirectory `seq` doesDirectoryExist `seq` doesFileExist `seq` + removeDirectoryRecursive `seq` removeFile `seq` renameFile `seq` + mkFsErrorPath `seq` unsafeToFilePath `seq` () + where + FS.HasFS {..} = hfs + _coveredAllCases x = case x of + FS.HasFS _a _b _c _d _e _f _g _h _i _j _k _l _m _n _o _p _q _r _s _t -> () + + +instance NFData (FS.HasBufFS m h) where + rnf hbfs = hPutBufSome `seq` hPutBufSomeAt `seq` () + where + FS.HasBufFS { FS.hPutBufSome , FS.hPutBufSomeAt } = hbfs + +{------------------------------------------------------------------------------- + Environment initialisation and cleanup +-------------------------------------------------------------------------------} + +mkFileEnv :: + Int + -> String + -> IO (FS.HasFS IO HandleIO, FS.HasBufFS IO HandleIO, FilePath, FS.FsPath) +mkFileEnv nbytes dirName = do + sysTmpDir <- getCanonicalTemporaryDirectory + tmpDir <- createTempDirectory sysTmpDir dirName + let hfs = ioHasFS (FS.MountPoint tmpDir) + hbfs = ioHasBufFS (FS.MountPoint tmpDir) + + -- Create a file containing random bytes. + let g = mkStdGen 17 + bytes = take nbytes $ unfoldr (Just . uniform) g + bs = LBS.pack bytes + fp = "benchfile" + fsp = FS.mkFsPath [fp] + FS.withFile hfs fsp (FS.WriteMode FS.MustBeNew) $ \h -> do + nbytes' <- FS.hPutAll hfs h bs + assert (nbytes == fromIntegral nbytes') $ pure () + + -- Read the full file into memory to make doubly sure that the file is in + -- the page cache, even though it might still be in the page cache as a + -- result of writing the file. + -- + -- Having the full file in the page cache will hopefully prevent some noise + -- in the benchmark measurements. + FS.withFile hfs fsp FS.ReadMode $ \h -> do + bs' <- FS.hGetAll hfs h + pure $! rnf bs' + + pure (hfs, hbfs, tmpDir, fsp) + +cleanupFileEnv :: (a, b, FilePath, d) -> IO () +cleanupFileEnv (_, _, fp, _) = Dir.removeDirectoryRecursive fp + +mkHandleEnv :: FS.HasFS IO HandleIO -> FS.FsPath -> Int64 -> IO (FS.Handle HandleIO) +mkHandleEnv hfs fsp n = do + h <- FS.hOpen hfs fsp FS.ReadMode + FS.hSeek hfs h FS.AbsoluteSeek n + pure h + +cleanupHandleEnv :: FS.HasFS IO HandleIO -> FS.Handle HandleIO -> IO () +cleanupHandleEnv = FS.hClose diff --git a/fs-api/fs-api.cabal b/fs-api/fs-api.cabal index b5466cf..d2a2c29 100644 --- a/fs-api/fs-api.cabal +++ b/fs-api/fs-api.cabal @@ -24,7 +24,14 @@ source-repository head location: https://github.com/input-output-hk/fs-sim subdir: fs-api +common warnings + ghc-options: + -Wall -Wcompat -Wincomplete-uni-patterns + -Wincomplete-record-updates -Wpartial-fields -Widentities + -Wredundant-constraints -Wmissing-export-lists -Wunused-packages + library + import: warnings hs-source-dirs: src exposed-modules: System.FS.API @@ -69,12 +76,8 @@ library else hs-source-dirs: src-macos - ghc-options: - -Wall -Wcompat -Wincomplete-uni-patterns - -Wincomplete-record-updates -Wpartial-fields -Widentities - -Wredundant-constraints -Wmissing-export-lists -Wunused-packages - test-suite fs-api-test + import: warnings type: exitcode-stdio-1.0 hs-source-dirs: test main-is: Main.hs @@ -89,8 +92,23 @@ test-suite fs-api-test , tasty-quickcheck , temporary - ghc-options: - -Wall -Wcompat -Wincomplete-uni-patterns - -Wincomplete-record-updates -Wpartial-fields -Widentities - -Wredundant-constraints -Wmissing-export-lists -Wunused-packages - -fno-ignore-asserts + ghc-options: -fno-ignore-asserts + +benchmark fs-api-bench + import: warnings + type: exitcode-stdio-1.0 + hs-source-dirs: bench + main-is: Main.hs + default-language: Haskell2010 + build-depends: + , base + , bytestring + , criterion + , deepseq + , directory + , fs-api + , primitive + , random + , temporary + + ghc-options: -rtsopts -with-rtsopts=-T diff --git a/fs-api/src-unix/System/FS/IO/Internal.hs b/fs-api/src-unix/System/FS/IO/Internal.hs index b3ba47d..c26cbf4 100644 --- a/fs-api/src-unix/System/FS/IO/Internal.hs +++ b/fs-api/src-unix/System/FS/IO/Internal.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE PackageImports #-} +{-# LANGUAGE CPP #-} -- | This is meant to be used for the implementation of HasFS instances and not -- directly by client code. From 3b57cd3b040f77588752590086f3833f3796896d Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Tue, 23 Apr 2024 16:10:26 +0200 Subject: [PATCH 3/3] TOSQUASH --- fs-api/bench/Main.hs | 4 ++-- fs-api/fs-api.cabal | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs-api/bench/Main.hs b/fs-api/bench/Main.hs index 3eb716e..07bb58d 100644 --- a/fs-api/bench/Main.hs +++ b/fs-api/bench/Main.hs @@ -23,10 +23,10 @@ import Data.List (unfoldr) import Data.Primitive.ByteArray import Data.Word (Word64) import Foreign (withForeignPtr) +import qualified GHC.Exts as GHC import qualified GHC.ForeignPtr as GHC import GHC.Generics (Generic) import qualified GHC.IO as GHC -import qualified GHC.Ptr as GHC import GHC.Stack (HasCallStack) import qualified System.Directory as Dir import qualified System.FS.API as FS @@ -104,7 +104,7 @@ unsafeByteArrayToByteString !ba !len = let !(GHC.Ptr addr#) = byteArrayContents ba (MutableByteArray mba#) <- unsafeThawByteArray ba let fp = GHC.ForeignPtr addr# (GHC.PlainPtr mba#) - BS.mkDeferredByteString fp len + pure $! BS.BS fp len -- | Copy a 'Prim.ByteArray' at a certain offset and length into a -- 'BS.ByteString'. diff --git a/fs-api/fs-api.cabal b/fs-api/fs-api.cabal index d2a2c29..03abf23 100644 --- a/fs-api/fs-api.cabal +++ b/fs-api/fs-api.cabal @@ -48,7 +48,7 @@ library default-language: Haskell2010 build-depends: , base >=4.14 && <4.20 - , bytestring >=0.10 && <0.13 + , bytestring >=0.11 && <0.13 , containers >=0.5 && <0.7 , deepseq , digest