diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml index f22bc0022..86da07038 100644 --- a/.github/workflows/haskell.yml +++ b/.github/workflows/haskell.yml @@ -26,8 +26,14 @@ jobs: cabal: ["3.10.2.1"] os: [ubuntu-latest, windows-latest, macOS-latest] exclude: - - os: windows-latest - - os: macOS-latest + - ghc: "9.4.8" + os: windows-latest + - ghc: "9.4.8" + os: macOS-latest + - ghc: "9.8.1" + os: windows-latest + - ghc: "9.8.1" + os: macOS-latest steps: - name: Checkout repository diff --git a/README.md b/README.md index b92d1cecb..447c5f852 100644 --- a/README.md +++ b/README.md @@ -16,4 +16,6 @@ It has a number of custom features that are primarily tailored towards performan ## System requirements -This library only supports 64-bit, little-endian systems. \ No newline at end of file +This library only supports 64-bit, little-endian systems. + +Use -threaded if you depend on this library in Linux \ No newline at end of file diff --git a/cabal.project b/cabal.project index bcfd602fb..7ab1a56d4 100644 --- a/cabal.project +++ b/cabal.project @@ -34,7 +34,8 @@ source-repository-package -- Luckily, bloomfilter is not commonly used package, so this is good enough. constraints: bloomfilter <0 -source-repository-package - type: git - location: https://github.com/well-typed/blockio-uring - tag: bdac50eb4155ea3096b1db9b828307f4fd19ddb3 +if(os(linux)) + source-repository-package + type: git + location: https://github.com/well-typed/blockio-uring + tag: bdac50eb4155ea3096b1db9b828307f4fd19ddb3 diff --git a/fs-api-blockio/src-linux/System/FS/BlockIO/Internal.hs b/fs-api-blockio/src-linux/System/FS/BlockIO/Internal.hs new file mode 100644 index 000000000..44c3ba571 --- /dev/null +++ b/fs-api-blockio/src-linux/System/FS/BlockIO/Internal.hs @@ -0,0 +1,36 @@ +module System.FS.BlockIO.Internal ( + I.IOCtx + , I.IOCtxParams (..) + , I.defaultIOCtxParams + , I.initIOCtx + , I.closeIOCtx + , submitIO + ) where + +import System.FS.API (Handle (..)) +import System.FS.BlockIO.API (IOOp (..), IOResult (..)) +import System.FS.IO.Internal +import System.FS.IO.Internal.Handle +import qualified System.IO.BlockIO as I +import System.Posix.Types + +submitIO :: I.IOCtx -> [IOOp FHandle] -> IO [IOResult] +submitIO ioctx ioops = do + ioops' <- mapM fIOOp ioops + fmap fIOResult <$> I.submitIO ioctx ioops' + +fIOOp :: IOOp FHandle -> IO I.IOOp +fIOOp (IOOpRead h off bufptr c) = handleFD h >>= \fd -> pure (I.IOOpRead fd off bufptr c) +fIOOp (IOOpWrite h off bufptr c) = handleFD h >>= \fd -> pure (I.IOOpWrite fd off bufptr c) + +-- TODO: complete pragma +fIOResult :: I.IOResult -> IOResult +fIOResult (I.IOResult c) = IOResult c +fIOResult (I.IOError e) = IOError e +fIOResult _ = error "mapIOResult: missing incomplete pragma" + +-- This only checks whether the handle is open when we convert to an Fd. After +-- that, the handle could be closed when we're still performing blockio +-- operations. +handleFD :: Handle FHandle -> IO Fd +handleFD h = withOpenHandle "submitIO" (handleRaw h) pure diff --git a/fs-api-blockio/src-macos/System/FS/BlockIO/Internal.hs b/fs-api-blockio/src-macos/System/FS/BlockIO/Internal.hs new file mode 100644 index 000000000..3a37ee907 --- /dev/null +++ b/fs-api-blockio/src-macos/System/FS/BlockIO/Internal.hs @@ -0,0 +1,49 @@ +module System.FS.BlockIO.Internal ( + IOCtx + , IOCtxParams (..) + , defaultIOCtxParams + , initIOCtx + , closeIOCtx + , submitIO + ) where + +import Data.Word +import Foreign.Ptr +import System.FS.API +import System.FS.BlockIO.API (ByteCount, FileOffset, IOOp (..), + IOResult (IOResult_)) +import System.FS.IO.Internal +import System.FS.IO.Internal.Handle +import System.Posix.IO.ByteString.Ext + +-- TODO: use something like an MVar to keep track of open/closed state +data IOCtx = IOCtx + +data IOCtxParams = IOCtxParams + +defaultIOCtxParams :: IOCtxParams +defaultIOCtxParams = IOCtxParams + +initIOCtx :: IOCtxParams -> IO IOCtx +initIOCtx IOCtxParams = pure IOCtx + +closeIOCtx :: IOCtx -> IO () +closeIOCtx IOCtx = pure () + +submitIO :: IOCtx -> [IOOp FHandle] -> IO [IOResult] +submitIO IOCtx = mapM fIOOp + +-- TODO: catch exceptions and rethrow them as IOResult_ +fIOOp :: IOOp FHandle -> IO IOResult +fIOOp (IOOpRead h off bufptr c) = + preadBuf h bufptr c off >>= \c' -> pure (IOResult_ $ fromIntegral c') +fIOOp (IOOpWrite h off bufptr c) = + pwriteBuf h bufptr c off >>= \c' -> pure (IOResult_ $ fromIntegral c') + +-- TODO: possibly upstream to fs-api +preadBuf :: Handle FHandle -> Ptr Word8 -> ByteCount -> FileOffset -> IO ByteCount +preadBuf fh bufptr c off = withOpenHandle "preadBuf" (handleRaw fh) $ \h -> fdPreadBuf h bufptr c off + +-- TODO: possibly upstream to fs-api +pwriteBuf :: Handle FHandle -> Ptr Word8 -> ByteCount -> FileOffset -> IO ByteCount +pwriteBuf fh bufptr c off = withOpenHandle "pwriteBuf" (handleRaw fh) $ \h -> fdPwriteBuf h bufptr c off diff --git a/fs-api-blockio/src-windows/System/FS/BlockIO/Internal.hs b/fs-api-blockio/src-windows/System/FS/BlockIO/Internal.hs new file mode 100644 index 000000000..4bc1c778c --- /dev/null +++ b/fs-api-blockio/src-windows/System/FS/BlockIO/Internal.hs @@ -0,0 +1,66 @@ +-- TODO: unify with the implementation for MacOS, since the implementation is +-- the same except for 'preadBuf' and 'pwriteBuf'. +module System.FS.BlockIO.Internal ( + IOCtx + , IOCtxParams (..) + , defaultIOCtxParams + , initIOCtx + , closeIOCtx + , submitIO + ) where + +import Data.Int +import Data.Word +import Foreign.Ptr +import System.FS.API +import System.FS.BlockIO.API (ByteCount, FileOffset, IOOp (..), + IOResult (IOResult_)) +import System.FS.IO.Internal +import System.FS.IO.Internal.Handle +import System.Win32 + +-- TODO: use something like an MVar to keep track of open/closed state +data IOCtx = IOCtx + +data IOCtxParams = IOCtxParams + +defaultIOCtxParams :: IOCtxParams +defaultIOCtxParams = IOCtxParams + +initIOCtx :: IOCtxParams -> IO IOCtx +initIOCtx IOCtxParams = pure IOCtx + +closeIOCtx :: IOCtx -> IO () +closeIOCtx IOCtx = pure () + +submitIO :: IOCtx -> [IOOp FHandle] -> IO [IOResult] +submitIO IOCtx = mapM fIOOp + +-- TODO: catch exceptions and rethrow them as IOResult_ +fIOOp :: IOOp FHandle -> IO IOResult +fIOOp (IOOpRead h off bufptr c) = + preadBuf h bufptr c off >>= \c' -> pure (IOResult_ $ fromIntegral c') +fIOOp (IOOpWrite h off bufptr c) = + pwriteBuf h bufptr c off >>= \c' -> pure (IOResult_ $ fromIntegral c') + +-- TODO: copied from fs-api +getCurrentFileOffset :: HANDLE -> IO Int64 +getCurrentFileOffset h = setFilePointerEx h 0 fILE_CURRENT + +-- TODO: possibly upstream to fs-api +preadBuf :: Handle FHandle -> Ptr Word8 -> ByteCount -> FileOffset -> IO ByteCount +preadBuf fh bufptr c off = withOpenHandle "preadBuf" (handleRaw fh) $ \h -> do + initialOffset <- getCurrentFileOffset h + _ <- setFilePointerEx h (fromIntegral off) fILE_BEGIN + n <- fromIntegral <$> win32_ReadFile h bufptr (fromIntegral c) Nothing + _ <- setFilePointerEx h initialOffset fILE_BEGIN + return n + +-- TODO: possibly upstream to fs-api +pwriteBuf :: Handle FHandle -> Ptr Word8 -> ByteCount -> FileOffset -> IO ByteCount +pwriteBuf fh bufptr c off = withOpenHandle "pwriteBuf" (handleRaw fh) $ \h -> do + initialOffset <- getCurrentFileOffset h + _ <- setFilePointerEx h (fromIntegral off) fILE_BEGIN + n <- fromIntegral <$> win32_WriteFile h bufptr (fromIntegral c) Nothing + _ <- setFilePointerEx h initialOffset fILE_BEGIN + return n diff --git a/fs-api-blockio/src/System/FS/BlockIO/API.hs b/fs-api-blockio/src/System/FS/BlockIO/API.hs new file mode 100644 index 000000000..a3c958cb6 --- /dev/null +++ b/fs-api-blockio/src/System/FS/BlockIO/API.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE ViewPatterns #-} + +module System.FS.BlockIO.API ( + SomeHasBlockIO (..) + , HasBlockIO (..) + , IOOp (..) + , IOResult (IOResult, IOError) + , ByteCount + , FileOffset + , Errno (..) + -- Internal + , pattern IOResult_ + ) where + +import Data.Word +import Foreign.C +import Foreign.Ptr +import System.FS.API +import System.Posix.Types + +data SomeHasBlockIO m where + SomeHasBlockIO :: + Eq h + => HasFS m h + -> HasBlockIO m h ioctxparams ioctx + -> SomeHasBlockIO m + +-- TODO: see if we can get away with monomorphising @ioctxparams@ +-- +-- TODO: make this compatible with fs-sim simulation +data HasBlockIO m h ioctxparams ioctx = HasBlockIO { + defaultIOCtxParams :: ioctxparams + , initIOCtx :: ioctxparams -> m ioctx + , closeIOCtx :: ioctx -> m () + , submitIO :: ioctx -> [IOOp h] -> m [IOResult] + } + +data IOOp h = IOOpRead !(Handle h) !FileOffset !(Ptr Word8) !ByteCount + | IOOpWrite !(Handle h) !FileOffset !(Ptr Word8) !ByteCount + deriving Show + +newtype IOResult = IOResult_ CInt + +{-# COMPLETE IOResult, IOError #-} + +pattern IOResult :: ByteCount -> IOResult +pattern IOResult c <- (viewIOResult -> Just c) + where + IOResult count = IOResult_ ((fromIntegral :: CSize -> CInt) count) + +pattern IOError :: Errno -> IOResult +pattern IOError e <- (viewIOError -> Just e) + where + IOError (Errno e) = IOResult_ (-e) + +viewIOResult :: IOResult -> Maybe ByteCount +viewIOResult (IOResult_ c) + | c >= 0 = Just ((fromIntegral :: CInt -> CSize) c) + | otherwise = Nothing + +viewIOError :: IOResult -> Maybe Errno +viewIOError (IOResult_ e) + | e < 0 = Just (Errno e) + | otherwise = Nothing diff --git a/fs-api-blockio/src/System/FS/BlockIO/IO.hs b/fs-api-blockio/src/System/FS/BlockIO/IO.hs new file mode 100644 index 000000000..d03b122e9 --- /dev/null +++ b/fs-api-blockio/src/System/FS/BlockIO/IO.hs @@ -0,0 +1,17 @@ +module System.FS.BlockIO.IO ( + ioHasBlockIO + , I.IOCtx + , I.IOCtxParams (..) + ) where + +import System.FS.BlockIO.API +import qualified System.FS.BlockIO.Internal as I +import System.FS.IO + +ioHasBlockIO :: HasBlockIO IO HandleIO I.IOCtxParams I.IOCtx +ioHasBlockIO = HasBlockIO { + defaultIOCtxParams = I.defaultIOCtxParams + , initIOCtx = I.initIOCtx + , closeIOCtx = I.closeIOCtx + , submitIO = I.submitIO + } diff --git a/fs-api-blockio/test/Main.hs b/fs-api-blockio/test/Main.hs new file mode 100644 index 000000000..5939c438e --- /dev/null +++ b/fs-api-blockio/test/Main.hs @@ -0,0 +1,59 @@ +{-# LANGUAGE BangPatterns #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + +{- HLINT ignore "Use camelCase" -} + +module Main (main) where + +import Data.ByteString +import qualified Data.ByteString as BS +import Data.ByteString.Unsafe as BSU +import Foreign (castPtr) +import qualified System.FS.API as FS +import System.FS.BlockIO.API +import qualified System.FS.BlockIO.IO as IO +import qualified System.FS.IO as IO +import System.IO.Temp +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.QuickCheck + +main :: IO () +main = defaultMain tests + +tests :: TestTree +tests = testGroup "fs-api-blockio" [ + testCase "example_initClose" example_initClose + , testProperty "prop_readWrite" prop_readWrite + ] + +instance Arbitrary ByteString where + arbitrary = BS.pack <$> arbitrary + shrink = fmap BS.pack . shrink . BS.unpack + +example_initClose :: Assertion +example_initClose = do + let !hbio = IO.ioHasBlockIO + ctx <- initIOCtx hbio (defaultIOCtxParams hbio) + closeIOCtx hbio ctx + +prop_readWrite :: ByteString -> Property +prop_readWrite bs = ioProperty $ withSystemTempDirectory "prop_readWrite" $ \dirPath -> do + let hfs = IO.ioHasFS (FS.MountPoint dirPath) + hbio = IO.ioHasBlockIO + !bsCopy = BS.copy bs + ctx <- initIOCtx hbio (defaultIOCtxParams hbio) + prop <- FS.withFile hfs (FS.mkFsPath ["temp"]) (FS.WriteMode FS.MustBeNew) $ \h -> do + BSU.unsafeUseAsCStringLen bs $ \(ptr, n) -> do + [writeRes] <- submitIO hbio ctx [IOOpWrite h 0 (castPtr ptr) (fromIntegral n)] + let writeTest = case writeRes of + IOError e -> counterexample (show e) (property False) + IOResult m -> n === fromIntegral m + [readRes] <- submitIO hbio ctx [IOOpRead h 0 (castPtr ptr) (fromIntegral n)] + let readTest = case readRes of + IOError e -> counterexample (show e) (property False) + IOResult m -> n === fromIntegral m + pure $ writeTest .&&. readTest + closeIOCtx hbio ctx + pure (prop .&&. bs === bsCopy) diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 5b9b6e77c..44098a190 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -63,20 +63,19 @@ library Database.LSMTree.Normal build-depends: - , base >=4.16 && <4.20 - , bitvec ^>=1.1 - , blockio-uring ^>=0.1 - , bytestring ^>=0.11.4.0 + , base >=4.16 && <4.20 + , bitvec ^>=1.1 + , bytestring ^>=0.11.4.0 , containers - , crc32c >=0.1 + , crc32c >=0.1 , deepseq , filepath - , fs-api ^>=0.2 - , io-classes ^>=1.3 - , lsm-tree:bloomfilter - , primitive ^>=0.9 - , vector ^>=0.13 - , vector-algorithms ^>=0.9 + , fs-api ^>=0.2 + , io-classes ^>=1.3 + , lsm-tree:{bloomfilter, fs-api-blockio} + , primitive ^>=0.9 + , vector ^>=0.13 + , vector-algorithms ^>=0.9 -- this fork doesn't work on 32bit systems library bloomfilter @@ -197,7 +196,7 @@ test-suite lsm-tree-test , transformers , vector - ghc-options: -fno-ignore-asserts + ghc-options: -fno-ignore-asserts -threaded benchmark lsm-tree-micro-bench import: warnings, wno-x-partial @@ -224,6 +223,8 @@ benchmark lsm-tree-micro-bench , random , vector + ghc-options: -threaded + benchmark lsm-tree-macro-bench import: warnings, wno-x-partial default-language: Haskell2010 @@ -231,6 +232,7 @@ benchmark lsm-tree-macro-bench hs-source-dirs: bench/macro main-is: Main.hs build-depends: base + ghc-options: -threaded library kmerge import: warnings, wno-x-partial @@ -332,3 +334,49 @@ test-suite bloomfilter-tests , random , test-framework , test-framework-quickcheck2 + +library fs-api-blockio + import: warnings, wno-x-partial + visibility: private + hs-source-dirs: fs-api-blockio/src + default-language: Haskell2010 + exposed-modules: + System.FS.BlockIO.API + System.FS.BlockIO.IO + + build-depends: + , base >=4.16 && <4.20 + , fs-api ^>=0.2 + + if os(linux) + hs-source-dirs: fs-api-blockio/src-linux + other-modules: System.FS.BlockIO.Internal + build-depends: blockio-uring ^>=0.1 + + elif os(osx) + hs-source-dirs: fs-api-blockio/src-macos + other-modules: System.FS.BlockIO.Internal + build-depends: unix-bytestring ^>=0.4 + + elif os(windows) + hs-source-dirs: fs-api-blockio/src-windows + other-modules: System.FS.BlockIO.Internal + build-depends: Win32 ^>=2.14 + +test-suite fs-api-blockio-test + import: warnings, wno-x-partial + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: fs-api-blockio/test + main-is: Main.hs + build-depends: + , base >=4.16 && <4.20 + , bytestring + , fs-api + , lsm-tree:fs-api-blockio + , tasty + , tasty-hunit + , tasty-quickcheck + , temporary + + ghc-options: -threaded -fno-ignore-asserts diff --git a/src/Database/LSMTree/Internal/Lookup.hs b/src/Database/LSMTree/Internal/Lookup.hs index b86cf9de4..d5a639ce4 100644 --- a/src/Database/LSMTree/Internal/Lookup.hs +++ b/src/Database/LSMTree/Internal/Lookup.hs @@ -10,7 +10,7 @@ import Database.LSMTree.Internal.Run.Index.Compact (CompactIndex, PageSpan) import qualified Database.LSMTree.Internal.Run.Index.Compact as Index import Database.LSMTree.Internal.Serialise -import System.IO.BlockIO () +import System.FS.BlockIO.IO () -- | TODO: placeholder type for a run, replace by actual type once implemented type Run fd = (fd, Bloom SerialisedKey, CompactIndex)