From 5671c15f4520c6dc20e0805fd0b06157ff94bcd7 Mon Sep 17 00:00:00 2001 From: TheCharlatan Date: Tue, 25 Jul 2023 11:12:10 +0200 Subject: [PATCH 1/3] blockstorage: Mark FindBlockPos as nodiscard A false return value indicates a fatal error (disk space being too low), so make sure we always consume this error code. This commit is part of an ongoing process for making the handling of fatal errors more transparent and easier to understand. --- src/node/blockstorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index eb40d45aba8..0e8b9abce97 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -93,7 +93,7 @@ class BlockManager EXCLUSIVE_LOCKS_REQUIRED(cs_main); void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); void FlushUndoFile(int block_file, bool finalize = false); - bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); + [[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize); FlatFileSeq BlockFileSeq() const; From f0207e00303a1030eca795ede231e3c0d94df061 Mon Sep 17 00:00:00 2001 From: TheCharlatan Date: Tue, 25 Jul 2023 11:32:09 +0200 Subject: [PATCH 2/3] blockstorage: Return on fatal block file flush error By returning an error code if `FlushBlockFile` fails, the caller now has to explicitly handle block file flushing errors. Before this change such errors were non-explicitly ignored without a clear rationale. Prior to this patch `FlushBlockFile` may have failed silently in `Chainstate::FlushStateToDisk`. Improve this with a log line. Also add a TODO comment to flesh out whether returning early in the case of an error is appropriate or not. Returning early might be appropriate to prohibit `WriteBlockIndexDB` from writing a block index entry that does not refer to a fully flushed block. Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called by `FindBlockPos`. Don't change the abort behavior there, since we don't want to fail the function if the flushing of already written blocks fails. Instead, just document it. --- src/node/blockstorage.cpp | 21 ++++++++++++++++++--- src/node/blockstorage.h | 5 ++++- src/validation.cpp | 6 +++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 0d25c798ce3..d5f4fed995e 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -546,8 +546,9 @@ void BlockManager::FlushUndoFile(int block_file, bool finalize) } } -void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) +bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) { + bool success = true; LOCK(cs_LastBlockFile); if (m_blockfile_info.size() < 1) { @@ -555,17 +556,19 @@ void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) // chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which // then calls FlushStateToDisk()), resulting in a call to this function before we // have populated `m_blockfile_info` via LoadBlockIndexDB(). - return; + return true; } assert(static_cast(m_blockfile_info.size()) > m_last_blockfile); FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize); if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) { m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error."); + success = false; } // we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks, // e.g. during IBD or a sync after a node going offline if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo); + return success; } uint64_t BlockManager::CalculateCurrentUsage() @@ -658,7 +661,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne if (!fKnown) { LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString()); } - FlushBlockFile(!fKnown, finalize_undo); + + // Do not propagate the return code. The flush concerns a previous block + // and undo file that has already been written to. If a flush fails + // here, and we crash, there is no expected additional block data + // inconsistency arising from the flush failure here. However, the undo + // data may be inconsistent after a crash if the flush is called during + // a reindex. A flush error might also leave some of the data files + // untrimmed. + if (!FlushBlockFile(!fKnown, finalize_undo)) { + LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, + "Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n", + m_last_blockfile, !fKnown, finalize_undo, nFile); + } m_last_blockfile = nFile; m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking. } diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 0e8b9abce97..3e0962386f7 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -91,7 +91,10 @@ class BlockManager */ bool LoadBlockIndex() EXCLUSIVE_LOCKS_REQUIRED(cs_main); - void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); + + /** Return false if block file flushing fails. */ + [[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); + void FlushUndoFile(int block_file, bool finalize = false); [[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize); diff --git a/src/validation.cpp b/src/validation.cpp index cd6654abe48..a72e5159eff 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2511,7 +2511,11 @@ bool Chainstate::FlushStateToDisk( LOG_TIME_MILLIS_WITH_CATEGORY("write block and undo data to disk", BCLog::BENCH); // First make sure all block and undo data is flushed to disk. - m_blockman.FlushBlockFile(); + // TODO: Handle return error, or add detailed comment why it is + // safe to not return an error upon failure. + if (!m_blockman.FlushBlockFile()) { + LogPrintLevel(BCLog::VALIDATION, BCLog::Level::Warning, "%s: Failed to flush block file.\n", __func__); + } } // Then update all block file information (which may refer to block and undo files). From d8041d4e042957660827313951b18c8dd9a99a16 Mon Sep 17 00:00:00 2001 From: TheCharlatan Date: Tue, 25 Jul 2023 12:03:26 +0200 Subject: [PATCH 3/3] blockstorage: Return on fatal undo file flush error By returning an error code if either `FlushUndoFile` or `FlushBlockFile` fail, the caller now has to explicitly handle block undo file flushing errors. Before this change such errors were non-explicitly ignored without a clear rationale. Besides the call to `FlushUndoFile` in `FlushBlockFile`, ignore its return code at its call site in `WriteUndoDataForBlock`. There, a failed flush of the undo data should not be indicative of a failed write. Add [[nodiscard]] annotations to `FlushUndoFile` such that its return value is not just ignored in the future. --- src/node/blockstorage.cpp | 19 ++++++++++++++++--- src/node/blockstorage.h | 6 ++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index d5f4fed995e..934088aeed7 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -538,12 +538,14 @@ bool BlockManager::UndoReadFromDisk(CBlockUndo& blockundo, const CBlockIndex& in return true; } -void BlockManager::FlushUndoFile(int block_file, bool finalize) +bool BlockManager::FlushUndoFile(int block_file, bool finalize) { FlatFilePos undo_pos_old(block_file, m_blockfile_info[block_file].nUndoSize); if (!UndoFileSeq().Flush(undo_pos_old, finalize)) { m_opts.notifications.flushError("Flushing undo file to disk failed. This is likely the result of an I/O error."); + return false; } + return true; } bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) @@ -567,7 +569,11 @@ bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) } // we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks, // e.g. during IBD or a sync after a node going offline - if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo); + if (!fFinalize || finalize_undo) { + if (!FlushUndoFile(m_last_blockfile, finalize_undo)) { + success = false; + } + } return success; } @@ -764,7 +770,14 @@ bool BlockManager::WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValid // with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in // the FindBlockPos function if (_pos.nFile < m_last_blockfile && static_cast(block.nHeight) == m_blockfile_info[_pos.nFile].nHeightLast) { - FlushUndoFile(_pos.nFile, true); + // Do not propagate the return code, a failed flush here should not + // be an indication for a failed write. If it were propagated here, + // the caller would assume the undo data not to be written, when in + // fact it is. Note though, that a failed flush might leave the data + // file untrimmed. + if (!FlushUndoFile(_pos.nFile, true)) { + LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, "Failed to flush undo file %05i\n", _pos.nFile); + } } else if (_pos.nFile == m_last_blockfile && static_cast(block.nHeight) > m_undo_height_in_last_blockfile) { m_undo_height_in_last_blockfile = block.nHeight; } diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 3e0962386f7..9950d9c477b 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -92,10 +92,12 @@ class BlockManager bool LoadBlockIndex() EXCLUSIVE_LOCKS_REQUIRED(cs_main); - /** Return false if block file flushing fails. */ + /** Return false if block file or undo file flushing fails. */ [[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); - void FlushUndoFile(int block_file, bool finalize = false); + /** Return false if undo file flushing fails. */ + [[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false); + [[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);