From 362285b934e3b3501a57f1e0b665a9604b971c92 Mon Sep 17 00:00:00 2001 From: evoskuil Date: Sun, 28 Jun 2026 23:41:55 -0400 Subject: [PATCH] Resolve stall when disk full affects validator block state writes. --- include/bitcoin/node/chase.hpp | 4 +++ .../bitcoin/node/chasers/chaser_validate.hpp | 1 + src/chasers/chaser_storage.cpp | 3 +++ src/chasers/chaser_validate.cpp | 27 +++++++++++-------- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/bitcoin/node/chase.hpp b/include/bitcoin/node/chase.hpp index 6a2d6449..6f3d1eb8 100644 --- a/include/bitcoin/node/chase.hpp +++ b/include/bitcoin/node/chase.hpp @@ -53,6 +53,10 @@ enum class chase /// Issued by 'full_node' and handled by 'check', 'validate', 'confirm'. resume, + /// Node is recovering from disk full condition. + /// Issued by 'storage' and handled by 'validate'. + unfull, + /// Channel starved for work (object_t). /// Issued by 'block_in_31800' and handled by 'session_outbound'. starved, diff --git a/include/bitcoin/node/chasers/chaser_validate.hpp b/include/bitcoin/node/chasers/chaser_validate.hpp index 3a845f03..93fd1dce 100644 --- a/include/bitcoin/node/chasers/chaser_validate.hpp +++ b/include/bitcoin/node/chasers/chaser_validate.hpp @@ -143,6 +143,7 @@ class BCN_API chaser_validate std::atomic validate_backlog_{}; std::atomic batch_backlog_{}; std::atomic_bool maximum_posted_{}; + std::atomic_bool recovering_{}; network::asio::strand validation_strand_; const uint32_t subsidy_interval_; diff --git a/src/chasers/chaser_storage.cpp b/src/chasers/chaser_storage.cpp index 7abbd33d..3cc24bf4 100644 --- a/src/chasers/chaser_storage.cpp +++ b/src/chasers/chaser_storage.cpp @@ -156,6 +156,9 @@ void chaser_storage::do_reload() NOEXCEPT } else { + // Recovery from disk full message sent in addition to chase::resume. + notify(error::success, chase::unfull, {}); + resume(); const auto span = duration_cast(logger::now() - start); LOGN("Reload from disk full complete in " << span.count() << " secs."); diff --git a/src/chasers/chaser_validate.cpp b/src/chasers/chaser_validate.cpp index ef6cfa4e..df8d86ab 100644 --- a/src/chasers/chaser_validate.cpp +++ b/src/chasers/chaser_validate.cpp @@ -71,6 +71,14 @@ bool chaser_validate::handle_chase(const code&, chase event_, if (closed()) return false; + // Latch recovering from disk full, before suspension is lifted. + // Because in-flight blocks are lost, reset position when backlog clears. + if (event_ == chase::unfull) + { + recovering_.store(true); + return true; + } + // Stop generating query during suspension. // Incoming events may already be flushed to the strand at this point. if (suspended()) @@ -138,6 +146,12 @@ void chaser_validate::do_bump(height_t) NOEXCEPT { BC_ASSERT(stranded()); + if (recovering_.load() && is_zero(validate_backlog_.load())) + { + recovering_.store(false); + set_position(archive().get_fork()); + } + const auto height = add1(position()); if (archive().is_validateable(height)) do_bumped(height); @@ -257,19 +271,10 @@ void chaser_validate::complete_block(const code& ec, const header_link& link, // Batch jobs (all posting from unstranded). // ------------------------------------------------------------------------ - // Avoid posting new work when closing. - if (closed() || !batch_enabled_) + // Faulted implies disk full prevented threshold batch writes. + if (closed() || !batch_enabled_ || faulted) return; - // TODO: ensure doesn't lead to tight revalidation loop under disk full. - // Retry faulted threshold, re-enters backlog (presumes disk full). - if (faulted) - { - ++validate_backlog_; - POST(post_block, link, bypass); - return; - } - // Queue block and process batch if ready. if (batched) {